## Cargar datos

In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Load the normalised housing table (tabular features, price and per-room image file names)
ruta_dataset = '../Dataset/dataset_normalizado.csv'
df = pd.read_csv(ruta_dataset)

# Preview the first rows
df.head()

Unnamed: 0,bedrooms,bathrooms,area,zipcode,price,bathroom_image,bedroom_image,kitchen_image,frontal_image
0,0.536137,1.341651,1.378536,-0.795699,869500,1_bathroom.jpg,1_bedroom.jpg,1_kitchen.jpg,1_frontal.jpg
1,0.536137,0.336704,0.798734,-7.640277,865200,2_bathroom.jpg,2_bedroom.jpg,2_kitchen.jpg,2_frontal.jpg
2,-0.325224,1.341651,1.272375,-0.794159,889000,3_bathroom.jpg,3_bedroom.jpg,3_kitchen.jpg,3_frontal.jpg
3,1.397499,2.346599,1.35322,-0.794719,910000,4_bathroom.jpg,4_bedroom.jpg,4_kitchen.jpg,4_frontal.jpg
4,-0.325224,1.341651,1.429983,-0.794159,971226,5_bathroom.jpg,5_bedroom.jpg,5_kitchen.jpg,5_frontal.jpg


## Modificar columnas

In [13]:
# Add a column with the file name of each house's mosaic image.
# Names are 1-based ("1.png", "2.png", ...) to match the numbering used by the
# per-room image files ("1_bathroom.jpg", ...), while the index is 0-based.
df['imagen_mosaico'] = [f'{indice + 1}.png' for indice in df.index]

df.head()


Unnamed: 0,bedrooms,bathrooms,area,zipcode,price,bathroom_image,bedroom_image,kitchen_image,frontal_image,imagen_mosaico
0,0.536137,1.341651,1.378536,-0.795699,869500,1_bathroom.jpg,1_bedroom.jpg,1_kitchen.jpg,1_frontal.jpg,1.png
1,0.536137,0.336704,0.798734,-7.640277,865200,2_bathroom.jpg,2_bedroom.jpg,2_kitchen.jpg,2_frontal.jpg,2.png
2,-0.325224,1.341651,1.272375,-0.794159,889000,3_bathroom.jpg,3_bedroom.jpg,3_kitchen.jpg,3_frontal.jpg,3.png
3,1.397499,2.346599,1.35322,-0.794719,910000,4_bathroom.jpg,4_bedroom.jpg,4_kitchen.jpg,4_frontal.jpg,4.png
4,-0.325224,1.341651,1.429983,-0.794159,971226,5_bathroom.jpg,5_bedroom.jpg,5_kitchen.jpg,5_frontal.jpg,5.png


In [None]:
# Keep only the mosaic file name, the numeric features and the target, in the
# desired order. Selecting the columns drops the four per-room image columns in
# the same step and, unlike `del df[...]`, does not raise KeyError when the cell
# is executed more than once.
df = df[['imagen_mosaico', 'bathrooms', 'bedrooms', 'area', 'zipcode', 'price']]

df.head()

In [16]:
# Write the current table to disk without the row index
ruta_csv_mosaico = '../Dataset/dataset_normalizado_mosaico.csv'
df.to_csv(ruta_csv_mosaico, index=False)

## Transformar imágenes a imagen mosaico

In [18]:
from tqdm import tqdm
import numpy as np
import cv2
import os

def imagen_mosaico(df: pd.DataFrame, path_base: str, path_destino: str):
	"""
	Build one 400x400 mosaic image per DataFrame row from the four photos of a house.

	For every row, the bathroom, bedroom, frontal and kitchen images are read
	from ``path_base`` and pasted as 200x200 tiles into a single image:
	bathroom top-left, bedroom top-right, frontal bottom-right and kitchen
	bottom-left. The mosaic is written to ``path_destino`` under the file name
	stored in the row's ``imagen_mosaico`` column.

	Arguments:
		df: pandas DataFrame with the columns ``bathroom_image``,
			``bedroom_image``, ``frontal_image`` and ``kitchen_image`` (source
			file names) plus ``imagen_mosaico`` (output file name).
		path_base: Directory that contains the source images (with or without
			a trailing separator).
		path_destino: Directory where the mosaics are written; it is created
			when it does not exist.

	Raises:
		FileNotFoundError: If a source image cannot be read.
		OSError: If a mosaic image cannot be written.
	"""
	lado = 200  # side of one tile in pixels; the mosaic is 2 x 2 tiles

	# (source column, row offset, column offset) of each tile inside the mosaic
	posiciones = [
		('bathroom_image', 0, 0),
		('bedroom_image', 0, lado),
		('frontal_image', lado, lado),
		('kitchen_image', lado, 0),
	]

	# Make sure the destination directory exists
	os.makedirs(path_destino, exist_ok=True)

	for i in tqdm(df.index.values, desc="Creando imágenes mosaico", unit="imagen"):
		outputImage = np.zeros((2 * lado, 2 * lado, 3), dtype="uint8")

		for columna, fila, col in posiciones:
			ruta = os.path.join(path_base, str(df.loc[i, columna]))
			image = cv2.imread(ruta)

			# cv2.imread signals a missing/unreadable file by returning None
			# instead of raising, so fail here with the offending path.
			if image is None:
				raise FileNotFoundError(f"No se pudo leer la imagen: {ruta}")

			# Tiles must be exactly lado x lado to fit their quadrant
			if image.shape[:2] != (lado, lado):
				image = cv2.resize(image, (lado, lado))

			outputImage[fila:fila + lado, col:col + lado] = image

		# Save the mosaic; cv2.imwrite reports failure through its return value
		ruta_salida = os.path.join(path_destino, str(df.loc[i, 'imagen_mosaico']))
		if not cv2.imwrite(ruta_salida, outputImage):
			raise OSError(f"No se pudo guardar la imagen mosaico: {ruta_salida}")

# `df` no longer carries the per-room image columns at this point (they are
# removed in the "Modificar columnas" section), so passing it here raises
# KeyError on a fresh Restart & Run All. Rebuild the frame with the source file
# names from the original CSV and add the same 1-based mosaic file names.
df_imagenes = pd.read_csv('../Dataset/dataset_normalizado.csv')
df_imagenes['imagen_mosaico'] = [f'{x + 1}.png' for x in df_imagenes.index]

# Create the mosaic images
imagen_mosaico(df_imagenes, "../Dataset/imagenes_ajustadas/", "../Dataset/imagenes_mosaico/")


Creando imágenes mosaico: 100%|██████████| 535/535 [00:13<00:00, 38.86imagen/s]


## Dividir dataset en train y test

In [2]:
import pandas as pd

# Load the table that references the mosaic images and preview it
ruta_mosaico = '../Dataset/dataset_normalizado_mosaico.csv'
df = pd.read_csv(ruta_mosaico)
df.head()

Unnamed: 0,imagen_mosaico,bathrooms,bedrooms,area,zipcode,price
0,1.png,1.341651,0.536137,1.378536,-0.795699,869500
1,2.png,0.336704,0.536137,0.798734,-7.640277,865200
2,3.png,1.341651,-0.325224,1.272375,-0.794159,889000
3,4.png,2.346599,1.397499,1.35322,-0.794719,910000
4,5.png,1.341651,-0.325224,1.429983,-0.794159,971226


In [5]:
from sklearn.model_selection import train_test_split

# --> Split the dataset: 20 % of the rows go to test, with a fixed seed so the
#     partition is reproducible
columnas_entrada = ['imagen_mosaico', 'bathrooms', 'bedrooms', 'area', 'zipcode']
X_train, X_test, y_train, y_test = train_test_split(
    df[columnas_entrada],
    df['price'],
    test_size=0.2,
    random_state=42,
)

# --> Put the target back next to the features of each subset
train = pd.concat([X_train, y_train], axis=1)
test = pd.concat([X_test, y_test], axis=1)

# --> Export both subsets without the row index
for subset, ruta in (
    (train, '../Dataset/train_mosaico.csv'),
    (test, '../Dataset/test_mosaico.csv'),
):
    subset.to_csv(ruta, index=False)

In [6]:
# Read the exported subsets back and show their sizes as (train rows, test rows)
rutas_division = ('../Dataset/train_mosaico.csv', '../Dataset/test_mosaico.csv')
train_load, test_load = map(pd.read_csv, rutas_division)

train_load.shape[0], test_load.shape[0]

(428, 107)