## Load dataset and libraries

In [38]:
import os
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, Dataset

In [39]:
# Verificar si hay una GPU disponible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [40]:
seed = 29
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [41]:
dataset_path = 'Dataset/Mosaico/'
img_path = 'Dataset/Mosaico/Imagenes/'
model_path = 'model/'

In [42]:
dataset = pd.read_csv(f"{dataset_path}train.csv")
dataset.head()

Unnamed: 0,bedrooms,bathrooms,area,zipcode,mosaic_image,price
0,2,1.0,1184,91901,82.png,397500
1,2,2.0,1248,93446,471.png,175000
2,5,4.0,4190,85255,18.png,1199000
3,2,2.0,1152,92276,352.png,99900
4,2,1.0,1000,92276,416.png,67000


## Architecture

In [43]:
# Modelo preentrenado VGG16 para extraer características
vgg16 = models.vgg16(pretrained=True)

# Quitamos la última capa fully connected (classifier) para obtener un vector de características
vgg16.classifier = nn.Sequential(*list(vgg16.classifier.children())[:-1])
vgg16.eval()



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [44]:
# Modelo final para predecir el precio de una casa a partir de imágenes y datos tabulares
class HousePriceModelVGG16(nn.Module):
    def __init__(self, num_tabular_features):
        super(HousePriceModelVGG16, self).__init__()

        # Red preentrenada para imágenes (VGG16)
        self.vgg16 = models.vgg16(pretrained=True)
        self.vgg16.classifier = nn.Sequential(*list(self.vgg16.classifier.children())[:-1])  # Quitar capa final fully connected

        # Capa completamente conectada para datos tabulares
        self.fc_tabular = nn.Sequential(
            nn.Linear(num_tabular_features, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )

        # Capa final para combinar características visuales y tabulares
        self.fc_combined = nn.Sequential(
            nn.Linear(4096 + 64, 512),  # 4096 características de VGG16 + 64 de los datos tabulares
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 1)  # Predicción final de precio
        )

    def forward(self, images, tabular_data):
        # Pasar las imágenes a través de VGG16
        visual_features = self.vgg16(images)
        
        # Pasar los datos tabulares a través de la red fully connected
        tabular_features = self.fc_tabular(tabular_data)
        
        # Concatenar las características visuales y tabulares
        combined_features = torch.cat((visual_features, tabular_features), dim=1)
        
        # Pasar por la capa final para la predicción
        output = self.fc_combined(combined_features)
        
        return output

In [45]:
class RMSELoss(nn.Module):
    def __init__(self):
        super(RMSELoss, self).__init__()
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        return torch.sqrt(self.mse(yhat, y))

## Process information

In [46]:
# Dataset personalizado para combinar imágenes y datos tabulares
class HousePriceDataset(Dataset):
    def __init__(self, dataset, image_folder, transform=None):
        self.dataset = dataset
        self.image_folder = image_folder
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        row = self.dataset.iloc[idx]
        img_name = row['mosaic_image']
        img_path = os.path.join(self.image_folder, img_name)
        image = Image.open(img_path).convert('RGB')  # Asegurarse de que la imagen está en formato RGB

        if self.transform:
            image = self.transform(image)  # Aplicamos la transformación para convertir a tensor

        tabular_data = row[['bedrooms', 'bathrooms', 'area', 'zipcode']].values.astype(np.float32)
        price = row['price']

        return image, torch.tensor(tabular_data), torch.tensor(price, dtype=torch.float32)


## Training and validation cycles

In [47]:
# Entrenamiento del modelo (modificado para GPU)
def train_model(model, train_loader, test_loader, num_epochs=10, learning_rate=0.001):
    # criterion = nn.MSELoss()
    criterion = RMSELoss()  
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for images, tabular_data, prices in train_loader:
            # Mover los datos a la GPU
            images = images.to(device)
            tabular_data = tabular_data.to(device)
            prices = prices.to(device)

            optimizer.zero_grad()
            outputs = model(images, tabular_data)
            loss = criterion(outputs.squeeze(), prices)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

    print('Entrenamiento completo')

# Evaluación del modelo (modificado para GPU)
def evaluate_model(model, test_loader):
    model.eval()
    predictions = []
    actuals = []
    
    with torch.no_grad():
        for images, tabular_data, prices in test_loader:
            # Mover los datos a la GPU
            images = images.to(device)
            tabular_data = tabular_data.to(device)
            prices = prices.to(device)

            outputs = model(images, tabular_data)

            # Asegurarse de que las salidas sean de la forma correcta
            outputs = outputs.cpu().numpy()  # Convertir a numpy
            prices = prices.cpu().numpy()    # Convertir a numpy

            # Si las dimensiones de outputs son 0D (escalar), convertimos a 1D
            if outputs.ndim == 0:
                outputs = np.expand_dims(outputs, axis=0)
            if prices.ndim == 0:
                prices = np.expand_dims(prices, axis=0)

            predictions.extend(outputs)  # No usar squeeze() aquí
            actuals.extend(prices)

    # Calcular MAE
    mae = np.mean(np.abs(np.array(predictions) - np.array(actuals)))
    print(f'Mean Absolute Error (MAE): {mae}')

## Excute

In [48]:
# Normalizar las características tabulares
scaler = MinMaxScaler()
dataset[['bedrooms', 'bathrooms', 'area', 'zipcode']] = scaler.fit_transform(
    dataset[['bedrooms', 'bathrooms', 'area', 'zipcode']]
)

In [49]:
# Split dataset into training and validation sets
train_data, val_data = train_test_split(dataset, test_size=0.2, random_state=42)

In [50]:
# Create datasets
train_dataset = HousePriceDataset(train_data, img_path, transform=transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]))

val_dataset = HousePriceDataset(val_data, img_path, transform=transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]))

print(f'Training samples: {len(train_dataset)}')
print(f'Validation samples: {len(val_dataset)}')

Training samples: 322
Validation samples: 81


In [51]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

print(f"Train dataset size: {len(train_loader)}")
print(f"Validation dataset size: {len(val_loader)}")

Train dataset size: 21
Validation dataset size: 6


In [52]:
# Instanciar el modelo y entrenarlo
num_tabular_features = 4
model = HousePriceModelVGG16(num_tabular_features).to(device)



In [56]:
# Entrenamiento del modelo
train_model(model, train_loader, val_loader, num_epochs=100)

# Evaluación del modelo
evaluate_model(model, val_loader)

Epoch 1/100, Loss: 137840.0228794643
Epoch 2/100, Loss: 75986.66496930804
Epoch 3/100, Loss: 96250.14453125
Epoch 4/100, Loss: 99313.22163318453
Epoch 5/100, Loss: 95921.18257068453
Epoch 6/100, Loss: 92013.44475446429
Epoch 7/100, Loss: 87197.30826822917
Epoch 8/100, Loss: 73914.07012648809
Epoch 9/100, Loss: 75226.98009672618
Epoch 10/100, Loss: 88067.48251488095
Epoch 11/100, Loss: 105683.96316964286
Epoch 12/100, Loss: 87666.05143229167
Epoch 13/100, Loss: 70674.97340029762
Epoch 14/100, Loss: 70279.4453125
Epoch 15/100, Loss: 84587.79892113095
Epoch 16/100, Loss: 68374.86328125
Epoch 17/100, Loss: 93073.74683779762
Epoch 18/100, Loss: 74653.28497023809
Epoch 19/100, Loss: 89014.99293154762
Epoch 20/100, Loss: 80169.80115327382
Epoch 21/100, Loss: 103015.53292410714
Epoch 22/100, Loss: 140868.85751488095
Epoch 23/100, Loss: 119680.16592261905
Epoch 24/100, Loss: 77204.58742559524
Epoch 25/100, Loss: 74936.28218005953
Epoch 26/100, Loss: 75021.75093005953
Epoch 27/100, Loss: 80565.2