1. Install Required Libraries

In [4]:
# Run this first in Colab
!pip install -q kagglehub torch torchvision torchaudio pandas scikit-learn joblib pillow tqdm gradio


2. Download Tabular Dataset (House Prices)

In [9]:
!kaggle competitions download -c house-prices-advanced-regression-techniques -p ./house_prices_tabular
!unzip -o ./house_prices_tabular/house-prices-advanced-regression-techniques.zip -d ./house_prices_tabular


Downloading house-prices-advanced-regression-techniques.zip to ./house_prices_tabular
  0% 0.00/199k [00:00<?, ?B/s]
100% 199k/199k [00:00<00:00, 426MB/s]
Archive:  ./house_prices_tabular/house-prices-advanced-regression-techniques.zip
  inflating: ./house_prices_tabular/data_description.txt  
  inflating: ./house_prices_tabular/sample_submission.csv  
  inflating: ./house_prices_tabular/test.csv  
  inflating: ./house_prices_tabular/train.csv  


3. Download House Images Dataset

In [10]:
import kagglehub

image_path = kagglehub.dataset_download("ted8080/house-prices-and-images-socal")
print("Path to image dataset:", image_path)


Downloading from https://www.kaggle.com/api/v1/datasets/download/ted8080/house-prices-and-images-socal?dataset_version_number=1...


100%|██████████| 369M/369M [00:07<00:00, 53.0MB/s]

Extracting files...





Path to image dataset: /root/.cache/kagglehub/datasets/ted8080/house-prices-and-images-socal/versions/1


4. Load Tabular + Image Dataset

In [28]:
import os
import pandas as pd

# Correct dataset paths
tabular_path = "/root/.cache/kagglehub/datasets/ted8080/house-prices-and-images-socal/versions/1/socal2.csv"
image_dir = "/root/.cache/kagglehub/datasets/ted8080/house-prices-and-images-socal/versions/1/socal2/socal_pics"

# Load tabular data
tabular_data = pd.read_csv(tabular_path)
print("Tabular Data Shape:", tabular_data.shape)
print(tabular_data.head())

# ✅ Fix: Target column is 'price' in this dataset
target_column = "price"

# Collect image paths & match with tabular dataset
image_paths = []
all_ids = []

for fname in os.listdir(image_dir)[:2000]:  # limit for testing
    if fname.endswith(".jpg") or fname.endswith(".png"):
        img_id = int(os.path.splitext(fname)[0])  # extract numeric id
        img_path = os.path.join(image_dir, fname)
        if img_id in tabular_data["image_id"].values:
            image_paths.append(img_path)
            all_ids.append(img_id)

# Create merged dataframe with paths
merged_df = tabular_data[tabular_data["image_id"].isin(all_ids)].copy()
merged_df["image_path"] = image_paths

print("Final merged dataset shape:", merged_df.shape)
print(merged_df[[ "image_id", "street", "citi", "bed", "bath", "sqft", "price", "image_path"]].head())


Tabular Data Shape: (15474, 8)
   image_id                 street             citi  n_citi  bed  bath  sqft  \
0         0  1317 Van Buren Avenue  Salton City, CA     317    3   2.0  1560   
1         1         124 C Street W      Brawley, CA      48    3   2.0   713   
2         2        2304 Clark Road     Imperial, CA     152    3   1.0   800   
3         3     755 Brawley Avenue      Brawley, CA      48    3   1.0  1082   
4         4  2207 R Carrillo Court     Calexico, CA      55    4   3.0  2547   

    price  
0  201900  
1  228500  
2  273950  
3  350000  
4  385100  
Final merged dataset shape: (2000, 9)
    image_id                     street                 citi  bed  bath  sqft  \
1          1             124 C Street W          Brawley, CA    3   2.0   713   
12        12         818 155th Street W         Rosamond, CA    2   2.0  2466   
17        17  12471 Boy Scout Camp Road     Frazier Park, CA    8   3.1  4236   
20        20           632 Grove Street            Arv

5. Preprocessing Tabular + Image Data & Define Model

In [48]:
import torch
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler
from PIL import Image
import numpy as np
import torch.nn as nn
from torchvision import transforms

# -------------------------
# 1. Tabular Preprocessing
# -------------------------

# Select numeric features
num_features = ["bed", "bath", "sqft"]
scaler = StandardScaler()
tabular_scaled = scaler.fit_transform(merged_df[num_features])

# Target (price)
y = merged_df["price"].values

# -------------------------
# 2. Image Preprocessing
# -------------------------

# Define transforms (resize + tensor + normalize)
img_transform = transforms.Compose([
    transforms.Resize((128, 128)),   # Resize all images to 128x128
    transforms.ToTensor(),           # Convert to tensor
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # normalize 3 channels
])

# -------------------------
# 3. Custom Dataset
# -------------------------

class HousePriceDataset(Dataset):
    def __init__(self, df, tabular_data, targets, transform=None):
        self.df = df.reset_index(drop=True)
        self.tabular_data = tabular_data
        self.targets = targets
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # ✅ Fix column name to match Step 3
        img_path = self.df.iloc[idx]["image_path"]

        # Open image
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        # Tabular features
        tab_x = torch.tensor(self.tabular_data[idx], dtype=torch.float32)

        # Target
        y = torch.tensor(self.targets[idx], dtype=torch.float32)

        return image, tab_x, y


# -------------------------
# 4. Define Model
# -------------------------

class HousePriceModel(nn.Module):
    def __init__(self, tabular_input_dim):
        super(HousePriceModel, self).__init__()

        # CNN for image
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((4, 4))
        )
        self.cnn_out_dim = 32 * 4 * 4

        # Tabular MLP
        self.tabular_mlp = nn.Sequential(
            nn.Linear(tabular_input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )

        # Combined layers
        self.fc = nn.Sequential(
            nn.Linear(self.cnn_out_dim + 32, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, tab_x, img_x):
        img_feat = self.cnn(img_x).view(img_x.size(0), -1)
        tab_feat = self.tabular_mlp(tab_x)
        combined = torch.cat([img_feat, tab_feat], dim=1)
        return self.fc(combined)


# -------------------------
# 5. Test Dataset
# -------------------------

dataset = HousePriceDataset(merged_df, tabular_scaled, y, transform=img_transform)

sample_img, sample_tabular, sample_price = dataset[0]
print("Image shape:", sample_img.shape)
print("Tabular features:", sample_tabular)
print("Target price:", sample_price)


Image shape: torch.Size([3, 128, 128])
Tabular features: tensor([-0.4857, -0.4947, -1.3583])
Target price: tensor(228500.)


6. DataLoader Setup and Train/Test Split

In [49]:
from torch.utils.data import DataLoader, random_split

# Define device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Split dataset into train/validation/test
total_size = len(dataset)
train_size = int(0.7 * total_size)
valid_size = int(0.15 * total_size)
test_size  = total_size - train_size - valid_size

train_ds, valid_ds, test_ds = random_split(dataset, [train_size, valid_size, test_size], generator=torch.Generator().manual_seed(42))

print(f"Train size: {len(train_ds)}, Validation size: {len(valid_ds)}, Test size: {len(test_ds)}")

# Define batch size
batch_size = 16

# Create DataLoaders
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False)

# Test a batch
sample_batch = next(iter(train_loader))
sample_imgs, sample_tabs, sample_prices = sample_batch
print("Sample batch images shape:", sample_imgs.shape)
print("Sample batch tabular shape:", sample_tabs.shape)
print("Sample batch targets shape:", sample_prices.shape)


Train size: 1400, Validation size: 300, Test size: 300
Sample batch images shape: torch.Size([16, 3, 128, 128])
Sample batch tabular shape: torch.Size([16, 3])
Sample batch targets shape: torch.Size([16])


7. Model Training Setup

In [51]:

import torch.optim as optim
import torch.nn as nn

# Get tabular input dimension
tabular_input_dim = tabular_scaled.shape[1]

# Instantiate the model
model = HousePriceModel(tabular_input_dim).to(device)

# Loss function
criterion = nn.MSELoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

# Learning rate scheduler (optional) – fixed for PyTorch version
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5
)

# Training parameters
num_epochs = 20

print(f"Model ready on device: {device}")
print(f"Number of epochs: {num_epochs}, Batch size: {batch_size}")


Model ready on device: cpu
Number of epochs: 20, Batch size: 16


8. Training Loop

In [52]:
import time

best_val_loss = float("inf")

for epoch in range(num_epochs):
    start_time = time.time()

    # === Training ===
    model.train()
    train_loss = 0.0
    for imgs, tabs, targets in train_loader:
        imgs, tabs, targets = imgs.to(device), tabs.to(device), targets.to(device).unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(tabs, imgs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * imgs.size(0)

    train_loss /= len(train_loader.dataset)

    # === Validation ===
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for imgs, tabs, targets in valid_loader:
            imgs, tabs, targets = imgs.to(device), tabs.to(device), targets.to(device).unsqueeze(1)
            outputs = model(tabs, imgs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * imgs.size(0)
    val_loss /= len(valid_loader.dataset)

    # Step scheduler
    scheduler.step(val_loss)

    end_time = time.time()
    epoch_mins, epoch_secs = divmod(int(end_time - start_time), 60)

    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Time: {epoch_mins}m {epoch_secs}s")

    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        MODEL_PATH = "house_price_model.pth"
        torch.save(model.state_dict(), MODEL_PATH)
        print(f"Best model saved at {MODEL_PATH}")


Epoch 1/20 | Train Loss: 633201257910.8572 | Val Loss: 641647099248.6400 | Time: 0m 9s
Best model saved at house_price_model.pth
Epoch 2/20 | Train Loss: 583705935778.3772 | Val Loss: 500135847963.3066 | Time: 0m 8s
Best model saved at house_price_model.pth
Epoch 3/20 | Train Loss: 309945761089.8286 | Val Loss: 182233595630.9333 | Time: 0m 9s
Best model saved at house_price_model.pth
Epoch 4/20 | Train Loss: 163700318488.8686 | Val Loss: 166199872334.5067 | Time: 0m 9s
Best model saved at house_price_model.pth
Epoch 5/20 | Train Loss: 161340520810.7886 | Val Loss: 164836617912.3200 | Time: 0m 8s
Best model saved at house_price_model.pth
Epoch 6/20 | Train Loss: 159280291395.2914 | Val Loss: 161756724428.8000 | Time: 0m 9s
Best model saved at house_price_model.pth
Epoch 7/20 | Train Loss: 156591783584.9143 | Val Loss: 158822953615.3600 | Time: 0m 9s
Best model saved at house_price_model.pth
Epoch 8/20 | Train Loss: 153409927952.0914 | Val Loss: 155040080418.1333 | Time: 0m 9s
Best model

9. Model Evaluation on Test Set

In [53]:

from sklearn.metrics import mean_absolute_error, mean_squared_error
import math

# Load the best saved model
loaded_model = HousePriceModel(tabular_input_dim).to(device)
loaded_model.load_state_dict(torch.load(MODEL_PATH))
loaded_model.eval()

# Store predictions and true values
y_true = []
y_pred = []

with torch.no_grad():
    for imgs, tabs, targets in test_loader:
        imgs, tabs = imgs.to(device), tabs.to(device)
        outputs = loaded_model(tabs, imgs)
        y_true.extend(targets.numpy())
        y_pred.extend(outputs.cpu().numpy().flatten())

# Compute metrics
mae = mean_absolute_error(y_true, y_pred)
rmse = math.sqrt(mean_squared_error(y_true, y_pred))

print(f"Test MAE: {mae:.2f}")
print(f"Test RMSE: {rmse:.2f}")


Test MAE: 232567.48
Test RMSE: 320440.89


10. Save and Load Model for Inference

In [54]:

# 1️⃣ Save the model state dict
MODEL_PATH = "house_price_model.pth"
torch.save(model.state_dict(), MODEL_PATH)
print(f"Model saved at {MODEL_PATH}")

# 2️⃣ Load the model for inference
loaded_model = HousePriceModel(tabular_input_dim).to(device)
loaded_model.load_state_dict(torch.load(MODEL_PATH))
loaded_model.eval()
print("Model loaded and ready for inference.")

# 3️⃣ Test a single sample from test set
sample_img, sample_tab, sample_price = test_ds[0]
sample_img = sample_img.unsqueeze(0).to(device)
sample_tab = sample_tab.unsqueeze(0).to(device)

with torch.no_grad():
    pred_price = loaded_model(sample_tab, sample_img).item()

print(f"True price: {sample_price}, Predicted price: {pred_price:.2f}")


Model saved at house_price_model.pth
Model loaded and ready for inference.
True price: 325000.0, Predicted price: 486212.62


11. Gradio Web App for Multimodal House Price Prediction

In [56]:
import gradio as gr
import torch
from PIL import Image
import numpy as np
from torchvision import transforms
from sklearn.preprocessing import StandardScaler

# Load trained model
loaded_model = HousePriceModel(tabular_input_dim).to(device)
loaded_model.load_state_dict(torch.load("house_price_model.pth"))
loaded_model.eval()

# Image preprocessing
img_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Standard scaler (use the one fitted on training data)
scaler = StandardScaler()
scaler.fit(tabular_scaled)  # from Step 4

# Prediction function
def predict_house_price(bed, bath, sqft, img):
    # Tabular
    tab_features = np.array([[bed, bath, sqft]])
    tab_scaled = torch.tensor(scaler.transform(tab_features), dtype=torch.float32).to(device)

    # Image
    img = img.convert("RGB")
    img_tensor = img_transform(img).unsqueeze(0).to(device)

    # Prediction
    with torch.no_grad():
        pred_price = loaded_model(tab_scaled, img_tensor).item()

    return round(pred_price, 2)

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🏡 Multimodal House Price Predictor")

    with gr.Row():
        bed_input = gr.Number(label="Bedrooms", value=3)
        bath_input = gr.Number(label="Bathrooms", value=2)
        sqft_input = gr.Number(label="Square Footage", value=1500)

    img_input = gr.Image(type="pil", label="Upload House Image")

    output = gr.Number(label="Predicted Price ($)")

    submit_btn = gr.Button("Predict")
    submit_btn.click(
        fn=predict_house_price,
        inputs=[bed_input, bath_input, sqft_input, img_input],
        outputs=output
    )

# Launch
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://adfd0bdf5269d632a7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


