In [2]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import random

#Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

#Generate or Load Dataset

CSV_PATH = 'housing_data.csv'

def create_sample_csv():
    data = {
        'id': list(range(1, 11)),
        'bedrooms': [3, 4, 2, 5, 3, 4, 2, 3, 4, 3],
        'bathrooms': [2, 3, 1, 4, 2, 2.5, 1, 2, 3, 2],
        'sqft': [1500, 2200, 900, 3000, 1600, 2100, 850, 1450, 2300, 1550],
        'price': [350000, 550000, 200000, 750000, 375000, 520000, 190000, 340000, 580000, 360000],
        'image_path': [f'house_{i}.jpg' for i in range(1, 11)]
    }
    df = pd.DataFrame(data)
    df.to_csv(CSV_PATH, index=False)
    print(f"‚úÖ Created {CSV_PATH}")

if not os.path.exists(CSV_PATH):
    create_sample_csv()

df = pd.read_csv(CSV_PATH)
print("üìä Dataset Loaded:")
print(df.head())

#Simulate Image Feature Extraction with CNN

class SimulatedCNNFeatureExtractor:
    def __init__(self, feature_dim=128):
        self.feature_dim = feature_dim

    def extract_features(self, image_paths):
#Simulate CNN features: random vectors (replace with real CNN inference)
        features = np.random.randn(len(image_paths), self.feature_dim).astype(np.float32)
        return features

#Extract "image features" (simulated)
feature_extractor = SimulatedCNNFeatureExtractor(feature_dim=128)
image_features = feature_extractor.extract_features(df['image_path'].tolist())
print(f"üñºÔ∏è  Simulated image features shape: {image_features.shape}")

#Prepare Tabular Features

tabular_features = df[['bedrooms', 'bathrooms', 'sqft']].values.astype(np.float32)
prices = df['price'].values.astype(np.float32).reshape(-1, 1)

#Standardize tabular features
scaler_tabular = StandardScaler()
tabular_features_scaled = scaler_tabular.fit_transform(tabular_features)

#Combine image + tabular features
X_combined = np.concatenate([image_features, tabular_features_scaled], axis=1)
y = prices

print(f"üîó Combined features shape: {X_combined.shape}")

#Create PyTorch Dataset

class HousingDataset(Dataset):
    def __init__(self, features, targets):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

#Split data
X_train, X_val, y_train, y_val = train_test_split(X_combined, y, test_size=0.2, random_state=42)

train_dataset = HousingDataset(X_train, y_train)
val_dataset = HousingDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

#Define Multimodal Model

class MultimodalHousePriceModel(nn.Module):
    def __init__(self, input_dim, hidden_dim=64):
        super(MultimodalHousePriceModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_dim, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

#Initialize model
INPUT_DIM = X_combined.shape[1]  #128 (image) + 3 (tabular) = 131
model = MultimodalHousePriceModel(INPUT_DIM)

#Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

#Training Loop

EPOCHS = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print(f"üöÄ Training on {device}...")

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0
    for features, targets in train_loader:
        features, targets = features.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{EPOCHS}], Loss: {train_loss/len(train_loader):.4f}')

#Evaluation
model.eval()
y_true = []
y_pred = []

with torch.no_grad():
    for features, targets in val_loader:
        features, targets = features.to(device), targets.to(device)
        outputs = model(features)
        y_true.extend(targets.cpu().numpy())
        y_pred.extend(outputs.cpu().numpy())

y_true = np.array(y_true).flatten()
y_pred = np.array(y_pred).flatten()

mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

print("\n‚úÖ Evaluation Results:")
print(f"MAE: ${mae:,.2f}")
print(f"RMSE: ${rmse:,.2f}")

#Sample Prediction
sample_idx = 0
sample_input = torch.tensor(X_val[sample_idx:sample_idx+1], dtype=torch.float32).to(device)
predicted_price = model(sample_input).item()
actual_price = y_val[sample_idx][0]

print(f"\nüîç Sample Prediction:")
print(f"Actual Price: ${actual_price:,.2f}")
print(f"Predicted Price: ${predicted_price:,.2f}")
print(f"Error: ${abs(actual_price - predicted_price):,.2f}")

‚úÖ Created housing_data.csv
üìä Dataset Loaded:
   id  bedrooms  bathrooms  sqft   price   image_path
0   1         3        2.0  1500  350000  house_1.jpg
1   2         4        3.0  2200  550000  house_2.jpg
2   3         2        1.0   900  200000  house_3.jpg
3   4         5        4.0  3000  750000  house_4.jpg
4   5         3        2.0  1600  375000  house_5.jpg
üñºÔ∏è  Simulated image features shape: (10, 128)
üîó Combined features shape: (10, 131)
üöÄ Training on cpu...
Epoch [20/100], Loss: 177163706368.0000
Epoch [40/100], Loss: 177149689856.0000
Epoch [60/100], Loss: 177085284352.0000
Epoch [80/100], Loss: 176939966464.0000
Epoch [100/100], Loss: 176655818752.0000

‚úÖ Evaluation Results:
MAE: $564,958.19
RMSE: $565,156.81

üîç Sample Prediction:
Actual Price: $580,000.00
Predicted Price: $60.11
Error: $579,939.89
