# Libraries

In [32]:
!pip install torch torchvision pandas numpy scikit-learn tqdm




Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable






In [35]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm

from PIL import Image
import torch
import torch.nn as nn
from torchvision import models, transforms

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Load CSV data

In [44]:

df = pd.read_csv("House_prediction.csv")


df.head()


Unnamed: 0,image_id,street,citi,n_citi,bed,bath,sqft,price
0,0,1317 Van Buren Avenue,"Salton City, CA",317,3,2.0,1560,201900
1,1,124 C Street W,"Brawley, CA",48,3,2.0,713,228500
2,2,2304 Clark Road,"Imperial, CA",152,3,1.0,800,273950
3,3,755 Brawley Avenue,"Brawley, CA",48,3,1.0,1082,350000
4,4,2207 R Carrillo Court,"Calexico, CA",55,4,3.0,2547,385100


# Define image preprocessing and load pretrained model

In [48]:
# Define preprocessing: 

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load pretrained ResNet50
resnet = models.resnet50(pretrained=True)

# Remove the last classification layer to get features
feature_extractor = nn.Sequential(*list(resnet.children())[:-1])
feature_extractor.eval()  # set to eval mode


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\ASAD SHAH/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [09:55<00:00, 172kB/s] 


Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


# Extract image features

In [53]:
# Store features here
image_features = []

# Folder where images are stored
image_folder = "House_images"

for img_id in tqdm(df['image_id']):
    img_path = os.path.join(image_folder, f"{img_id}.jpg")
    
    if os.path.exists(img_path):
        img = Image.open(img_path).convert('RGB')
        img_tensor = preprocess(img).unsqueeze(0)  # add batch dimension
        
        with torch.no_grad():
            feat = feature_extractor(img_tensor).squeeze().numpy()
            image_features.append(feat)
    else:
        print(f"Image not found: {img_path}")
        # fill with zeros if missing
        image_features.append(np.zeros(2048))


100%|██████████| 15474/15474 [44:31<00:00,  5.79it/s] 


# Prepare tabular features

In [58]:

tabular_cols = ['n_citi', 'bed', 'bath', 'sqft']
tabular_features = df[tabular_cols].values

# Standardize tabular data
scaler = StandardScaler()
tabular_features_scaled = scaler.fit_transform(tabular_features)

# Convert image features list to numpy array
image_features_np = np.array(image_features)


# Combine features

In [63]:
# Concatenate tabular + image features
X = np.hstack([tabular_features_scaled, image_features_np])

# Target
y = df['price'].values


# Train-test split

In [66]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)


# Train regression model

In [69]:
model = LinearRegression()
model.fit(X_train, y_train)


# Evaluate

In [72]:
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")


MAE: 223528.22
RMSE: 301313.95
