Task 3: Multimodal ML – Housing Price Prediction Using Images + Tabular
Data
Objective:
Predict housing prices using both structured data and house images.

In [1]:
#Step 1: Setup and Imports
!pip install pandas numpy scikit-learn matplotlib tensorflow opencv-python




In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, Flatten, MaxPooling2D, Concatenate
from tensorflow.keras.optimizers import Adam


In [3]:
#Step 2: Load Tabular Data (CSV)
df = pd.read_csv("/content/Housing.csv")
df.head()


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [5]:
#Step 3: Load Image Data
def load_images(df, image_folder, image_size=(64, 64)):
    images = []
    # Create a dummy image folder for demonstration purposes
    if not os.path.exists(image_folder):
        os.makedirs(image_folder)
        # Create dummy image files
        for idx in df.index:
            dummy_img = np.zeros((*image_size, 3), dtype=np.uint8)
            cv2.imwrite(os.path.join(image_folder, f"house_{idx}.jpg"), dummy_img)

    for idx in df.index: # Iterate through the dataframe index
        img_path = os.path.join(image_folder, f"house_{idx}.jpg")
        if os.path.exists(img_path):
            img = cv2.imread(img_path)
            img = cv2.resize(img, image_size)
            img = img / 255.0
            images.append(img)
        else:
            images.append(np.zeros((*image_size, 3)))  # blank image if missing
    return np.array(images)

image_data = load_images(df, "images")

In [7]:
#Step 4: Preprocess Tabular Data
# Identify categorical and numerical columns (excluding 'price' as it's the target)
categorical_cols = df.select_dtypes(include='object').columns.tolist()
numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
numerical_cols.remove('price') # Exclude the target variable

# Apply one-hot encoding to categorical features
tabular_features = pd.get_dummies(df[categorical_cols + numerical_cols], columns=categorical_cols, drop_first=True)

# Scale numerical features
scaler = StandardScaler()
tabular_data = scaler.fit_transform(tabular_features)

# Target variable
target = df['price'].values

print("Shape of processed tabular data:", tabular_data.shape)
print("Shape of target data:", target.shape)

Shape of processed tabular data: (545, 13)
Shape of target data: (545,)


In [8]:
#Step 5: Train/Test Split
X_tab_train, X_tab_test, X_img_train, X_img_test, y_train, y_test = train_test_split(
    tabular_data, image_data, target, test_size=0.2, random_state=42)


In [9]:
#Step 6: Build the CNN (Image Model)
#python
#Copy
#Edit
image_input = Input(shape=(64, 64, 3))
x = Conv2D(32, (3, 3), activation="relu")(image_input)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation="relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(64, activation="relu")(x)


In [10]:
# step 7 Tabular Input Model
tabular_input = Input(shape=(X_tab_train.shape[1],))
y = Dense(64, activation="relu")(tabular_input)



In [11]:
#Step 8: Combine (Feature Fusion)
combined = Concatenate()([x, y])
z = Dense(64, activation="relu")(combined)
z = Dense(1)(z)

model = Model(inputs=[image_input, tabular_input], outputs=z)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model.summary()


In [12]:
#Step 9: Train the Model
model.fit([X_img_train, X_tab_train], y_train, validation_split=0.1, epochs=10, batch_size=32)


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 265ms/step - loss: 25444363534336.0000 - val_loss: 22728002240512.0000
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 26310541836288.0000 - val_loss: 22727991754752.0000
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 25521062674432.0000 - val_loss: 22727974977536.0000
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 24002816901120.0000 - val_loss: 22727951908864.0000
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 25932161089536.0000 - val_loss: 22727916257280.0000
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 24809173942272.0000 - val_loss: 22727868022784.0000
Epoch 7/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 26725169758208.0000 - val_loss: 2272780

<keras.src.callbacks.history.History at 0x7a928c7713d0>

In [13]:
#Step 10: Evaluate the Model
y_pred = model.predict([X_img_test, X_tab_test])

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 199ms/step
MAE: 5007481.00
RMSE: 5489019.59


Summary
Step	Task
1	Install libraries and import
2	Load tabular data
3	Load and resize images
4	Normalize tabular features
5	Split train/test
6–8	Build image & tabular submodels, then combine
9	Train the model
10	Evaluate with MAE and RMSE

