 Multimodal ML – Housing Price Prediction

Libraries For Machine Learning

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, Input, Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

2025-09-06 14:34:09.455213: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


read the dataset

In [8]:
df=pd.read_csv("Housing.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


features & target

In [9]:
y = df["price"].values
X_tab = df.drop(columns=["price"])


 Preprocess Tabular Features

In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
categorical_cols = X_tab.select_dtypes(include=["object"]).columns
numeric_cols = X_tab.select_dtypes(exclude=["object"]).columns

print("Categorical features:", categorical_cols.tolist())
print("Numeric features:", numeric_cols.tolist())

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)
    ]
)

X_tab_transformed = preprocessor.fit_transform(X_tab)


Categorical features: ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']
Numeric features: ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']


Train-Test Split



In [12]:
X_tab_train, X_tab_test, y_train, y_test = train_test_split(
    X_tab_transformed, y, test_size=0.2, random_state=42
)

Build Model (Dense NN for Tabular)

In [13]:
tab_input = Input(shape=(X_tab_train.shape[1],))
x = layers.Dense(128, activation="relu")(tab_input)
x = layers.Dense(64, activation="relu")(x)
x = layers.Dense(32, activation="relu")(x)
output = layers.Dense(1, activation="linear")(x)

model = Model(inputs=tab_input, outputs=output)
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.summary()

Train Model

In [14]:
history = model.fit(
    X_tab_train, y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=32
)

Epoch 1/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 69ms/step - loss: 25349389811712.0000 - mae: 4727421.5000 - val_loss: 24781592199168.0000 - val_mae: 4623896.5000
Epoch 2/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 25349373034496.0000 - mae: 4727420.0000 - val_loss: 24781571227648.0000 - val_mae: 4623894.5000
Epoch 3/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 25349341577216.0000 - mae: 4727416.5000 - val_loss: 24781522993152.0000 - val_mae: 4623889.0000
Epoch 4/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 25349268176896.0000 - mae: 4727409.0000 - val_loss: 24781411844096.0000 - val_mae: 4623878.0000
Epoch 5/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 25349106696192.0000 - mae: 4727392.5000 - val_loss: 24781187448832.0000 - val_mae: 4623854.5000
Epoch 6/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

Evaluate Model

In [16]:
# Step 7: Model Evaluation

from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
y_pred = model.predict(X_tab_test).ravel()
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Model Evaluation Results:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Model Evaluation Results:
Mean Absolute Error (MAE): 4984760.50
Root Mean Squared Error (RMSE): 5466991.67


In [17]:
import joblib
joblib.dump(preprocessor, "preprocessor.pkl")
model.save("housing_model.h5")

print(" Model and preprocessing pipeline saved successfully!")




 Model and preprocessing pipeline saved successfully!


In [None]:
preprocessor = joblib.load("preprocessor.pkl")
#load model
from tensorflow.keras.models import load_model
model = load_model("housing_model.h5")
