In [44]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor





In [45]:
# Read the housing-price CSV from its GitHub raw URL into a DataFrame, then
# report its dimensions and preview the first rows.
url = "https://raw.githubusercontent.com/softwareWCU/Machine-Learning-Regression-Models-using-House-Price-Dataset/main/Housing%20Price.csv"
df = pd.read_csv(filepath_or_buffer=url)

print("Shape:", df.shape)
# One print with a newline separator emits the heading and the preview on
# consecutive lines.
print("\nFirst 5 rows:", df.head(), sep="\n")




Shape: (545, 13)

First 5 rows:
      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  


In [46]:
# `price` is the regression target; every other column is a predictor.
# Text columns are one-hot encoded, dropping the first level of each category
# so that no redundant dummy column is produced.
X = pd.get_dummies(df.drop(columns=["price"]), drop_first=True)
y = df["price"]

for caption, data in (("Features shape:", X), ("Target shape:", y)):
    print(caption, data.shape)




Features shape: (545, 13)
Target shape: (545,)


In [47]:
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split reproducible from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

print("Training samples:", len(X_train))
print("Testing samples (UNSEEN):", len(X_test))


Training samples: 436
Testing samples (UNSEEN): 109


In [48]:
# Fit ordinary least squares on all encoded features and score the model on
# the held-out rows.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# MSE is computed once; RMSE is simply its square root.
mse_lr = mean_squared_error(y_test, y_pred_lr)
print("MAE :", mean_absolute_error(y_test, y_pred_lr))
print("MSE :", mse_lr)
print("RMSE:", np.sqrt(mse_lr))
print("R2  :", r2_score(y_test, y_pred_lr))




MAE : 970043.4039201636
MSE : 1754318687330.6638
RMSE: 1324506.9600914386
R2  : 0.6529242642153184


In [49]:
# Multiple linear regression on the full encoded feature matrix.
# NOTE(review): this fits the same estimator on the same X_train / y_train as
# the `lr` cell, so the metrics are identical. If the earlier cell was meant
# to be a single-feature baseline, confirm and adjust it there.
mlr = LinearRegression().fit(X_train, y_train)
y_pred_mlr = mlr.predict(X_test)

mse_mlr = mean_squared_error(y_test, y_pred_mlr)
for label, score in (
    ("MAE :", mean_absolute_error(y_test, y_pred_mlr)),
    ("MSE :", mse_mlr),
    ("RMSE:", np.sqrt(mse_mlr)),
    ("R2  :", r2_score(y_test, y_pred_mlr)),
):
    print(label, score)



MAE : 970043.4039201636
MSE : 1754318687330.6638
RMSE: 1324506.9600914386
R2  : 0.6529242642153184


In [50]:
# Polynomial regression: expand the predictors into polynomial terms up to
# degree 2, learning the expansion on the training rows only and re-applying
# it to the test rows, then fit a linear model on the expanded matrix.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

pr = LinearRegression().fit(X_train_poly, y_train)
y_pred_pr = pr.predict(X_test_poly)

# Compute MSE once and reuse it for the RMSE line.
mse_pr = mean_squared_error(y_test, y_pred_pr)
print("MAE :", mean_absolute_error(y_test, y_pred_pr))
print("MSE :", mse_pr)
print("RMSE:", np.sqrt(mse_pr))
print("R2  :", r2_score(y_test, y_pred_pr))



MAE : 1042927.6352819109
MSE : 1916484377491.022
RMSE: 1384371.473807165
R2  : 0.6208412814380686


In [51]:
# k-nearest-neighbours regression using the 5 closest training rows.
# NOTE(review): the features are passed in unscaled, so distance is driven
# mostly by the large-valued columns (e.g. area) rather than the 0/1 dummies.
# Consider standardising the features before KNN -- TODO confirm intent.
knn = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

mse_knn = mean_squared_error(y_test, y_pred_knn)
for label, score in (
    ("MAE :", mean_absolute_error(y_test, y_pred_knn)),
    ("MSE :", mse_knn),
    ("RMSE:", np.sqrt(mse_knn)),
    ("R2  :", r2_score(y_test, y_pred_knn)),
):
    print(label, score)




MAE : 1296547.7064220184
MSE : 3213839804128.4404
RMSE: 1792718.5512869665
R2  : 0.36417150272211063


In [52]:
# Decision-tree regression; random_state is fixed so the fitted tree is
# reproducible between runs.
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# MSE is evaluated once and shared by the MSE and RMSE lines.
mse_dt = mean_squared_error(y_test, y_pred_dt)
print("MAE :", mean_absolute_error(y_test, y_pred_dt))
print("MSE :", mse_dt)
print("RMSE:", np.sqrt(mse_dt))
print("R2  :", r2_score(y_test, y_pred_dt))




MAE : 1195266.0550458715
MSE : 2642802637614.6787
RMSE: 1625669.904259373
R2  : 0.4771459275854347


In [53]:
# Preview the first ten decision-tree predictions for the held-out test rows.
print("\n✅ UNSEEN DATASET PREDICTIONS (FIRST 10)", y_pred_dt[:10], sep="\n")





✅ UNSEEN DATASET PREDICTIONS (FIRST 10)
[5600000. 7840000. 3850000. 4935000. 2660000. 2730000. 6090000. 4893000.
 2100000. 2940000.]


In [54]:
# Final status banner: both messages are emitted on separate lines.
print(
    "\n✅ ALL MODELS EXECUTED SUCCESSFULLY",
    "Test set used as unseen dataset",
    sep="\n",
)



✅ ALL MODELS EXECUTED SUCCESSFULLY
Test set used as unseen dataset
