In [16]:
import pandas as pd 
import numpy as np

from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score , mean_squared_error , f1_score
from sklearn.ensemble import RandomForestRegressor

import matplotlib.pyplot as plt


In [17]:
df = pd.read_csv("houseprice.csv")

In [18]:
numerical_val = ["yr" , "period" , "index_nsa" , "index_sa"  , "bedrooms" , "bathrooms" ,"stories" ,"parking" ]

x = df[numerical_val]
y = df["price"]

In [19]:
x_train , x_test , y_train , y_test = train_test_split(x , y , test_size = 0.2 , random_state = 42)

In [20]:
ran_for = RandomForestRegressor(n_estimators = 300 , max_depth = None , min_samples_split = 2 , random_state = 42 , n_jobs = -1)
ran_for.fit(x_train , y_train)

In [22]:
# Evaluate Performance 
y_pred_rf = ran_for.predict(x_test)

r2_rf = r2_score(y_test , y_pred_rf)
rmse_rf = mean_squared_error(y_test , y_pred_rf )

In [23]:
print(f"Random Forest R**2 : {r2_rf : .4f}")
print(f"Random Forest RMSE : {rmse_rf : .2f}")

Random Forest R**2 :  0.5542
Random Forest RMSE :  1346278682315.06


In [24]:
results = pd.DataFrame({
    "Model": ["Linear Regression", "Decision Tree", "Random Forest"],
    "R²": [0.72, 0.78, r2_rf]
})

results


Unnamed: 0,Model,R²
0,Linear Regression,0.72
1,Decision Tree,0.78
2,Random Forest,0.554235


In [None]:
importances = ran_for.feature_importances_
feature_names = x.columns

fi = pd.Series(importances, index=feature_names).sort_values(ascending=False)
plt.figure(figsize=(20, 6))
fi.head(15).plot(kind="bar")
plt.title("Top 15 Feature Importances (Random Forest)")
plt.ylabel("Importance")
plt.tight_layout()
plt.show()

In [27]:
pip install xgboost

Collecting xgboostNote: you may need to restart the kernel to use updated packages.

  Downloading xgboost-3.2.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.2.0-py3-none-win_amd64.whl (101.7 MB)
   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.5/101.7 MB 3.3 MB/s eta 0:00:31
    --------------------------------------- 1.3/101.7 MB 3.0 MB/s eta 0:00:34
    --------------------------------------- 1.8/101.7 MB 2.8 MB/s eta 0:00:36
    --------------------------------------- 2.4/101.7 MB 2.7 MB/s eta 0:00:38
   - -------------------------------------- 2.6/101.7 MB 2.5 MB/s eta 0:00:39
   - -------------------------------------- 3.1/101.7 MB 2.5 MB/s eta 0:00:40
   - -------------------------------------- 3.7/101.7 MB 2.5 MB/s eta 0:00:40
   - -------------------------------------- 4.2/101.7 MB 2.5 MB/s eta 0:00:39
   - -------------------------------------- 5.0/101.7 MB 2.5 MB/s eta 0:00:39
   -- -------

In [28]:
from xgboost import XGBRegressor

In [30]:
xgb = XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

xgb.fit(x_train, y_train)

In [32]:
y_pred_xgb = xgb.predict(x_test)

r2_xgb = r2_score(y_test, y_pred_xgb)
rmse_xgb = mean_squared_error(y_test, y_pred_xgb)

print(f"XGBoost R²: {r2_xgb:.4f}")
print(f"XGBoost RMSE: {rmse_xgb:.2f}")


XGBoost R²: 0.5940
XGBoost RMSE: 1226332438528.00
