In [2]:
#islamabad base
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib



In [3]:

from google.colab import files
import io

# File name the dataset is expected to have, both in the upload dialog and on disk.
DATASET_FILENAME = "FINAL_DATASET.csv"

# Open Colab's upload widget. The returned dict maps each uploaded file name
# to that file's raw bytes.
uploaded = files.upload()

# Parse the bytes that were just uploaded instead of re-reading a fixed path.
# When a file with the same name already exists in the working directory,
# Colab stores the new upload under a de-duplicated name, so reading
# DATASET_FILENAME from disk could silently pick up a stale copy on a re-run.
if uploaded:
    uploaded_name = (
        DATASET_FILENAME if DATASET_FILENAME in uploaded else next(iter(uploaded))
    )
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    # Nothing was uploaded (dialog cancelled): fall back to the copy on disk.
    df = pd.read_csv(DATASET_FILENAME)

# Rows without a target (PRICE) or without AREA are discarded. Reassigning
# instead of using inplace=True keeps the step free of hidden mutation.
df = df.dropna(subset=["PRICE", "AREA"])

# Features are every column except the target; the target is PRICE.
X = df.drop(columns=["PRICE"])
y = df["PRICE"]

# Column groups by kind.
# NOTE(review): num_cols is not referenced by the cells visible here - X keeps
# every non-PRICE column. Confirm that is the intended feature set.
cat_cols = ["TYPE", "PURPOSE", "LOCATION"]
num_cols = ["AREA", "BUILD IN YEAR", "BEDROOMS", "BATHROOMS", "PARKING SPACES"]


Saving FINAL_DATASET.csv to FINAL_DATASET.csv


In [4]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-hot encode the categorical columns. Categories that were not seen during
# fitting are ignored instead of raising (handle_unknown="ignore"). Every
# column of X not listed in cat_cols is forwarded unchanged (remainder="passthrough").
category_encoder = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer(
    transformers=[("cat", category_encoder, cat_cols)],
    remainder="passthrough",
)

# Preprocessing and the regressor are chained so both are fitted together
# and applied identically at prediction time.
forest = RandomForestRegressor(n_estimators=100, random_state=42)
model = Pipeline(
    steps=[
        ("preprocess", preprocessor),
        ("regressor", forest),
    ]
)

model.fit(X_train, y_train)

# Predictions on both splits, kept for the evaluation step.
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)





In [5]:

def evaluate_model(y_true, y_pred, dataset_name):
    """Print R², MAE, MSE and RMSE for one set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, aligned with ``y_true``.
        dataset_name: Label used in the report header; printed upper-cased.

    Returns:
        None. The report is written to standard output.
    """
    # Compute every metric up front; MSE is evaluated once and reused for RMSE.
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)

    print(f"\n📊 {dataset_name.upper()} PERFORMANCE")
    print("-" * 40)
    print("R² Score:", r2)
    print("MAE     :", mae)
    print("MSE     :", mse)
    print("RMSE    :", rmse)

In [7]:

# Report the metrics for the training split first, then the held-out split.
for split_label, actual, predicted in (
    ("Training", y_train, y_train_pred),
    ("Testing", y_test, y_test_pred),
):
    evaluate_model(actual, predicted, split_label)

# Persist the fitted pipeline together with the preprocessor's output feature
# names as a single (model, feature_names) tuple.
feature_names = preprocessor.get_feature_names_out()
joblib.dump((model, feature_names), "clean_price_model.pkl")


📊 TRAINING PERFORMANCE
----------------------------------------
R² Score: 0.9875269473672326
MAE     : 3314203.857500221
MSE     : 132664857509944.1
RMSE    : 11518023.159811066

📊 TESTING PERFORMANCE
----------------------------------------
R² Score: 0.9374220265797671
MAE     : 7480792.517099898
MSE     : 709943950929383.8
RMSE    : 26644773.426122125


['clean_price_model.pkl']