In [None]:
import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
# Read the housing CSV into the notebook-level frame `df`.
csv_path = "/kaggle/input/datasets/yasserh/housing-prices-dataset/Housing.csv"
df = pd.read_csv(csv_path)

# Quick confirmation that the load worked, plus the (rows, columns) shape.
print("Dataset Loaded Successfully")
print("Shape:", df.shape)

In [None]:

# Report the per-column count of missing values, then drop incomplete rows.
print("\nMissing Values:\n", df.isnull().sum())

df = df.dropna()

# Yes/no columns that are re-encoded as 1/0 for the models.
binary_columns = [
    "mainroad",
    "guestroom",
    "basement",
    "airconditioning",
    "prefarea"
]

yes_no_codes = {"yes": 1, "no": 0}

for col in binary_columns:
    # Already numeric means this cell has run before; mapping 0/1 through
    # yes_no_codes again would turn the whole column into NaN.
    if pd.api.types.is_numeric_dtype(df[col]):
        continue

    # Tolerate stray whitespace / capitalisation in the raw labels.
    encoded = df[col].astype(str).str.strip().str.lower().map(yes_no_codes)

    # Fail loudly instead of letting unmapped labels become NaN features.
    unmapped = encoded.isnull()
    if unmapped.any():
        bad_values = sorted(df.loc[unmapped, col].astype(str).unique())
        raise ValueError(
            f"Column {col!r} has values other than yes/no: {bad_values}"
        )

    df[col] = encoded

# One-hot encode furnishingstatus, dropping the first category as the baseline.
# The guard keeps the cell re-runnable once the source column has been replaced.
if "furnishingstatus" in df.columns:
    df = pd.get_dummies(df, columns=["furnishingstatus"], drop_first=True)


print("\nPreprocessing Completed")

In [None]:
# Model inputs, in the column order the estimators are trained on.
features = [
    "area", "bedrooms", "guestroom", "bathrooms",
    "mainroad", "prefarea", "stories", "parking",
    "basement", "airconditioning",
    "furnishingstatus_semi-furnished",
    "furnishingstatus_unfurnished",
]

# X holds the predictors, y the regression target.
X, y = df[features], df["price"]

print("\nSelected Features:", X.columns.tolist())


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
# Three candidate regressors; the tree-based ones share one seed for repeatable fits.
model_seed = 42

lr = LinearRegression()
dt = DecisionTreeRegressor(random_state=model_seed)
rf = RandomForestRegressor(n_estimators=200, random_state=model_seed)


In [None]:
# Fit every candidate on the same training split, in a fixed order.
for estimator in (lr, dt, rf):
    estimator.fit(X_train, y_train)

print("\nModels Trained Successfully")


In [None]:
def evaluate(model, name):
    """Print MAE, RMSE and R2 for ``model`` on the test split and return the RMSE.

    Predictions are made on the notebook-level ``X_test`` and compared against
    ``y_test``.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    name
        Label printed above the metrics.

    Returns
    -------
    The unrounded root mean squared error (only the printed value is rounded).
    """
    y_pred = model.predict(X_test)

    # RMSE is kept in its own variable because it is also the return value.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # (label, value, decimals) rows, printed in this order.
    report = (
        ("MAE:", mean_absolute_error(y_test, y_pred), 2),
        ("RMSE:", rmse, 2),
        ("R2 Score:", r2_score(y_test, y_pred), 4),
    )

    print(f"\n{name}")
    for label, value, decimals in report:
        print(label, round(value, decimals))

    return rmse


# Score each fitted model in turn and keep its RMSE for the comparison below.
rmse_lr, rmse_dt, rmse_rf = [
    evaluate(fitted, label)
    for fitted, label in (
        (lr, "Linear Regression"),
        (dt, "Decision Tree"),
        (rf, "Random Forest"),
    )
]

In [None]:
# Test-set RMSE keyed by each model's display name.
rmse_scores = dict(
    zip(
        ("Linear Regression", "Decision Tree", "Random Forest"),
        (rmse_lr, rmse_dt, rmse_rf),
    )
)

# Lowest RMSE wins; on a tie min() keeps the earliest entry.
best_model_name = min(rmse_scores, key=rmse_scores.get)
print("\nBest Model Based on RMSE:", best_model_name)

# Resolve the winning name to its fitted estimator; the random forest is the fallback.
best_model = {
    "Linear Regression": lr,
    "Decision Tree": dt,
}.get(best_model_name, rf)

In [None]:
# Importances are only reported when one of the tree-based models won.
tree_based_names = ("Decision Tree", "Random Forest")

if best_model_name in tree_based_names:
    importance = pd.Series(best_model.feature_importances_, index=X.columns)
    importance = importance.sort_values(ascending=False)

    print("\nTop 5 Important Features:")
    print(importance.head(5))

In [None]:
def predict_price(
    area,
    bedrooms,
    guestroom,
    bathrooms,
    mainroad,
    prefarea,
    stories,
    parking,
    basement,
    airconditioning,
    furnishingstatus
):
    """Predict the price of a single house with the notebook-level ``best_model``.

    Parameters
    ----------
    area, bedrooms, bathrooms, stories, parking
        Passed through unchanged into the feature column of the same name.
    guestroom, mainroad, prefarea, basement, airconditioning
        Passed through unchanged into the feature column of the same name.
        The training columns are encoded as 1 (yes) / 0 (no), so callers are
        expected to pass 1 or 0.
    furnishingstatus : str
        One of ``"furnished"``, ``"semi-furnished"`` or ``"unfurnished"``.
        Surrounding whitespace and letter case are ignored.

    Returns
    -------
    The first element of ``best_model.predict`` for the one-row input frame.

    Raises
    ------
    ValueError
        If ``furnishingstatus`` is not one of the three recognised values.
        Without this check a typo would silently be priced as "furnished".
    """
    # (semi-furnished, unfurnished) dummy values; "furnished" is the baseline
    # category, represented by both dummies being 0.
    dummy_codes = {
        "furnished": (0, 0),
        "semi-furnished": (1, 0),
        "unfurnished": (0, 1),
    }

    status = str(furnishingstatus).strip().lower()
    if status not in dummy_codes:
        raise ValueError(
            f"Unknown furnishingstatus {furnishingstatus!r}; "
            f"expected one of {sorted(dummy_codes)}"
        )
    semi, unfurnished = dummy_codes[status]

    # Single-row frame whose columns follow the order used for training.
    input_data = pd.DataFrame([{
        "area": area,
        "bedrooms": bedrooms,
        "guestroom": guestroom,
        "bathrooms": bathrooms,
        "mainroad": mainroad,
        "prefarea": prefarea,
        "stories": stories,
        "parking": parking,
        "basement": basement,
        "airconditioning": airconditioning,
        "furnishingstatus_semi-furnished": semi,
        "furnishingstatus_unfurnished": unfurnished
    }])

    prediction = best_model.predict(input_data)[0]
    return prediction


In [None]:
# Sample house used as a smoke test for predict_price.
example_house = dict(
    area=3000,
    bedrooms=3,
    guestroom=0,
    bathrooms=2,
    mainroad=1,
    prefarea=1,
    stories=2,
    parking=1,
    basement=0,
    airconditioning=1,
    furnishingstatus="furnished",
)

example_price = predict_price(**example_house)

print("\nExample Predicted Price:", round(example_price, 2))

In [None]:
# Persist the selected estimator to the working directory.
model_path = "best_house_price_model.pkl"
joblib.dump(best_model, model_path)

print("\nBest Model Saved Successfully")