# 🏠 House Price Exploration

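Fully updated, clean, production-ready notebook: it loads the cleaned house data, explores the price distribution and feature correlations, trains a gradient-boosting regressor, and saves the fitted model and scaler.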

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import warnings
# Silence every warning so library notices do not clutter the cell output.
warnings.simplefilter("ignore")
# Plot with matplotlib's default style.
plt.style.use("default")


In [None]:

# Read the cleaned house data; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer="../data/kc_house_data_cleaned.csv")
# Preview the first five rows.
df.head(n=5)


In [None]:

# Report the (rows, columns) shape, then the per-column dtype / non-null summary.
print(f"Shape: {df.shape}")
df.info()


In [None]:

# Summary statistics, transposed so each column of df becomes one row.
df.describe().transpose()


In [None]:

# Count the missing values in each column.
df.isna().sum()


In [None]:

# Histogram of the price column (50 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(7, 4))
sns.histplot(df["price"], bins=50, kde=True, ax=ax)
ax.set_title("House Price Distribution")
plt.show()


In [None]:

# Correlation is only defined for numeric data, and DataFrame.corr() raises on
# non-numeric columns in pandas >= 2.0, so restrict to numeric/boolean columns
# before computing the matrix. For an all-numeric frame this is a no-op.
numeric_df = df.select_dtypes(include=["number", "bool"])

plt.figure(figsize=(14, 10))
sns.heatmap(numeric_df.corr(), cmap="coolwarm", linewidths=0.3)
plt.title("Feature Correlation Heatmap")
plt.show()


In [None]:

# Target is the price column; the features are every remaining column.
y = df["price"]
X = df.drop(columns="price")


In [None]:

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [None]:

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:

# Hyper-parameters for the gradient-boosting regressor; the fixed seed keeps
# training repeatable.
gbr_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 4,
    "random_state": 42,
}
model = GradientBoostingRegressor(**gbr_params)

# Fit on the scaled training features.
model.fit(X_train_scaled, y_train)


In [None]:

# Predict on the held-out rows and report three regression metrics.
preds = model.predict(X_test_scaled)

mae = mean_absolute_error(y_test, preds)
rmse = np.sqrt(mean_squared_error(y_test, preds))  # root of the MSE
r2 = r2_score(y_test, preds)

for label, score in (("MAE:", mae), ("RMSE:", rmse), ("R2:", r2)):
    print(label, score)


In [None]:

import os
import pickle

# Make sure the output directory exists; an existing one is not an error.
os.makedirs("../models", exist_ok=True)

# Pickle the fitted model and the fitted scaler, each to its own file.
for target, artifact in (
    ("../models/gradient_boosting.pkl", model),
    ("../models/scaler.pkl", scaler),
):
    with open(target, "wb") as f:
        pickle.dump(artifact, f)

print("Model & scaler saved")
