In [1]:
#import libraries
import pandas as pd
import joblib
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [2]:
# Read the preprocessed house-price table from disk.
df_preprocessed = pd.read_csv("../house_prices_processed.csv")

# The price column is the prediction target; every other column is a feature.
y = df_preprocessed["Price (in rupees)"]
X = df_preprocessed.drop(columns=["Price (in rupees)"])

In [3]:
# Restore the serialized preprocessing object from the models directory.
# joblib.load unpickles the file, so it should only be pointed at trusted artifacts.
preprocessor = joblib.load("../models/preprocessor.pkl")

In [4]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical across runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the (rows, columns) shape of each feature split.
print(f"Training set: {X_train.shape}")
print(f"Testing set: {X_test.shape}")

Training set: (141668, 13)
Testing set: (35418, 13)


In [5]:
# Transform the features using the preprocessor.
# The preprocessor is fitted on the training features only and then applied,
# with that same fit, to the held-out test features.
# NOTE(review): fit_transform re-fits the object loaded from disk, in memory only —
# confirm that whatever serves the trained model uses a preprocessor with this same fit.
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

# Report the shape of each transformed feature matrix.
print(f"Transformed training features: {X_train_transformed.shape}")
print(f"Transformed testing features: {X_test_transformed.shape}")

Transformed training features: (141668, 125)
Transformed testing features: (35418, 125)


In [6]:
# Standard Gradient Boosting regressor with a fixed set of hyperparameters.
gb_standard_params = {
    "n_estimators": 100,       # number of boosting stages
    "learning_rate": 0.1,      # shrinkage applied to each tree's contribution
    "max_depth": 6,            # depth limit of each individual tree
    "min_samples_split": 20,   # minimum samples needed to split an internal node
    "min_samples_leaf": 10,    # minimum samples required at a leaf node
    "random_state": 42,        # fixed seed so training is repeatable
}
gb_standard = GradientBoostingRegressor(**gb_standard_params)

# Train on the transformed training features against the training targets.
gb_standard.fit(X_train_transformed, y_train)
print("Gradient Boosting training completed!")


Gradient Boosting training completed!


In [7]:
# Predict on both splits so train and test metrics can be compared side by side.
y_pred_gb = gb_standard.predict(X_test_transformed)
y_pred_gb_train = gb_standard.predict(X_train_transformed)

In [8]:
# Evaluate gradient boosting on the training split and on the test split.
def _regression_metrics(y_true, y_pred):
    """Return (MAE, MSE, RMSE, R²) for the given true and predicted targets."""
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    # RMSE is derived from the MSE rather than computed separately.
    return mae, mse, np.sqrt(mse), r2_score(y_true, y_pred)


# Training-split metrics.
(
    gb_standard_mae_train,
    gb_standard_mse_train,
    gb_standard_rmse_train,
    gb_standard_r2_train,
) = _regression_metrics(y_train, y_pred_gb_train)

print("Standard Gradient Boosting Train Results:")
print(f"Train MAE: {gb_standard_mae_train:.2f}")
print(f"Train MSE: {gb_standard_mse_train:.2f}")
print(f"Train RMSE: {gb_standard_rmse_train:.2f}")
print(f"Train R² Score: {gb_standard_r2_train:.4f}")

# Test-split metrics.
(
    gb_standard_mae,
    gb_standard_mse,
    gb_standard_rmse,
    gb_standard_r2,
) = _regression_metrics(y_test, y_pred_gb)

print("\nStandard Gradient Boosting Test Results:")
print(f"Test MAE: {gb_standard_mae:.2f}")
print(f"Test MSE: {gb_standard_mse:.2f}")
print(f"Test RMSE: {gb_standard_rmse:.2f}")
print(f"Test R² Score: {gb_standard_r2:.4f}")


Standard Gradient Boosting Train Results:
Train MAE: 921.27
Train MSE: 1761204.13
Train RMSE: 1327.10
Train R² Score: 0.7729

Standard Gradient Boosting Test Results:
Test MAE: 932.84
Test MSE: 1817183.89
Test RMSE: 1348.03
Test R² Score: 0.7660


In [9]:
# Save Gradient Boosting model
joblib.dump(gb_standard, '../models/gradient_boosting_model.pkl')
print("\nGradient Boosting model saved as 'gradient_boosting_model.pkl'")

# Also persist the preprocessor exactly as it was fitted in this notebook.
# The model above was trained on the output of preprocessor.fit_transform(X_train),
# which re-fitted the object loaded from '../models/preprocessor.pkl' in memory only.
# Saving that fitted state next to the model lets inference apply the identical
# transformation instead of relying on whatever fit the original file contains.
# A separate, model-specific path is used so the shared input file is not overwritten.
joblib.dump(preprocessor, '../models/gradient_boosting_preprocessor.pkl')
print("Fitted preprocessor saved as 'gradient_boosting_preprocessor.pkl'")


Gradient Boosting model saved as 'gradient_boosting_model.pkl'
