In [12]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import time

In [13]:
housing = fetch_california_housing()
X, y = housing.data, housing.target
feature_names = housing.feature_names

In [None]:
class CustomGradientBoostingRegressor:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3, random_state=None):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.random_state = random_state
        self.trees = []
        self.initial_prediction = None
    
    def fit(self, X, y):
        self.initial_prediction = np.mean(y)
        F = np.full(len(y), self.initial_prediction)
        
        for i in range(self.n_estimators):
            residuals = y - F #mse
            
            tree = DecisionTreeRegressor(max_depth=self.max_depth, random_state=self.random_state)
            tree.fit(X, residuals)
            
            self.trees.append(tree)
            
            predictions = tree.predict(X)
            F += self.learning_rate * predictions
            
        return self
    
    def predict(self, X):
        predictions = np.full(X.shape[0], self.initial_prediction)
        
        for tree in self.trees:
            predictions += self.learning_rate * tree.predict(X)
            
        return predictions

def evaluate_model(model, X, y, cv=5):
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)
    cv_scores = []
    
    for train_idx, test_idx in kf.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        
        model.fit(X_train, y_train)
        
        y_pred = model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        cv_scores.append(mse)
    
    return np.sqrt(np.mean(cv_scores))

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
n_estimators = 100
learning_rate = 0.1
max_depth = 3

In [17]:
start_time = time.time()
custom_gb = CustomGradientBoostingRegressor(
    n_estimators=n_estimators, 
    learning_rate=learning_rate, 
    max_depth=max_depth,
    random_state=42
)
custom_gb.fit(X_train, y_train)
custom_train_time = time.time() - start_time
print(f"Custom model training time: {custom_train_time:.2f} seconds")

Custom model training time: 6.07 seconds


In [18]:
start_time = time.time()
sklearn_gb = GradientBoostingRegressor(
    n_estimators=n_estimators, 
    learning_rate=learning_rate, 
    max_depth=max_depth,
    random_state=42
)
sklearn_gb.fit(X_train, y_train)
sklearn_train_time = time.time() - start_time
print(f"Scikit-learn model training time: {sklearn_train_time:.2f} seconds")

Scikit-learn model training time: 5.56 seconds


In [19]:
custom_y_pred = custom_gb.predict(X_test)
sklearn_y_pred = sklearn_gb.predict(X_test)

custom_rmse = np.sqrt(mean_squared_error(y_test, custom_y_pred))
sklearn_rmse = np.sqrt(mean_squared_error(y_test, sklearn_y_pred))

custom_r2 = r2_score(y_test, custom_y_pred)
sklearn_r2 = r2_score(y_test, sklearn_y_pred)

In [20]:
custom_cv_rmse = evaluate_model(custom_gb, X, y)
sklearn_cv_rmse = evaluate_model(sklearn_gb, X, y)


In [21]:
print(f"{'Metric':<25}{'Custom Implementation':<25}{'Scikit-learn':<25}")
print(f"{'Training Time (seconds)':<25}{custom_train_time:<25.2f}{sklearn_train_time:<25.2f}")
print(f"{'Test RMSE':<25}{custom_rmse:<25.4f}{sklearn_rmse:<25.4f}")
print(f"{'Test R²':<25}{custom_r2:<25.4f}{sklearn_r2:<25.4f}")
print(f"{'Cross-validation RMSE':<25}{custom_cv_rmse:<25.4f}{sklearn_cv_rmse:<25.4f}")


Metric                   Custom Implementation    Scikit-learn             
Training Time (seconds)  6.07                     5.56                     
Test RMSE                0.5422                   0.5422                   
Test R²                  0.7757                   0.7756                   
Cross-validation RMSE    3.3080                   0.5316                   
