In [1]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

In [20]:
class GradientBoostingRegressorScratch:
    """Gradient boosting regressor built from scratch on top of regression trees.

    The model starts from a constant prediction (the mean of the training
    targets) and then repeatedly fits a ``DecisionTreeRegressor`` to the
    current residuals ``y - y_pred``, adding each tree's output scaled by
    ``learning_rate`` to the running prediction.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds, i.e. number of trees fitted by ``fit``.
    learning_rate : float, default=0.1
        Factor applied to every tree's prediction before it is added.
    max_depth : int, default=3
        ``max_depth`` passed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    trees : list
        Fitted trees, in the order they were added. Rebuilt on every ``fit``.
    initial_prediction : float or None
        Mean of the training targets; ``None`` until ``fit`` has been called.
    """

    def __init__(self,n_estimators=100,learning_rate=0.1,max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y):
        """Fit the ensemble to ``X`` and ``y``.

        Parameters
        ----------
        X : array-like
            Training features; passed unchanged to every tree's ``fit`` and
            ``predict``.
        y : array-like
            Training targets; converted to a float NumPy array.

        Returns
        -------
        self : GradientBoostingRegressorScratch
            The fitted model, so calls can be chained.
        """
        # Accept lists and other array-likes, not only objects exposing `.shape`.
        y = np.asarray(y, dtype=float)

        # Discard trees from any previous fit; otherwise refitting would stack
        # a second ensemble on top of the first and predict() would sum both.
        self.trees = []
        self.initial_prediction = np.mean(y)

        y_pred = np.full(y.shape, self.initial_prediction, dtype=float)
        for _ in range(self.n_estimators):
            # Each round fits a tree to what the current ensemble still gets wrong.
            residuals = y - y_pred
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            self.trees.append(tree)
            y_pred += self.learning_rate * tree.predict(X)
        return self

    def predict(self, X):
        """Predict targets for ``X``.

        Parameters
        ----------
        X : array-like
            Samples to predict; passed unchanged to every tree's ``predict``.

        Returns
        -------
        numpy.ndarray
            One prediction per row of ``X``: the initial constant plus the
            scaled outputs of all fitted trees.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.initial_prediction is None:
            raise RuntimeError(
                "This GradientBoostingRegressorScratch instance is not fitted yet; "
                "call fit() before predict()."
            )

        # np.shape also handles inputs without a `.shape` attribute (e.g. lists).
        n_samples = np.shape(X)[0]
        y_pred = np.full((n_samples,), self.initial_prediction, dtype=float)
        for tree in self.trees:
            y_pred += self.learning_rate * tree.predict(X)
        return y_pred


In [26]:
from sklearn.datasets import make_regression, make_classification
from sklearn.metrics import mean_squared_error, accuracy_score

# Regression demo: build a synthetic dataset, fit the from-scratch booster on it,
# and report the mean squared error on the same rows it was fitted on.
X_reg, y_reg = make_regression(
    n_samples=200,
    n_features=5,
    noise=10,
    random_state=42,
)

model = GradientBoostingRegressorScratch(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
)
model.fit(X_reg, y_reg)

# Predictions are made for the training rows, so the figure below is a training MSE.
y_pred_reg = model.predict(X_reg)
print(
    "Scratch Model Regression MSE:",
    mean_squared_error(y_reg, y_pred_reg),
)

Scratch Model Regression MSE: 0.9482766643357254


In [25]:
from sklearn.ensemble import GradientBoostingRegressor
# Reference run: scikit-learn's gradient boosting with the same hyperparameters,
# fitted and scored on the same data (X_reg, y_reg) as the from-scratch model.
model_sklearn = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)
model_sklearn.fit(X_reg, y_reg)

y_pred_reg_sklearn = model_sklearn.predict(X_reg)
print(
    "Scikit learn model :",
    mean_squared_error(y_reg, y_pred_reg_sklearn),
)

Scikit learn model : 0.9482766643357259
