In [12]:
from sklearn.tree import DecisionTreeRegressor
import numpy as np

In [13]:
class GradientBoostClassifier:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y):
        pos_ratio = np.clip(np.mean(y), 1e-5, 1-1e-5)
        self.initial_prediction = np.log(pos_ratio / (1 - pos_ratio))
        F = np.full(y.shape, self.initial_prediction)
        
        for _ in range(self.n_estimators):
            p = 1 / (1 + np.exp(-F))
            residuals = y - p
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            self.trees.append(tree)
            F += self.learning_rate * tree.predict(X)

    def predict_proba(self, X):
        F = np.full((X.shape[0],), self.initial_prediction)
        for tree in self.trees:
            F += self.learning_rate * tree.predict(X)
        p = 1 / (1 + np.exp(-F))
        return np.vstack([1-p, p]).T

    def predict(self, X):
        proba = self.predict_proba(X)[:, 1]
        return (proba >= 0.5).astype(int)


In [14]:
from sklearn.datasets import  make_classification
from sklearn.metrics import  accuracy_score
X_clf, y_clf = make_classification(n_samples=200, n_features=5, n_informative=3, n_redundant=0, random_state=42)

In [17]:
scratch_model = GradientBoostClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
scratch_model.fit(X_clf, y_clf)
scratch_model_preds = scratch_model.predict(X_clf)
acc_scratch = accuracy_score(y_clf, scratch_model_preds)
print(acc_scratch)

0.975


In [18]:
sklearn_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
sklearn_model.fit(X_clf, y_clf)
y_pred_clf_sklearn = sklearn_model.predict(X_clf)
acc_sklearn = accuracy_score(y_clf, y_pred_clf_sklearn)

NameError: name 'GradientBoostingClassifier' is not defined