# Gradient Boosting Algorithm From Scratch

## Importing Libraries

In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier

## The Gradient Boost Classifier

In [3]:
class MyGradientBoostClassifier:
    """One-vs-rest classifier built from per-class regression trees.

    For every distinct label seen in ``fit`` a single ``DecisionTreeRegressor``
    is trained on the 0/1 indicator of that label. ``predict_proba`` applies a
    softmax across the per-class tree outputs and ``predict`` returns the label
    whose probability is largest.

    Parameters
    ----------
    n_estimators : int, default=100
        Stored on the instance. NOTE(review): nothing in this class reads it --
        exactly one tree is fit per class, so no staged boosting takes place.
    learning_rate : float, default=0.1
        Scales the training scores returned by ``fit``. It is not applied in
        ``predict_proba``.
    max_depth : int, default=3
        Maximum depth handed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    binary_classifiers : list
        One fitted regressor per class, in the same order as ``classes_``.
    classes_ : numpy.ndarray or None
        Sorted unique labels seen by the most recent ``fit``; ``None`` before
        the first ``fit``.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.binary_classifiers = []
        self.classes_ = None

    def fit(self, X, y):
        """Fit one regressor per class on that class's 0/1 indicator.

        Parameters
        ----------
        X : array-like
            Training features; passed unchanged to the underlying regressor.
        y : array-like
            Training labels, one per row of ``X``.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(len(y), n_classes)`` holding
            ``learning_rate * regressor.predict(X)`` for every class.
        """
        y = np.asarray(y)
        self.classes_ = np.unique(y)
        # Start from a clean slate: without this, calling fit() a second time
        # would keep the previous trees and add bogus columns at predict time.
        self.binary_classifiers = []
        predictions = np.zeros((len(y), len(self.classes_)))

        for i, label in enumerate(self.classes_):
            # 1 where the sample belongs to the current class, 0 elsewhere.
            binary_labels = (y == label).astype(int)

            binary_classifier = self.build_binary_classifier(X, binary_labels)
            self.binary_classifiers.append(binary_classifier)

            predictions[:, i] += self.learning_rate * binary_classifier.predict(X)

        return predictions

    def build_binary_classifier(self, X, y):
        """Fit and return a depth-limited regression tree on targets ``y``.

        ``fit`` calls this with the 0/1 indicator vector of a single class.
        """
        tree = DecisionTreeRegressor(max_depth=self.max_depth)
        tree.fit(X, y)
        return tree

    def predict_proba(self, X):
        """Return softmax-normalised per-class scores for ``X``.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(n_samples, n_classes)``; each row sums to 1 and
            the columns follow the order of ``classes_``.

        Raises
        ------
        ValueError
            If the model holds no fitted regressors.
        """
        if not self.binary_classifiers:
            raise ValueError(
                "MyGradientBoostClassifier is not fitted yet; call fit() first."
            )

        binary_predictions = np.column_stack(
            [binary_classifier.predict(X) for binary_classifier in self.binary_classifiers]
        ).astype(float)

        # Subtracting the row maximum leaves the softmax unchanged but keeps
        # np.exp from overflowing to inf (and the ratio from becoming nan).
        shifted = binary_predictions - np.max(binary_predictions, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def predict(self, X):
        """Return, for each row of ``X``, the label with the highest probability.

        The winning column index is mapped back through ``classes_`` so the
        result uses the same label values that were passed to ``fit``.
        """
        probabilities = self.predict_proba(X)
        best_columns = np.argmax(probabilities, axis=1)
        if self.classes_ is None:
            # Regressors were installed without fit(); no label mapping is
            # available, so fall back to the raw column indices.
            return best_columns
        return self.classes_[best_columns]

## Evaluating Our Model

In [8]:
# Load the Iris table and discard the row-identifier column.
iris = pd.read_csv("iris.csv").drop(columns='Id')

# The first four columns are the features; 'Species' is the target.
X = iris.iloc[:, :4]

# Encode each species name as its position in the sorted list of unique names.
labels = {name: code for code, name in enumerate(np.unique(iris['Species']))}
y = iris['Species'].map(labels)

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=10
)

# Train the from-scratch classifier and predict the held-out rows.
model = MyGradientBoostClassifier(n_estimators=10)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Fraction of held-out rows whose predicted class equals the true class.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.9666666666666667


## Using GradientBoostingClassifier from scikit-learn

In [9]:
# Baseline: scikit-learn's GradientBoostingClassifier, scored on the same
# train/test split created by the evaluation cell above (X_train, X_test,
# y_train, y_test). Reusing that split, instead of re-reading and re-splitting
# iris.csv, removes the copy-pasted loading code and guarantees both models are
# compared on identical rows.
# random_state is fixed so repeated runs of this cell give the same model.
clf = GradientBoostingClassifier(n_estimators=10, random_state=10)
clf.fit(X_train, y_train)
print("Accuracy:", clf.score(X_test, y_test))

Accuracy: 0.9666666666666667
