In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import accuracy_score

In [2]:
# Remote CSV to load. header=None makes pandas read the file's first row as
# data, so the column labels come from `columns` below.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    "Pregnancies",
    "Glucose",
    "BloodPressure",
    "SkinThickness",
    "Insulin",
    "BMI",
    "DiabetesPedigreeFunction",
    "Age",
    "Outcome",
]
df = pd.read_csv(url, header=None, names=columns)

In [3]:
# Feature matrix: every column of `df` except the label column.
X = df.drop(columns="Outcome").to_numpy()
# Target vector: the "Outcome" column on its own.
y = df["Outcome"].to_numpy()

In [4]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# --- Custom Gradient Boosting Classifier ---
class GradientBoostingClassifier:
    """Binary classifier built by boosting regression trees on residuals.

    Labels equal to 1 are encoded as +1.0 and every other label as -1.0.
    Each boosting round fits a ``DecisionTreeRegressor`` to the current
    residuals and then reduces the residuals by ``learning_rate`` times that
    tree's predictions on the training data.

    NOTE: the trees regress on +/-1 targets, and the summed score is only
    squashed through a sigmoid afterwards. ``predict_proba`` is therefore a
    monotone score in [0, 1], not a calibrated probability. The 0.5 cut-off
    used by ``predict`` is equivalent to testing whether the raw score is
    positive.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting rounds (one tree per round).
    learning_rate : float, default=0.1
        Shrinkage applied to every tree's contribution.
    max_depth : int, default=3
        Maximum depth passed to each regression tree.
    random_state : int, RandomState instance or None, default=None
        Forwarded unchanged to every tree. ``None`` keeps the trees'
        default (unseeded) behaviour.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3,
                 random_state=None):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.random_state = random_state
        self.trees = []
        # Stays None until fit() runs; predict_proba() uses it to detect an
        # unfitted model.
        self.initial_pred = None

    def fit(self, X, y):
        """Fit the ensemble on features ``X`` and binary labels ``y``.

        Any previously fitted trees are discarded, so calling ``fit`` again
        retrains from scratch instead of stacking onto the old ensemble.

        Returns
        -------
        self
        """
        X = np.asarray(X)
        # np.asarray makes the comparison element-wise even for plain lists.
        residuals = np.where(np.asarray(y) == 1, 1.0, -1.0)

        # Build into a local list so a failure part-way through does not
        # leave a half-trained ensemble on the instance.
        trees = []
        for _ in range(self.n_estimators):
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                random_state=self.random_state,
            )
            tree.fit(X, residuals)
            residuals -= self.learning_rate * tree.predict(X)
            trees.append(tree)

        self.initial_pred = 0.0
        self.trees = trees
        return self

    def predict_proba(self, X):
        """Return the sigmoid of the summed, shrunken tree outputs.

        The result is a 1-D array with one positive-class score per row of
        ``X``.

        Raises
        ------
        AttributeError
            If the model has not been fitted yet.
        """
        if self.initial_pred is None:
            raise AttributeError(
                "GradientBoostingClassifier is not fitted yet; call fit() first."
            )
        X = np.asarray(X)
        score = np.full(X.shape[0], self.initial_pred, dtype=float)
        for tree in self.trees:
            score += self.learning_rate * tree.predict(X)
        return self._sigmoid(score)

    def predict(self, X):
        """Return 1 where the positive-class score exceeds 0.5, else 0."""
        return (self.predict_proba(X) > 0.5).astype(int)

    def _sigmoid(self, x):
        """Logistic function evaluated without overflowing for large |x|."""
        x = np.asarray(x, dtype=float)
        # exp is only taken of non-positive values, so it never exceeds 1.
        decay = np.exp(-np.abs(x))
        return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

In [6]:
# Fit the custom booster on the training split, then label the held-out rows.
custom_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
)
custom_model.fit(X_train, y_train)
y_pred_custom = custom_model.predict(X_test)

In [7]:
# Share of held-out rows whose predicted label equals the true label.
accuracy_custom = accuracy_score(y_true=y_test, y_pred=y_pred_custom)
print(f"Custom Gradient Boosting Accuracy: {accuracy_custom:.4f}")

Custom Gradient Boosting Accuracy: 0.7597
