In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

class AdaBoost:
    """Discrete AdaBoost for binary classification, built on decision stumps.

    Labels must be encoded as -1 and +1. Each boosting round fits a depth-1
    ``DecisionTreeClassifier`` on the current sample weights and then
    re-weights the samples so that misclassified ones count more in the
    next round.

    Parameters
    ----------
    n_estimators : int, default=50
        Number of boosting rounds; one stump is fitted per round.
    random_state : int, RandomState instance or None, default=None
        Forwarded to every stump so that tie-breaking between equally good
        splits is reproducible.

    Attributes
    ----------
    models : list
        Fitted stumps, in training order.
    alphas : list of float
        Vote weight of each stump, aligned with ``models``.
    """

    # The weighted error is clipped to [_EPS, 1 - _EPS] so that alpha stays
    # finite even when a stump is perfect (error 0) or always wrong (error 1).
    _EPS = 1e-10

    def __init__(self, n_estimators=50, random_state=None):
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.alphas = []
        self.models = []

    def fit(self, X, y):
        """Fit the boosted ensemble on training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features; passed unchanged to each stump's ``fit``.
        y : array-like of shape (n_samples,)
            Training labels; every value must be -1 or +1.

        Returns
        -------
        self : AdaBoost
            The fitted instance.

        Raises
        ------
        ValueError
            If ``y`` is empty, is not one-dimensional, or contains a value
            other than -1 and +1.
        """
        y = np.asarray(y)
        if y.ndim != 1:
            raise ValueError("y must be one-dimensional.")
        if y.size == 0:
            raise ValueError("Cannot fit AdaBoost on an empty dataset.")
        # The weight update multiplies y by the predictions, which is only
        # meaningful for the -1/+1 encoding.
        if not np.all(np.isin(y, (-1, 1))):
            raise ValueError("y must contain only the labels -1 and +1.")

        # Start from a clean slate so that refitting does not stack new
        # stumps on top of those from a previous call.
        self.models = []
        self.alphas = []

        n_samples = y.shape[0]
        # Initialize weights uniformly
        w = np.full(n_samples, 1.0 / n_samples)

        for _ in range(self.n_estimators):
            # Train weak learner on the current sample weights
            model = DecisionTreeClassifier(
                max_depth=1, max_leaf_nodes=2, random_state=self.random_state
            )
            model.fit(X, y, sample_weight=w)
            predictions = model.predict(X)

            # Weighted error, clipped so the log below never sees 0 or inf
            error = np.sum(w * (predictions != y)) / np.sum(w)
            error = np.clip(error, self._EPS, 1.0 - self._EPS)
            alpha = 0.5 * np.log((1.0 - error) / error)

            # Up-weight misclassified samples (y * prediction == -1) and
            # down-weight correct ones, then renormalize to sum to one.
            w = w * np.exp(-alpha * y * predictions)
            w /= np.sum(w)

            # Save model and alpha
            self.models.append(model)
            self.alphas.append(alpha)

        return self

    def predict(self, X):
        """Predict -1/+1 labels by the alpha-weighted vote of all stumps.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify; passed unchanged to each stump's ``predict``.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Float array holding -1.0 or 1.0 per sample. An exactly tied
            vote (score 0) is resolved to +1.0 so the result is always a
            valid label.

        Raises
        ------
        ValueError
            If called before ``fit``.
        """
        if not self.models:
            raise ValueError("This AdaBoost instance is not fitted yet; call fit() first.")

        # Rows are stumps, columns are samples
        model_preds = np.array([model.predict(X) for model in self.models])
        scores = np.dot(self.alphas, model_preds)
        # np.sign would return 0 on a tie, which is not a valid class label
        return np.where(scores >= 0, 1.0, -1.0)

In [6]:
# Demo: boost decision stumps on a two-class slice of the iris data.
iris = load_iris()

# Keep only classes 0 and 1, then relabel them as -1 / +1, the encoding
# the AdaBoost weight update relies on.
binary_mask = iris.target != 2
X = iris.data[binary_mask]
y = np.where(iris.target[binary_mask] == 0, -1, 1)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit 50 boosting rounds on the training portion
ada = AdaBoost(n_estimators=50)
ada.fit(X_train, y_train)

# Score the predictions on the held-out portion
y_pred = ada.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 1.0000
