In [3]:
import numpy as np

In [10]:
class DecisionStump:
    """One-feature threshold classifier that outputs labels -1 or +1.

    Attributes are left as ``None`` on construction and are filled in by
    whoever trains the stump:

    * ``feature_index`` - column of ``X`` the stump looks at.
    * ``threshold``     - cut point compared against that column.
    * ``polarity``      - ``1`` marks values strictly below the threshold as
      -1; any other value marks values strictly above the threshold as -1.
    * ``alpha``         - weight of this stump; not used by ``predict``.
    """

    def __init__(self):
        self.feature_index = None
        self.threshold = None
        self.polarity = None
        self.alpha = None

    def predict(self, X):
        """Return a float array with one -1/+1 label per row of ``X``."""
        column = X[:, self.feature_index]

        # Pick which side of the threshold gets the negative label.
        # Values exactly equal to the threshold stay +1 for both polarities.
        if self.polarity == 1:
            negative_side = column < self.threshold
        else:
            negative_side = column > self.threshold

        labels = np.ones(X.shape[0])
        labels[negative_side] = -1
        return labels

In [11]:
class AdaBoost:
    """AdaBoost ensemble of ``DecisionStump`` weak learners.

    Labels passed to ``fit`` must be encoded as -1 / +1: the weight update
    and the final vote both rely on the sign of ``y * prediction``.

    Parameters
    ----------
    n_stumps : int, default 5
        Number of boosting rounds (one stump is trained per round).
    """

    def __init__(self,n_stumps=5):
        self.n_stumps=n_stumps
        self.stumps=[]

    def fit(self,X,y):
        """Train ``n_stumps`` stumps on ``X`` (2-D, samples x features) and ``y``.

        Each round exhaustively searches every feature, every unique feature
        value as threshold, and both polarities for the split with the lowest
        weighted error, then re-weights the samples so that the ones this
        stump got wrong count more in the next round.

        Raises
        ------
        ValueError
            If no candidate split exists (for example ``X`` has no columns).
        """
        X=np.asarray(X)
        y=np.asarray(y)
        n_samples,n_features=X.shape

        # Start from a clean slate so calling fit() twice does not stack the
        # stumps of the previous fit on top of the new ones.
        self.stumps=[]

        w=np.full(n_samples,(1/n_samples))

        for _ in range(self.n_stumps):
            min_error=float('inf')
            stump=DecisionStump()
            best_predictions=None

            for feature in range(n_features):
                X_=X[:,feature]
                thresholds=np.unique(X_)

                for threshold in thresholds:
                    for polarity in [1,-1]:
                        predictions=np.ones(n_samples)
                        if(polarity==1):
                            predictions[X_<threshold]=-1
                        else:
                            predictions[X_>threshold]=-1

                        # Weighted misclassification rate of this candidate.
                        error=np.sum(w*(predictions!=y))
                        if(error<min_error):
                            min_error=error
                            stump.feature_index=feature
                            stump.polarity=polarity
                            stump.threshold=threshold
                            best_predictions=predictions

            if best_predictions is None:
                raise ValueError("AdaBoost.fit found no candidate split; X must have at least one feature")

            # The small constant keeps the ratio finite when the stump is perfect.
            stump.alpha = 0.5 * np.log((1 - min_error) / (min_error + 1e-10))

            # Per-sample update: y*h(x) is +1 for correct and -1 for wrong
            # predictions, so misclassified samples are scaled up by
            # exp(+alpha) and correct ones down by exp(-alpha). A uniform
            # factor exp(-alpha) would cancel in the normalisation below and
            # leave the weights unchanged.
            w*=np.exp(-stump.alpha*y*best_predictions)
            w/=np.sum(w)

            self.stumps.append(stump)

    def predict(self,X):
        """Return the sign of the alpha-weighted vote of all fitted stumps."""
        X=np.asarray(X)
        stump_preds=[ stump.alpha *stump.predict(X) for stump in self.stumps]
        preds=np.sign(np.sum(stump_preds,axis=0))
        return preds

In [12]:
from sklearn.datasets import make_classification

# Synthetic binary problem; make_classification labels the classes 0 and 1.
X, y = make_classification(
    n_samples=1000,
    n_features=6,
    n_informative=2,
    n_redundant=0,
    random_state=42,
)
# The AdaBoost class above works with labels -1 / +1, so remap class 0 to -1.
y = np.where(y == 0, -1, 1)

model = AdaBoost(n_stumps=10)
model.fit(X, y)

# Predictions are made on the same samples used for fitting, so the number
# printed below is training accuracy, not a held-out estimate.
y_pred = model.predict(X)
accuracy = (y_pred == y).mean()
print("Accuracy:", accuracy)


Accuracy: 0.875


In [13]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

# Reference model: scikit-learn's AdaBoost with the same number of rounds.
sklearn_clf = AdaBoostClassifier(n_estimators=10, random_state=42)

# y is passed unchanged, i.e. with the -1 / +1 labels prepared earlier.
# As in the cell above, the score is measured on the training samples.
y_pred_sklearn = sklearn_clf.fit(X, y).predict(X)

acc_sklearn = accuracy_score(y_true=y, y_pred=y_pred_sklearn)
print("sklearn AdaBoost Accuracy:", acc_sklearn)

sklearn AdaBoost Accuracy: 0.875
