In [1]:
import numpy as np

In [2]:
class DecisionStump:
    def __init__(self):
        self.feature_index = None
        self.threshold = None
        self.alpha = None
        self.polarity = 1
        
    
    def predict(self, X):
        n_samples = X.shape[0]
        feature_column = X[:, self.feature_index]
        
        predictions = np.ones(n_samples)
        if self.polarity == 1:
            predictions[feature_column < self.threshold] = -1
        else:
            predictions[feature_column > self.threshold] = -1
            
        return predictions

In [3]:
class AdaBoost:
    def __init__(self, n_stumps=5):
        self.n_stumps = n_stumps

        
    def fit(self, X, y):
        n_samples, n_features = X.shape

        w = np.full(n_samples, (1/n_samples))
        self.stumps = []

        for _ in range(self.n_stumps):
            decision_stump = DecisionStump()
            min_error = float('inf')
            
            for feature_index in range(n_features):
                feature_column = X[:, feature_index]
                thresholds = np.unique(feature_column)
                
                for threshold in thresholds:
                    pol = 1
                    predictions = np.ones(n_samples)
                    predictions[feature_column < threshold] = -1
                    
                    missclassified_weights = w[y!= predictions]
                    
                    error = sum(missclassified_weights)
                    
                    if error > 0.5:
                        error = 1 - error
                        pol = -1
                        
                    if error < min_error:
                        min_error = error
                        decision_stump.polarity = pol
                        decision_stump.feature_index = feature_index
                        decision_stump.threshold = threshold
                 
                EPSILON = 1e-15
                decision_stump.alpha = 0.5 * np.log(  (1-error) / (error+EPSILON) )
                
                stump_predictions = decision_stump.predict(X)
                w *= np.exp(-decision_stump.alpha*y*stump_predictions)
                w /= sum(w)
                self.stumps.append(decision_stump)
                
    
    def predict(self, X):
        for decision_stump in self.stumps:
            stumps_predictions = [ (stump.alpha * stump.predict(X)) for stump in self.stumps]
            stumps_aggregated_prediction = np.sum(stumps_predictions, axis=0)
            sign_prediction = np.sign(stumps_aggregated_prediction)
            return sign_prediction

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [5]:
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target

In [6]:
y[y == 0] = -1

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

In [13]:
adaboost_clf = AdaBoost(n_stumps=10)

In [14]:
adaboost_clf.fit(X_train, y_train)

In [15]:
y_predicted = adaboost_clf.predict(X_test)

In [11]:
def accuracy(original, predicted):
    return sum(original == predicted)/len(original)

In [16]:
acc = accuracy(y_test, y_predicted)
print(f'accuracy = {acc*100:.1f}%')

accuracy = 90.4%
