In [164]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

In [165]:
cancer = load_breast_cancer()
data, target = cancer.data, cancer.target

In [166]:
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

print(f"Shape of X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"Shape of X_test: {X_test.shape}, y_test: {y_test.shape}")

scale = StandardScaler()
X_train = scale.fit_transform(X_train)
X_test = scale.transform(X_test)

Shape of X_train: (455, 30), y_train: (455,)
Shape of X_test: (114, 30), y_test: (114,)


In [167]:
class AdaBoost:
    def __init__(self, M):
        self.M = M
        self.alphas = []
        self.models = []

    def fit(self, X, y):
        w = np.full(X.shape[0], 1 / X.shape[0])
        y = np.where(y == 0, -1, 1)
        for m in range(self.M):
            tree = DecisionTreeClassifier(max_depth=1)
            tree.fit(X, y, sample_weight=w)
            self.models.append(tree)
            predictions = tree.predict(X)

            miss_indexes = np.where(predictions != y)
            eps = np.sum(w[miss_indexes]) / np.sum(w)
            alpha = np.log((1 - eps) / (eps + 1e-10))
            self.alphas.append(alpha)
            w = w * np.exp(-self.alphas[-1] * y * predictions)
            w = w / np.sum(w)

    def predict(self, x):
        final = np.zeros(x.shape[0])
        for alpha, model in zip(self.alphas, self.models):
            preds = model.predict(x)
            final += alpha * preds

        return np.where(final >= 0, 1, -1)


In [168]:
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.ensemble import AdaBoostClassifier
scores=[]
for i in range(1,101):
    cv=StratifiedKFold(n_splits=5,shuffle=True,random_state=True)
    ada = AdaBoostClassifier(n_estimators=i, random_state=0)
    score=cross_val_score(ada,X_train,y_train,cv=cv,scoring='accuracy')
    scores.append(score.mean())

best_stimator=np.argmax(scores)

In [169]:
y_test = np.where(y_test == 0, -1, 1)

In [170]:
ada = AdaBoost(best_stimator)
ada.fit(X_train, y_train)
preds = ada.predict(X_test)

print(f"Sklearn Adaboost Accuracy: {accuracy_score(y_test, preds):.3f}")
print(f"Sklearn Adaboost F1-Score: {f1_score(y_test, preds, average='weighted'):.3f}")

Sklearn Adaboost Accuracy: 0.912
Sklearn Adaboost F1-Score: 0.913


In [172]:
adaboost = AdaBoostClassifier(n_estimators=best_stimator, random_state=0)
adaboost.fit(X_train, y_train)
prediction = ada.predict(X_test)

print(f"Sklearn Adaboost Accuracy: {accuracy_score(y_test, prediction):.3f}")
print(f"Sklearn Adaboost F1-Score: {f1_score(y_test, prediction, average='weighted'):.3f}")

Sklearn Adaboost Accuracy: 0.912
Sklearn Adaboost F1-Score: 0.913
