In [41]:
import numpy as np

In [42]:
class DecisionStump:
    def __init__(self,):
        self.feature=None 
        self.threshold=None 
        self.polarity=None 
        self.alpha=None 

    def predict(self,X):
        n_samples,n_features=X.shape

        X_c=X[:,self.feature]


        prediction=np.ones(shape=(n_samples))

        if self.polarity==1:
            prediction[X_c<self.threshold]=-1
        
        else:
            prediction[X_c>self.threshold]=-1

        return prediction

In [43]:
class AdaBoost:
    def __init__(self,n_clf=5):
        self.n_clf=n_clf 
        self.clfs=[]

    def fit(self,X,y):
        n_samples,n_features=X.shape

        w=np.full(shape=(n_samples),fill_value=(1/n_samples))

        for _ in range(self.n_clf):
            clf=DecisionStump()

            min_error=float('inf')

            for feat in range(n_features):
                X_c=X[:,feat]

                possible_threshold=np.unique(X_c)

                for threshold in possible_threshold:
                    p=1 

                    prediction=np.ones(shape=(n_samples))

                    prediction[X_c<threshold]=-1 

                    missclassified_weight=w[prediction!=y]

                    error=sum(missclassified_weight)

                    if error>0.5:
                        p=-1 
                        error=1-error

                    if error<min_error:
                        min_error=error 
                        clf.feature=feat 
                        clf.threshold=threshold
                        clf.polarity=p 

            EPS=1e-10 
            clf.alpha=0.5 * np.log((1+min_error)/(min_error+EPS))

            y_pred=clf.predict(X)

            w=w * np.exp(-clf.alpha * y * y_pred)

            w=w/sum(w)

            self.clfs.append(clf)

        return self 
    

    def predict(self,X):
        prediction=np.array([clf.alpha * clf.predict(X)  for clf in self.clfs])

        prediction=prediction.sum(axis=0)

        y_pred=np.sign(prediction)

        return y_pred

In [44]:
from sklearn.datasets import load_breast_cancer as d 
from sklearn.model_selection import train_test_split as tts 

def acc(y_,y):
    return np.sum(y_==y)/len(y)

In [45]:
X,y=d().data,d().target

In [46]:
y=np.where(y<=0,-1,1)

In [47]:
X_train,X_test,y_train,y_test=tts(X,y,test_size=0.2,random_state=123)

In [48]:
clf=AdaBoost(n_clf=5)
# [c.alpha for c in  clf.clfs]

clf.fit(X_train,y_train)


<__main__.AdaBoost at 0x1a07c086810>

In [49]:
y_pred=clf.predict(X_test)
acc(y_pred,y_test)

0.9736842105263158

In [50]:
y_pred.shape

(114,)

In [51]:
y_test.shape

(114,)