## Supervised Learning - Classification - AdaBoost


Modify the AdaBoost scratch code in our lecture such that:
- Notice that if <code>err</code> = 0, then $\alpha$ is undefined (division by zero), so fix this by adding a very small value to the denominator
- Notice that the sklearn version of AdaBoost has a parameter <code>learning_rate</code>.  This is in fact the $\frac{1}{2}$ in front of the $\alpha$ calculation.  Change this $\frac{1}{2}$ into a parameter called <code>eta</code>, try different values of it, and see whether accuracy improves.  Note that sklearn defaults this value to 1.
- Observe that we are actually using sklearn's DecisionTreeClassifier.  If we look at it closely, it uses the weighted gini index instead of the weighted errors that we learned above.  Write your own <code>class Stump</code> that uses weighted errors instead of the weighted gini index.   To check whether your stump really works, it should still give you roughly the same accuracy.  In addition, with your own stump, if you do not change y to -1, it will result in very bad accuracy.  In contrast, the sklearn DecisionTree version will STILL work even if y is not changed to -1, since it uses the gini index
- Put everything into a class

#### st122645

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
from sklearn.datasets import make_classification


In [2]:
# Synthetic binary-classification dataset with 500 samples, seeded via random_state=1.
X, y = make_classification(n_samples=500, random_state=1)
y = np.where(y==0,-1,1)  # relabel class 0 as -1 so labels are in {-1, +1}; the boosting weight update multiplies y by the prediction

# Hold out 30% of the samples as the test split (seeded via random_state=42).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [3]:
# X, y = make_classification(n_samples=500, random_state=1)
# y = np.where(y==0,-1,1)  

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# class AdaBoost:
#     def __init__(self, stump_params, S = 20, W = 0, eta = 0.5):
#         self.eta = eta
#         self.W = W
#         self.stump_params = stump_params
#         self.S = S
#         self.models = [DecisionTreeClassifier(**stump_params) for _ in range(S)]
#         self.a_js = np.zeros(S)
        
    
#     def fit(self,X_train,y_train,X_test,y_test):
#         m = X_train.shape[0]
#         self.W = np.full(m, 1/m)    
#         for j, model in enumerate(self.models):
#             model.fit(X_train, y_train, sample_weight = self.W)
#             yhat = model.predict(X_train) 
#             err = self.W[(yhat != y_train)].sum()
#             a_j = np.log ((1 - err) / err) * self.eta
#             self.a_js[j] = a_j
#             self.W = (self.W * np.exp(-a_j * y_train * yhat)) 
#             self.W = self.W / sum (self.W)
    
#     def predict(self,y_test):
#         Hx = 0
#         for i, model in enumerate(self.models):
#             yhat = model.predict(X_test)
#             Hx += self.a_js[i] * yhat
    
#         yhat = np.sign(Hx)
#         print(classification_report(y_test, yhat))

#         clf = AdaBoost({'max_depth':1,'max_leaf_nodes':2})
# clf.fit(X_train,y_train,X_test,y_test)
# clf.predict(y_test)

In [4]:
class Stump:
    """Parameter holder for a single decision stump.

    Attributes:
        polarity: +1 or -1; selects which side of the threshold is
            labelled -1. Defaults to +1.
        feature_index: column of the feature matrix the stump splits on.
        threshold: split value compared against that feature.
        alpha: weight of this stump's vote in the boosted ensemble.
    """

    def __init__(self):
        # An untrained stump: only the polarity has a meaningful default;
        # everything else stays None until a training routine sets it.
        self.polarity = 1
        self.feature_index = self.threshold = self.alpha = None

In [82]:
class AdaBoost:
    """AdaBoost binary classifier built from weighted-error decision stumps.

    Labels are expected to be in {-1, +1}: both the weight update
    (``exp(-alpha * y * prediction)``) and the "flip the stump when its
    error exceeds 0.5" shortcut rely on that encoding.

    Parameters:
        S: number of boosting rounds (one stump is trained per round).
        W: initial value stored in ``self.W``; it is overwritten with
            uniform sample weights at the start of ``fit``.
        eta: multiplier applied to ``log((1 - err) / err)`` when computing
            each stump's vote weight (0.5 gives the textbook formula).

    Attributes:
        models: the fitted stumps, in training order.
        a_js: array of length ``S`` with the vote weight of each stump.
    """

    def __init__(self, S = 20, W = 0, eta = 0.5):
        self.eta = eta
        self.W = W
        self.S = S
        self.models = []
        self.a_js = np.zeros(S)

    @staticmethod
    def _stump_predict(X, feature_index, threshold, polarity):
        """Return the stump's {-1, +1} prediction for every row of ``X``.

        The base rule labels ``X[:, feature_index] < threshold`` as -1 and
        everything else as +1; ``polarity`` (+1 / -1) multiplies the result.
        Using a multiplication makes the polarity -1 stump the exact
        complement of the polarity +1 stump, including for samples that sit
        exactly on the threshold.
        """
        base = np.where(X[:, feature_index] < threshold, -1.0, 1.0)
        return polarity * base

    def fit(self, X, y):
        """Train ``S`` stumps on ``X`` (m x n) with labels ``y`` in {-1, +1}.

        Raises:
            ValueError: if ``X`` has no samples or no features.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        m, n = X.shape
        if m == 0 or n == 0:
            raise ValueError("X must contain at least one sample and one feature")

        self.W = np.full(m, 1 / m)
        # Start from a clean slate so calling fit() twice does not stack
        # the new stumps on top of the previous ensemble.
        self.models = []
        self.a_js = np.zeros(self.S)

        for i in range(self.S):
            clf = Stump()
            min_error = np.inf

            # Exhaustive search over every (feature, threshold) pair for the
            # stump with the smallest weighted error.
            for feature in range(n):
                column = X[:, feature]

                for threshold in np.unique(column):
                    p = 1
                    prediction = np.where(column < threshold, -1.0, 1.0)
                    error = np.sum(self.W[y != prediction])

                    if error > 0.5:
                        # The complemented stump errs on exactly the samples
                        # this one gets right; clamp guards against a tiny
                        # negative value from floating-point round-off.
                        error = max(1 - error, 0.0)
                        p = -1
                    if error < min_error:
                        clf.polarity = p
                        clf.threshold = threshold
                        clf.feature_index = feature
                        min_error = error

            # The small constant keeps alpha finite when the stump is perfect.
            clf.alpha = self.eta * np.log((1.0 - min_error) / (min_error + 1e-10))
            predictions = self._stump_predict(
                X, clf.feature_index, clf.threshold, clf.polarity
            )

            # Up-weight misclassified samples, then renormalise to sum to 1.
            self.W *= np.exp(-clf.alpha * y * predictions)
            self.W /= np.sum(self.W)
            self.models.append(clf)
            self.a_js[i] = clf.alpha

    def predict(self, X):
        """Return the sign of the alpha-weighted vote of all fitted stumps.

        The result is an array of -1.0 / +1.0 values (0.0 where the weighted
        votes cancel exactly, or when no stump has been fitted).
        """
        X = np.asarray(X)
        yhat = np.zeros(X.shape[0])
        for clf in self.models:
            yhat += clf.alpha * self._stump_predict(
                X, clf.feature_index, clf.threshold, clf.polarity
            )

        return np.sign(yhat)

In [83]:
# Train a 20-round ensemble on the training split, then report per-class
# precision/recall/F1 on the held-out test split.
model = AdaBoost(S=20) # the earlier sklearn-stump draft took {'max_depth':1,'max_leaf_nodes':2} here
model.fit(X_train,y_train)
yhat = model.predict(X_test)
print(classification_report(y_test, yhat))

              precision    recall  f1-score   support

          -1       0.93      0.96      0.94        79
           1       0.96      0.92      0.94        71

    accuracy                           0.94       150
   macro avg       0.94      0.94      0.94       150
weighted avg       0.94      0.94      0.94       150

