## AdaBoost

### A series of trees that learn sequentially to predict better.

This model uses a series of weights which determine how difficult a datapoint is to classify. These weights are updated for the weights associated with the errors and effect the training of all future trees. The thinking behind this is, if the trees get better over time. Through the relative weighting of the trees based on how good they are, we will get better results than any individual tree.

[The psuedocode of AdaBoost](./images/The-pseudocode-of-the-AdaBoost-algorithm.png)


## Implementation


In [1]:
import numpy as np

In [185]:
class AdaBoost():
    def __init__(self,X:np.ndarray,y:np.ndarray,num_trees=1,num_splits=10):
        '''
        AdaBoost:
        Parameters:
            X: An array of numerical factors
            y: The responding variable, can be categorical or continuous
            num_trees: The number of estimators which are voting to form a prediction
            max_depth: The maximum number of nodes from root to leaves in each estimator
            num_splits: The number of splits to be tested for each factor at each node
        '''
        self.trees = []
        self.__is_continuous__ = self.__is_continuous__(y)
        self.weights=np.ones(shape=(X.shape[0])) / len(X)
        self.alphas = []
        for tree in range(num_trees):
            new_tree=self.__DecisionTreeClassifier__(X,y,5,num_splits)
            curr_x = np.multiply(X.T,self.weights).T
            current_predictions = new_tree.predict(curr_x)

            errors = (np.array(current_predictions) != np.array(y))
            print(errors.shape)
            err = self.weights[errors].sum()/self.weights.sum()

            alpha = np.log2((1-err)/(err+0.00001))

            self.alphas.append(alpha)

            self.weights[errors]*=np.exp(alpha)

            self.trees.append(new_tree)
        self.alphas = np.array(self.alphas)
    def predict(self,X):
        '''
        Predict:
        Parameters:
            X: An array of numerical factors
            
        Returns:
            A series of predictions, the continuity of predictions depends on the labels the model was trained on
        '''
        predictions = []
        for tree in self.trees:
            predictions.append(tree.predict(X))
        predictions = np.dot(np.array(predictions).T,self.alphas) 
        return np.array(predictions).sum()>0.5
    class __DecisionTreeClassifier__():
        def __init__(self,X:np.ndarray,y:np.ndarray,max_depth,num_splits):
            '''
            Decision Tree Classifier:
            Parameters:
                X: An array of numerical factors
                y: The responding variable, must be categorical
                max_depth: The maximum number of nodes from root to leaves in each estimator
                num_splits: The number of splits to be tested for each factor at each node
            '''
            self.tree = self.Node_(X,y,self,num_splits,max_depth)
        def predict(self,X):
            '''
            Predict:
            Predicts using the established tree

            Parameters:
                X: An array of numerical factors

            Returns:
                An array of predictions.
            '''
            total_predictions=[]
            for x in X:
                node = self.tree
                while node.prediction_value is None:
                    #decend tree
                    if x[node.split_characteristic] <= node.split: node = node.left
                    else: node = node.right
                total_predictions.append(node.prediction_value)
            return total_predictions
        class Node_():
            def __init__(self, X,y,tree, num_splits, max_depth,current_depth=0):
                self.left = None
                self.right = None

                self.tree = tree
                #calculate entropy
                classes = np.unique(y)
                if len(classes) == 1:
                    self.prediction_value = classes[0]
                    return #early stop
                p_classes = []
                for class_ in classes:
                    p_class = np.sum(y==class_)/len(y)
                    p_classes.append(p_class)

                self.entropy = -np.sum(np.array([p_class*np.log2(p_class) for p_class in p_classes]))


                info_gains = [] # used to find best split
                total_splits = []

                for column_index in range(X.shape[1]):
                    curr_column = X[:,column_index]

                    #pick 10 random potential split points

                    random_splits = np.random.random_sample(num_splits,)*(curr_column.max()-curr_column.min())\
                                    + curr_column.min()
                    total_splits = np.concatenate([total_splits, random_splits],axis=0)

                    # decide on best split using information gain
                    for split in random_splits:

                        y_lower = y[curr_column<=split]
                        y_higher = y[curr_column>split]
                        lower_p_classes = []
                        higher_p_classes = []
                        #find entropy of each split
                        for class_ in classes:
                            lower_p_class = np.sum(y_lower==class_)/len(y_lower)
                            lower_p_classes.append(lower_p_class)    
                            higher_p_class = np.sum(y_higher==class_)/len(y_higher)
                            higher_p_classes.append(higher_p_class)    
                        lower_entropy = -np.sum(np.array([p_class*np.log2(p_class) for p_class in lower_p_classes]))
                        higher_entropy = -np.sum(np.array([p_class*np.log2(p_class) for p_class in higher_p_classes]))

                        info_gains.append(self.entropy -  higher_entropy - lower_entropy)
                # split using best splitpoint
                arg_max = np.argmax(np.array(info_gains))
                self.split_characteristic = arg_max // len(random_splits)
                final_split = total_splits[arg_max]
                self.split = final_split
                # also split X and y
                final_X_lower = X[X[:,self.split_characteristic]<=self.split, :]
                final_X_higher = X[X[:,self.split_characteristic]>self.split, :]
                final_y_lower = y[X[:,self.split_characteristic]<=self.split]
                final_y_higher = y[X[:,self.split_characteristic]>self.split]
                #assign children
                if current_depth<max_depth:
                    self.left = self.tree.Node_(X=final_X_lower,\
                                                y=final_y_lower,\
                                                tree=self.tree,\
                                                num_splits=num_splits,\
                                                max_depth=max_depth,\
                                                current_depth=current_depth+1)
                    self.right = self.tree.Node_(X=final_X_higher,\
                                                y=final_y_higher,\
                                                tree=self.tree,\
                                                num_splits=num_splits,\
                                                max_depth=max_depth,\
                                                current_depth=current_depth+1)
                    self.prediction_value = None
                #asign property to be predicted
                else:
                    self.prediction_value = classes[np.argmax(np.array(p_classes))]


    def __is_continuous__(self,x):

        if type(x[0]) in [np.float64,np.float,np.float128,np.float16,np.float32]: return True
        # Enough ints that we can consider them continuous
        elif type(x[0]) == np.int64 and len(np.unique(x[0])) > 10: return True
        else: return False

In [186]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
dataset = load_breast_cancer()

In [187]:
data = dataset['data']
target = dataset['target']

data_train, data_test, target_train, target_test = train_test_split(data, target, test_size=0.2, random_state=42)

In [188]:
ADA =AdaBoost(data_train,target_train,num_trees=50,num_splits=10)



(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)
(455,)


In [189]:
predictions = ADA.predict(data_test)

In [190]:
ADA.alphas

array([ 7.58953059e-01, -3.35971771e-01,  1.48728473e-01, -6.58391472e-02,
        2.91457259e-02, -1.29022455e-02,  5.71157440e-03, -2.52840317e-03,
        1.11927509e-03, -4.95481379e-04,  2.19340002e-04, -9.70975665e-05,
        4.29832104e-05, -1.90278339e-05,  8.42325321e-06, -3.72881090e-06,
        1.65067230e-06, -7.30720627e-07,  3.23475855e-07, -1.43196490e-07,
        6.33903104e-08, -2.80616609e-08,  1.24223523e-08, -5.49913505e-09,
        2.43436068e-09, -1.07764405e-09,  4.77052032e-10, -2.11181888e-10,
        9.34862363e-11, -4.13849068e-11,  1.83203962e-11, -8.11059539e-12,
        3.59072077e-12, -1.58921989e-12,  7.03792803e-13, -3.12334084e-13,
        1.38388025e-13, -6.02244183e-14,  2.59477547e-14, -1.15323354e-14,
        4.80513976e-15, -2.88308385e-15,  2.88308385e-15, -2.88308385e-15,
        2.88308385e-15, -2.88308385e-15,  2.88308385e-15, -2.88308385e-15,
        2.88308385e-15, -2.88308385e-15])

In [191]:
for tree in ADA.trees:
    predictions = tree.predict(data_test)
    TP = np.array([prediction == target for prediction,target in zip(predictions,target_test)])
    print(TP.sum())

86
89
86
82
87
85
87
86
86
80
89
88
89
86
90
87
86
88
86
86
83
90
86
85
86
80
83
87
73
76
86
80
88
83
95
82
87
85
89
89
86
87
88
90
90
83
72
85
87
89


In [192]:
#print recall as this is critical for medical tests

TP = np.array([prediction == target for prediction,target in zip(predictions,target_test)])
FN = np.array([ target != prediction  for prediction,target in zip(predictions,target_test)])


print(TP.sum()/(FN.sum()+TP.sum()))


0.7807017543859649


Not the best, but it's great for combining 5 decision stumps!

In [137]:
predictions[:5]

[1, 0, 1, 1, 1]

In [138]:
target[:5]

array([0, 0, 0, 0, 0])