# Boosting

Boosting is a supervised learning algorithm that uses an ensamble of weak classifiers to predict a label. This is accomplished by successively training classifiers that focus on perfoming well on the subset of the data that the previous ones don't classify correctly.

In [68]:
import numpy as np
from sklearn import datasets
import pandas as pd

import support

In this case, we are building a descrete classifier (labels are restricted to {-1,1}) that uses simple binary trees as weak classifiers.

In [63]:
class DiscreteAdaBoost:
    class __Weak:
        # In Init we are going to create a simple decision tree
        # that minimizes a weighted error
        
        def __init__(self, X, y, w):
            self.prediction = dict()
            cols = X.shape[1]
            
            error = np.array([float('Inf')] * cols)
            isPositive = [False] * cols
            
            for i in range(cols):
                pos = np.ones_like(y)
                neg = -1 * np.ones_like(y)
                
                idx = (X[:,i] == 0)
                pos[idx] = -1
                neg[idx] =  1
                
                pos_error = np.sum((pos != y).astype('float') * w)
                neg_error = np.sum((neg != y).astype('float') * w)
                if pos_error < neg_error:
                    error[i] = pos_error
                    isPositive[i] = True
                else:
                    error[i] = neg_error
            
            self.index = np.argmin(error)
            
            if isPositive[self.index]:
                self.prediction[0] = -1
                self.prediction[1] = 1
            else:
                self.prediction[0] = 1
                self.prediction[1] = -1            
        
        # Prediction value for the Weak classifier
        def Predict(self, X):
            output = [None] * np.shape(X)[0]

            for i in range(np.shape(X)[0]):
                value = X[i, self.index]
                output[i] = self.prediction[value]
            return np.array(output)
    
    def Fit(self, X, y, estimators = 3):
        n = y.shape[0]
        w = np.array([1/n] * n)
        self.F = []
        
        self.__partitions = dict()
        for i in range(X.shape[1]):
            self.__partitions[i] = set(X[:,i])
        Xb = self.__to_binary(X)
        
        for t in range(estimators):
            weak = self.__Weak(Xb,y,w)
            pred_weak = weak.Predict(Xb)
            error = np.sum((pred_weak != y).astype('float') * w)
            
            if error == 0:
                alpha = 5
            else:
                alpha = 0.5 * np.log( (1-error) / error )
                
            self.F.append( (alpha, weak) )
            
            w = w * np.exp(-y * alpha * pred_weak)
            w = w / w.sum()
    
    def Predict(self, X):
        counter = np.array( [0] * np.shape(X)[0])
        Xb = self.__to_binary(X)
        
        for alpha, weak in self.F:
            counter = counter + alpha * weak.Predict(Xb)
        
        predictions = np.ones_like(counter)
        predictions[counter<0] = -1
        
        return predictions, counter / counter.max()
    
        
    def __is_numeric(self, values):
        return values.dtype.kind in set('buifc')
    
    def __to_binary(self, X):
        output = np.empty((X.shape[0], 0))
        
        # Categorize each column
        for i in range(X.shape[1]):            
            # add a column "feature < value", for each value present in
            # that feature
            for value in self.__partitions[i]:
                if self.__is_numeric(value):
                    col = (X[:,i] < value).reshape((-1,1)).astype('int')
                    output = np.hstack( (output, col))
                else:
                    col = (X[:,i] == value).reshape((-1,1)).astype('int')
                    output = np.hstack( (output, col))
                      
        return output

In [65]:
X = [
    [1,1,2,2,1],
    [2,1,1,2,2],
    [2,2,1,1,2],
    [1,2,1,2,1],
    [2,2,2,1,1],
    [1,2,1,1,2],
    [2,1,1,2,1]
]

y = [-1,1,1,-1,-1,1,1]

X = np.array(X)
y = np.array(y)

dab = DiscreteAdaBoost()

for i in range(1,10):
    dab.Fit(X,y, estimators=i)
    pred, probs = dab.Predict(X)
    print(i, "predictors - Training score:", (pred == y).sum() / y.shape[0]) 

1 predictors - Training score: 0.8571428571428571
2 predictors - Training score: 0.8571428571428571
3 predictors - Training score: 1.0
4 predictors - Training score: 1.0
5 predictors - Training score: 1.0
6 predictors - Training score: 1.0
7 predictors - Training score: 1.0
8 predictors - Training score: 1.0
9 predictors - Training score: 1.0


## Test on Iris dataset

In [69]:
iris = datasets.load_iris()

X = iris.data
y = iris.target

print("The dataset has", X.shape[0], "entries with", X.shape[1], "different features and", len(set(y)), "different labels.\n")

print("Dataset Head:")
print(pd.DataFrame(X, columns=iris.feature_names).head())

The dataset has 150 entries with 4 different features and 3 different labels.

Dataset Head:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2


In [75]:
dab = DiscreteAdaBoost()

yd = np.ones_like(y)
yd[y!=2] == -1

for i in range(1,52,5):
    dab.Fit(X,yd,estimators=i)
    pred, probs = dab.Predict(X)
    print(i, "predictors - Training score:", (pred == yd).sum() / yd.shape[0])    



1 predictors - Training score: 1.0
6 predictors - Training score: 1.0
11 predictors - Training score: 1.0
16 predictors - Training score: 1.0
21 predictors - Training score: 1.0
26 predictors - Training score: 1.0
31 predictors - Training score: 1.0
36 predictors - Training score: 1.0
41 predictors - Training score: 1.0
46 predictors - Training score: 1.0
51 predictors - Training score: 1.0


### Reference
[AdaBoost, Wikipedia](https://en.wikipedia.org/wiki/AdaBoost)