In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection  import train_test_split
import matplotlib.pyplot as plt

In [2]:
# data
def create_data():
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df['label'] = iris.target
    df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
    data = np.array(df.iloc[:100, [0, 1, -1]])
    for i in range(len(data)):
        if data[i,-1] == 0:
            data[i,-1] = -1
    # print(data)
    return data[:,:2], data[:,-1]

In [3]:
X, y = create_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [66]:
class Adaboosting:
    def __init__(self,lr=0.2):
        self.lr=lr
        
    def _em(self,ypred):
        tmp=np.array([0 if ypred[i]==self.y[i] else 1 for i in range(self.m)])
        return np.sum(np.multiply(self.D,tmp))
        
    def _predict(self,x,axis,threshold,direct):
#         print(x)
#         print('axis: '+str(axis))
#         print(x[:,axis])
        
        if direct=='p':
            return np.array([1 if i >= threshold else -1 for i in x[:,axis] ])
        if direct=='n':
            return np.array([-1 if i >= threshold else 1 for i in x[:,axis] ])
        
    def fit(self,x,y):
        self.x=np.array(x)
        self.y=np.array(y)
        self.m,self.n=self.x.shape
        self.D=np.ones(self.m)/self.m
        self.funList=[]
        
        minError=np.inf
        minTree=None
        
#         print('D:'+str(self.D))
#         print('D:'+str(self.D))
        
        while minError>0.01:
            for axis in range(self.n):
                minv=min(self.x[:,axis])
                maxv=max(self.x[:,axis])
                
                for threshold in np.arange(minv,maxv,self.lr):
                    ## direct = p
                    ypred=self._predict(self.x,axis,threshold,'p')
                    error=self._em(ypred)
#                     print('Error: '+str(error))
                    
                    if minError>error:
                        minError=error
                        minTree=(axis,threshold,'p')
#                         print('p em :'+str(minError))
#                         print('p minTree :'+str(minTree))
                    ## direct = n    
                    ypred=self._predict(self.x,axis,threshold,'n')
                    error=self._em(ypred)
                    
                    if minError>error:
                        minError=error
                        minTree=(axis,threshold,'n')
#                         print('n em :'+str(minError))
#                         print('n minTree :'+str(minTree))
                    
            alpha=0.5*np.log((1-minError)/minError)
            self.funList.append((alpha,minTree))
            gm=self._predict(self.x,minTree[0],minTree[1],minTree[2])
            eta=np.multiply(-alpha*self.y,gm)
            z=np.sum(np.multiply(self.D,np.exp(eta)))
            self.D=np.multiply(self.D,np.exp(eta))/z
            
            
    
    def predict(self,x):
        sumv=0
        for f in self.funList:
            falpha=f[0]
            ftree=f[1]
            sumv+=falpha*self._predict(x,ftree[0],ftree[1],ftree[2])
        print(sumv)
        return [1 if sumv[i] >0 else -1 for i in range(len(x))]
    
    def pred_score(self,y_pred,y_true):
        count=0
        l=len(y_pred)
        for i in range(l):
            if y_pred[i]==y_true[i]:
                count+=1
        return count/float(l)
        

In [67]:
dataSet = np.array([[0, 1, 3], [0, 3, 1], [1, 2, 2], [1, 1, 3], [1, 2, 3], [0, 1, 2], [1, 1, 2], [1, 1, 1], [1, 3, 1], [0, 2, 1]])    #p153的例子
labels = np.array([-1, -1, -1, -1, -1, -1, 1, 1, -1, -1])
adaboost = Adaboosting()
adaboost.fit(dataSet, labels)
print(adaboost.predict(np.array([1, 3, 2]).reshape(1,-1)))

[-5.94728824]
[-1]


In [55]:
dataSet[:,2]

array([3, 1, 2, 3, 3, 2, 2, 1, 1, 1])

In [70]:
adaboost2 = Adaboosting(lr=0.5)
adaboost2.fit(X_train, y_train)
print(adaboost2.predict(X_test))
adaboost2.pred_score(adaboost2.predict(X_test),y_test)

[  0.08163916  -7.18320255   8.75832327 -13.02157786   4.34489375
  -1.46895825   4.34489375  13.02157786  13.02157786  -7.30733356
   8.75832327  -7.30733356   0.08163916  -7.30733356  -1.46895825
  -7.18320255  13.02157786  -7.18320255  -1.46895825   8.75832327]
[1, -1, 1, -1, 1, -1, 1, 1, 1, -1, 1, -1, 1, -1, -1, -1, 1, -1, -1, 1]
[  0.08163916  -7.18320255   8.75832327 -13.02157786   4.34489375
  -1.46895825   4.34489375  13.02157786  13.02157786  -7.30733356
   8.75832327  -7.30733356   0.08163916  -7.30733356  -1.46895825
  -7.18320255  13.02157786  -7.18320255  -1.46895825   8.75832327]


0.95

In [64]:
y_test

array([-1., -1.,  1., -1.,  1., -1.,  1.,  1.,  1., -1.,  1., -1.,  1.,
       -1., -1., -1.,  1., -1., -1.,  1.])

In [73]:
from sklearn.ensemble import AdaBoostClassifier

In [74]:
clf=AdaBoostClassifier(n_estimators=100,learning_rate=0.5)
clf.fit(X_train,y_train)
clf.score(X_test,y_test)

1.0