In [132]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection  import train_test_split
import matplotlib.pyplot as plt

In [133]:
# data
def create_data():
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df['label'] = iris.target
    df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
    data = np.array(df.iloc[:100, [0, 1, -1]])
    for i in range(len(data)):
        if data[i,-1] == 0:
            data[i,-1] = -1
    # print(data)
    return data[:,:2], data[:,-1]

In [134]:
X, y = create_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [135]:
class SingleDecisionTree:
    def __init__(self,axis=0,threshold=0,flag=True):
        self.axis=axis
        self.threshold=threshold
        self.flag=flag
        
    def predict(self,x):
        if self.flag==True:
            return -1 if x[self.axis]>=self.threshold else 1
        else :
            return 1 if x[self.axis]>=self.threshold else -1
        
    def predictArr(self,dataSet):
        result=list()
        for x in dataSet:
            if self.flag==True:
                result.append(-1 if x[self.axis]>=self.threshold else 1)
            else:
                result.append(1 if x[self.axis]>=self.threshold else -1)
        return result

In [139]:
class Adaboost:
    def train(self,dataSet,labels):
        N,M=np.array(dataSet).shape
        self.funList=list()
        D=np.ones((N,1))/float(N)
        
        L=0.5
        minError=np.inf
        minTree=None
        while minError>0.01:
            for axis in range(M):
                min=np.min(np.array(dataSet)[:,axis])
                max=np.max(np.array(dataSet)[:,axis])
                for threshold in np.arange(min,max,L):
                    tree = SingleDecisionTree(axis=axis,threshold=threshold,flag=True)
                    em=self.calcEm(D,tree,dataSet,labels)
                    if (minError>em):
                        minError=em
                        minTree =tree
                    tree = SingleDecisionTree(axis=axis,threshold=threshold,flag=False)
                    em=self.calcEm(D,tree,dataSet,labels)
                    if (minError>em):
                        minError=em
                        minTree =tree
            alpha=(0.5)*np.log((1-minError)/float(minError))
            print('alpha'+str(alpha))
            self.funList.append((alpha,minTree))
            yi=np.array(labels).reshape(-1,1)
            gm=np.array(minTree.predictArr(dataSet)).reshape(-1,1)
            # -ai * yi *gm
            eta=np.multiply(-alpha* yi,gm)
            Z=sum(np.multiply(D,np.exp(eta)))
            
            D=np.multiply(D,np.exp(eta))/Z
    
    def predict(self,x):
        sum=0
        print(self.funList)
        for fun in self.funList:
            alpha=fun[0]
            tree = fun[1]
            sum+=alpha* tree.predict(x)
        print(sum)
        return 1 if sum>0 else -1
    
    def calcEm(self,D,Gm,dataSet,labels):
        value=[0 if Gm.predict(row) == labels[i] else 1 for (i,row) in enumerate(dataSet)]
        return np.sum(np.multiply(D,np.array(value).reshape(-1,1)))

In [140]:
dataSet = [[0, 1, 3], [0, 3, 1], [1, 2, 2], [1, 1, 3], [1, 2, 3], [0, 1, 2], [1, 1, 2], [1, 1, 1], [1, 3, 1], [0, 2, 1]]    #p153的例子
labels = [-1, -1, -1, -1, -1, -1, 1, 1, -1, -1]
adaboost = Adaboost()
adaboost.train(dataSet, labels)
print(adaboost.predict([1, 3, 2]))

alpha0.6931471805599453
alpha0.7331685343967135
alpha0.7331685343967135
alpha1.0932930895655817
alpha1.0932930895655817
alpha1.149273824723755
alpha1.149273824723755
alpha1.4492075459468585
alpha1.4492075459468585
alpha2.0162052978500324
alpha2.0162052978500324
alpha2.0345539490161575
alpha2.0345539490161575
alpha3.6324422339216196
[(0.6931471805599453, <__main__.SingleDecisionTree object at 0x0000000013C5B860>), (0.7331685343967135, <__main__.SingleDecisionTree object at 0x0000000013CCD5C0>), (0.7331685343967135, <__main__.SingleDecisionTree object at 0x0000000013CCD5C0>), (1.0932930895655817, <__main__.SingleDecisionTree object at 0x0000000013CCD5F8>), (1.0932930895655817, <__main__.SingleDecisionTree object at 0x0000000013CCD5F8>), (1.149273824723755, <__main__.SingleDecisionTree object at 0x0000000013CCDCF8>), (1.149273824723755, <__main__.SingleDecisionTree object at 0x0000000013CCDCF8>), (1.4492075459468585, <__main__.SingleDecisionTree object at 0x0000000013CCD630>), (1.44920754

In [141]:
def pred_score(obj,x,y):
    count=0
    l=len(y)
    for i in range(l):
        if y[i]==obj.predict(x[i]):
            count+=1
    return count/float(l)

In [143]:
adaboost2 = Adaboost()
adaboost2.train(X_train, y_train)
pred_score(adaboost2,X_test,y_test)

alpha0.7752987062055835
alpha1.0270618668477742
alpha1.0270618668477742
alpha1.340510764357147
alpha1.340510764357147
alpha1.5635483429126418
alpha1.5635483429126418
alpha2.0727370005931793
alpha2.0727370005931793
alpha2.850620884119584
[(0.7752987062055835, <__main__.SingleDecisionTree object at 0x0000000013A8C710>), (1.0270618668477742, <__main__.SingleDecisionTree object at 0x0000000013A8C438>), (1.0270618668477742, <__main__.SingleDecisionTree object at 0x0000000013A8C438>), (1.340510764357147, <__main__.SingleDecisionTree object at 0x0000000013A8C668>), (1.340510764357147, <__main__.SingleDecisionTree object at 0x0000000013A8C668>), (1.5635483429126418, <__main__.SingleDecisionTree object at 0x0000000013A8CA90>), (1.5635483429126418, <__main__.SingleDecisionTree object at 0x0000000013A8CA90>), (2.0727370005931793, <__main__.SingleDecisionTree object at 0x0000000013A8C780>), (2.0727370005931793, <__main__.SingleDecisionTree object at 0x0000000013A8C780>), (2.850620884119584, <__mai

1.0