In [1]:
# AdaBoost built on CART trees as the weak learners
from tree import DecisionTreeNode,CARTRegressor,CARTClassifier

import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
import operator
import math

class AdaboostClassfier(CARTClassifier):
    """AdaBoost binary classifier that combines K CART weak learners.

    ``predict`` returns +1 / -1 from the sign of the alpha-weighted sum of the
    weak learners' outputs, so the labels in the training data are expected to
    be numeric +1 / -1.
    """

    def __init__(self,K):
        """Store the ensemble size.

        Args:
            K: number of weak classifiers (boosting rounds).
        """
        self.K=K  # number of weak classifiers
        self.alphas=[]  # weight of each weak classifier
        self.models=[]  # fitted weak classifiers

    def fit(self,data):
        """Train the ensemble.

        Args:
            data: 2-D array-like; every row is the features followed by the
                label in the last column.

        Raises:
            ValueError: if ``data`` is empty or not 2-D.
        """
        data=np.array(data)
        if data.ndim!=2 or len(data)==0:
            raise ValueError("data must be a non-empty 2-D array of [features..., label] rows")
        X,y=data[:,:-1],data[:,-1]
        m=len(X)  # number of samples

        # Start from a clean state so refitting does not pile up learners.
        self.alphas=[]
        self.models=[]

        # Initial sample weights: uniform.
        D=[1/m]*m
        eps=1e-10  # keeps e away from 0 and 1 so alpha stays finite
        for k in range(self.K):
            # Train the k-th weak classifier on the weighted samples.
            # NOTE(review): assumes CARTClassifier() takes no required arguments
            # and exposes fit(data, sample_weights) / predict(X) like the
            # CARTRegressor used by AdaboostRegressor -- confirm against tree.py.
            model=CARTClassifier()
            model.fit(data,D)
            predictions=model.predict(X)
            wrong=[p!=t for p,t in zip(predictions,y)]

            # Weighted error rate e of this weak classifier.
            e=sum(w for w,is_wrong in zip(D,wrong) if is_wrong)
            e=min(max(e,eps),1-eps)

            # Weight alpha of this weak classifier.
            alpha=np.log((1-e)/e)/2
            self.alphas.append(alpha)
            self.models.append(model)

            # Update sample weights: misclassified samples are scaled by
            # exp(alpha), correct ones by exp(-alpha), then renormalised.
            # For +1/-1 labels this equals D_i * exp(-alpha * y_i * G(x_i)).
            temp=[w*math.exp(alpha if is_wrong else -alpha) for w,is_wrong in zip(D,wrong)]
            Z=sum(temp)  # normalisation factor
            D=[w/Z for w in temp]

    def predict(self,data):
        """Predict +1 / -1 for every row of ``data`` (features only).

        Raises:
            ValueError: if the model has not been fitted.
        """
        if not self.models:
            raise ValueError("AdaboostClassfier must be fitted before calling predict")
        res=[0]*len(data)
        # Iterate over what was actually trained instead of trusting self.K.
        for alpha,model in zip(self.alphas,self.models):
            res=[x+alpha*y for x,y in zip(res,model.predict(data))]
        res=[1 if x>0 else -1 for x in res]
        return res
    
class AdaboostRegressor(CARTRegressor):
    """AdaBoost.R2-style regressor that boosts K CART regression trees.

    Each round fits a tree on the weighted samples, measures the squared
    relative error of every training sample, derives the learner weight
    ``alpha = e / (1 - e)`` and re-weights the samples. Prediction uses the
    learner whose ``ln(1/alpha)`` is the median over all learners.
    """

    def __init__(self,K,min_samples_leaf=None,max_depth=None):
        """Store the ensemble size and the tree hyper-parameters.

        Args:
            K: number of weak regressors (boosting rounds).
            min_samples_leaf: forwarded to every CARTRegressor.
            max_depth: forwarded to every CARTRegressor.
        """
        self.K=K  # number of weak regressors
        self.alphas=[]  # weight of each weak regressor
        self.models=[]  # fitted weak regressors

        self.min_samples_leaf=min_samples_leaf
        self.max_depth=max_depth

    def fit(self,data):
        """Train the ensemble.

        Args:
            data: 2-D array-like; every row is the features followed by the
                numeric target in the last column.

        Raises:
            ValueError: if ``data`` is empty.
        """
        data=np.array(data)
        m=len(data)  # number of samples
        if m==0:
            raise ValueError("data must contain at least one sample")
        self.models=[0]*self.K
        # Reset together with models so a refit keeps the two lists aligned.
        self.alphas=[]

        # Initial sample weights: uniform.
        D=[1/m]*m
        eps=1e-10  # keeps e away from 0 and 1 so alpha and ln(1/alpha) stay finite

        for k in range(self.K):
            # Train the k-th weak regressor on the weighted samples.
            self.models[k]=CARTRegressor(min_samples_leaf=self.min_samples_leaf,max_depth=self.max_depth)
            self.models[k].fit(data,D)

            # Collect (sample index, residual) from the leaves; every leaf is
            # consumed as a (predicted value, indices of its samples) pair.
            residuals=[]
            for value,index_list in self.models[k].root.print_leaf_node():
                for index in index_list:
                    residuals.append((index,data[index][-1]-value))
            # Order by sample index so the errors line up with D.
            residuals.sort(key=operator.itemgetter(0))
            abs_errors=[abs(r) for _,r in residuals]

            # Largest absolute error on the training set. The sign must be
            # dropped, otherwise a large negative residual is ignored and the
            # maximum can even be zero or negative.
            e_max=max(abs_errors)
            if e_max>0:
                # Squared relative error in [0, 1]: (|err| / E)^2.
                e_list=[(x/e_max)**2 for x in abs_errors]
            else:
                # Perfect fit: every relative error is zero.
                e_list=[0.0]*len(abs_errors)
            e=sum([x*y for x,y in zip(D,e_list)])   # weighted error rate
            e=min(max(e,eps),1-eps)

            alpha=e/(1-e)   # weight of this weak regressor
            self.alphas.append(alpha)

            temp=[x*alpha**(1-y) for x,y in zip(D,e_list)]
            Z=sum(temp)    # normalisation factor
            D=[x/Z for x in temp]  # updated sample weights

    def predict(self,data):
        """Predict with the learner whose ``ln(1/alpha)`` is the median.

        With an even number of learners the lower of the two middle learners
        is used.

        Raises:
            ValueError: if the model has not been fitted.
        """
        if not self.alphas:
            raise ValueError("AdaboostRegressor must be fitted before calling predict")
        ranked=sorted(enumerate([math.log(1/x) for x in self.alphas]), key=operator.itemgetter(1))
        k_res=ranked[(len(ranked)+1)//2-1][0]
        model=self.models[k_res]
        return model.predict(data)

In [2]:
# Toy 1-D regression set: each row is [feature, target], features run 1..10.
datasets = [
    [feature, target]
    for feature, target in enumerate(
        [4.5, 4.75, 4.91, 5.34, 5.80, 7.05, 7.9, 8.23, 8.7, 9.0],
        start=1,
    )
]
print('================================adaboost回归结果================================')

# Boost 100 depth-2 regression trees, then query a single unseen point.
model = AdaboostRegressor(K=100, min_samples_leaf=1, max_depth=2)
model.fit(datasets)
print('预测结果：', model.predict([[1.8]]))



[5.0600000000000005]

In [None]:
import numpy as np
# Scratch check: start 1, stop 10 (exclusive), step 2 -> array([1, 3, 5, 7, 9]).
np.arange(1,10,2)

In [None]:
a=np.array([1,2,3,8,5])
b=[2,2,2]
# NOTE(review): `a` has 5 elements and `b` has 3, so the shapes (5,) and (3,)
# cannot be broadcast together and this multiplication raises ValueError.
# `a` is still bound afterwards because the assignment above already ran.
a*b

In [None]:
# Scratch check: `/` is true division in Python 3, so this is a float (0.333...).
1/3

In [None]:
# Scratch check: `**` binds tighter than `*`, so this is 2*(3**3) = 54.
2*3**3

In [None]:
# Scratch check: floor division, 9//2 evaluates to 4.
9//2

In [None]:
import operator
# Scratch check: keyed on item 0 (the index), so max() returns the
# (index, value) pair with the largest index, i.e. the last element of `a`.
# Relies on `a` being bound by an earlier cell.
max(enumerate(a), key=operator.itemgetter(0))  

In [None]:
# Scratch check: sort the (index, value) pairs by value, then take the original
# index of the 4th-smallest value. Relies on `a` being bound by an earlier cell
# and holding at least four elements.
sorted(enumerate(a), key=operator.itemgetter(1))[3][0]

In [None]:
from tree import DecisionTreeNode,CARTRegressor,CARTClassifier
# Empty subclass used to probe what a subclass inherits from CARTRegressor.
class g(CARTRegressor):
    pass

# NOTE: rebinds `a`, which earlier scratch cells used for an array.
a=g()
# List the attributes of the bound `fit` method that `g` gets from its base class.
dir(a.fit)

In [4]:
# Execute tree.py as a script inside this kernel; the text below is its printed output.
%run tree.py

{'label': '是是', 'feature': '有自己的房子', 'tree': {'否': {'label': '否否', 'feature': '有工作', 'tree': {'否': {'label': '否否', 'feature': None, 'tree': {}}, '是': {'label': '是是', 'feature': None, 'tree': {}}}}, '是': {'label': '是是', 'feature': None, 'tree': {}}}}
预测结果： ['否否']
[('否否', [0, 1, 4, 5, 6, 14]), ('是是', [2, 12, 13]), ('是是', [3, 7, 8, 9, 10, 11])]
{'label': '是是', 'feature': '有自己的房子', 'tree': {'否': {'label': '否否', 'feature': '有工作', 'tree': {'否': {'label': '否否', 'feature': None, 'tree': {}}, '是': {'label': '是是', 'feature': None, 'tree': {}}}}, '是': {'label': '是是', 'feature': None, 'tree': {}}}}
预测结果： ['否否']
[('否否', [0, 1, 4, 5, 6, 14]), ('是是', [2, 12, 13]), ('是是', [3, 7, 8, 9, 10, 11])]
{'label': '是是', 'feature': '有自己的房子', 'tree': {'=是': {'label': '是是', 'feature': None, 'tree': {}}, '≠是': {'label': '否否', 'feature': '有工作', 'tree': {'=是': {'label': '是是', 'feature': None, 'tree': {}}, '≠是': {'label': '否否', 'feature': None, 'tree': {}}}}}}
预测结果： ['否否']
[('是是', [3, 7, 8, 9, 10, 11]), ('是是', [2, 12,

In [None]:
# Scratch check of the sort used in AdaboostRegressor.fit: order the pairs by
# their first item. Needs `operator` imported by an earlier cell.
a=[(0,1),(3,2),(1,0)]
a=sorted(a,key=operator.itemgetter(0))


In [None]:
# Keep only the first item of every element of `a`.
# NOTE(review): overwrites `a` in place, so re-running this cell on the resulting
# list of ints fails (ints are not subscriptable) -- assumes `a` is the list of
# pairs from the previous cell.
a=[x[0] for x in a]

In [None]:
# Display the current value of `a`.
a