# Ensemble Learning - GBM

### 수도 알고리즘은 강의 자료 참고 

1. Initialize $f_0(x) = \arg\min_\gamma \sum_{i=1}^N L(y_i, \gamma)$
2. For m = 1 to M 
- for i = 1, ... N compute 
> $g_{im} = [\frac{\partial L(y_i, f(x_i))}{\partial f(x_i)}]_{f(x_i) = f_{m-1}(x_i)}$ 
- Fit a regression tree to the targets $g_{im}$ giving terminal regions $R_{jm}, j=1, ... , J_m$ 
- For j=1, ..., $J_m$ compute 
> $\gamma_{jm} = \arg\min_\gamma \sum_{x_i \in R_{jm}} L(y_i, f_{m-1}(x_i) + \gamma)$
- update $f_m(x) = f_{m-1}(x) + \sum_{j=1}^{J_m} \gamma_{jm} I(x \in R_{jm})$ 
3. Output $\hat f(x) = f_M(x)$ 



**구현해야 하는 것**
- f(x) : 약한 학습기. Stump tree 적용 
- loss 함수 : OLS 적용



In [None]:
import numpy as np
import pandas as pd
import random as rand

from sklearn.datasets import load_iris
# Feature matrix: the first 100 rows of the iris data set.
X = load_iris().data[:100]

# Recode the labels so that every value is either +1 or -1: the first 50 rows
# are overwritten with -1 (the remaining rows are presumably already 1 --
# confirm against the data set).
y = load_iris().target[:100]
y[:50] = -1
y = y[:, np.newaxis]
# Append the label column to the features: each row of S is [features..., label].
S = np.hstack((X, y))

import matplotlib.pyplot as plt
import scipy as sc
from scipy.stats import norm
from sys import maxsize

In [None]:
# Implementation of the stump_tree function
def stump_tree(data):
    """Build a random decision stump and label every row of ``data`` with it.

    A random row and a random feature column are drawn; that row's value in
    the chosen column becomes the threshold.  Rows whose feature value is
    strictly greater than the threshold form the "right" side, all other rows
    the "left" side.  The stump predicts right = +1 / left = -1 when that
    orientation classifies more than half of the rows correctly, and the
    opposite orientation otherwise (ties fall to the opposite orientation).

    Args:
        data: 2-D array-like whose last column holds the labels (+1 / -1) and
            whose remaining columns hold the features.

    Returns:
        A tuple ``(result, chose_att, crit, direction)``:
        ``result`` is a float array with one +1 / -1 prediction per row,
        ``chose_att`` is the index of the selected feature column,
        ``crit`` is the threshold value, and ``direction`` is ``"right"``
        when the right side is predicted as +1 and ``"left"`` otherwise.
    """
    data = np.asarray(data)

    # Draw order matters for reproducibility under a seed: row first, column second.
    chose_var = data[np.random.choice(range(len(data)))]
    chose_att = np.random.choice(range(np.shape(data)[1] - 1))
    crit = chose_var[chose_att]

    labels = data[:, -1]
    is_right = data[:, chose_att] > crit

    # Rows classified correctly when right -> +1 and left -> -1.  Boolean masks
    # replace per-row fancy indexing of the subset, which copied the whole
    # subset on every iteration (quadratic work).
    hits = (np.count_nonzero(labels[is_right] == 1)
            + np.count_nonzero(labels[~is_right] == -1))

    if hits > len(data) / 2:
        direction = "right"
        result = np.where(is_right, 1.0, -1.0)
    else:
        direction = "left"
        result = np.where(is_right, -1.0, 1.0)
    return result, chose_att, crit, direction

def cal_stump_tree(vector, chose_att, crit, direction):
    """Classify one sample with a stored decision stump.

    Args:
        vector: indexable sample; ``vector[chose_att]`` is the tested feature.
        chose_att: index of the feature the stump splits on.
        crit: threshold; values strictly greater than it fall on the right side.
        direction: ``"right"`` if the right side is the +1 side; any other
            value means the left side is the +1 side.

    Returns:
        1 when the sample lies on the stump's +1 side, otherwise -1.
    """
    above_threshold = bool(vector[chose_att] > crit)
    right_is_positive = direction == "right"
    # The sample is positive exactly when its side matches the +1 side.
    return 1 if above_threshold == right_is_positive else -1

In [None]:
# NOTE: it is unclear how best to keep each h_t as a formula.

def adaboost(T, data, new_vector):
    """Boost T random decision stumps and classify ``new_vector`` with them.

    Each round resamples ``data`` according to the current sample weights,
    builds a stump on the resample with ``stump_tree``, stores the stump's
    parameters and its weight ``alpha``, and then reweights the original
    samples.  The final prediction is the sign of the alpha-weighted vote of
    all stored stumps.

    Args:
        T: number of boosting rounds.
        data: 2-D numpy array whose last column holds the +1 / -1 labels and
            whose remaining columns hold the features.
        new_vector: indexable feature vector to classify.

    Returns:
        1 if the weighted vote is strictly positive, otherwise -1.
    """
    n_samples = len(data)
    labels = data[:, -1]
    # D[i] is the weight of the i-th row of the ORIGINAL ``data``.
    D = np.ones(n_samples) / n_samples

    alpha_list = []
    h_list = []

    for _ in range(T):
        # Open question from the original author: is drawing the training
        # sample by random choice (with probabilities D) the right approach?
        new_index = np.random.choice(range(n_samples), n_samples, p=D)
        new_data = data[new_index]
        # NOTE(review): epsilon() and alpha() are defined outside this block.
        # epsilon() is called before the stump is built and only receives the
        # resampled data -- confirm it really yields the stump's weighted error.
        epsil = epsilon(new_data)

        _, chose_att, crit, direction = stump_tree(new_data)
        h_list.append([chose_att, crit, direction])
        a = alpha(epsil)
        alpha_list.append(a)

        # Reweight against the original rows.  The stump's output on the
        # resample is ordered by ``new_index``, so using it here would update
        # D[i] with the label/prediction of a different sample.
        predictions = np.array(
            [cal_stump_tree(row, chose_att, crit, direction) for row in data]
        )
        D = D * np.exp(-a * labels * predictions)
        D = D / D.sum()  # normalise so D stays a probability distribution

    # Alpha-weighted vote of all stumps on the query vector.
    result = 0
    for a, (chose_att, crit, direction) in zip(alpha_list, h_list):
        result += a * cal_stump_tree(new_vector, chose_att, crit, direction)

    if result > 0:
        return 1
    return -1
