# Seminar 6: Boosting
Course: MA06018, Machine Learning by Professor Evgeny Burnaev <br>
Author: Evgenii Egorov

In [None]:
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load cat.png as an array and average over axis 2 (presumably the colour
# channels — confirm the file is not single-channel) to get a 2-D image.
cat_kit = np.asarray(Image.open('cat.png')).mean(axis=2)
plt.figure(figsize=[10, 5]);
# The trailing semicolon keeps the notebook from echoing the returned object.
plt.imshow(cat_kit, cmap='gray');

In [None]:
from sklearn.model_selection import train_test_split

def make_train_test(image, train_size_fraction):
    """Turn a 2-D image into a pixel-coordinate dataset and split it.

    Every pixel becomes one sample: its features are the (column, row)
    position of the pixel and its target is the pixel value.

    Parameters
    ----------
    image : 2-D array
        Array whose ``shape`` unpacks into exactly two values.
    train_size_fraction : float
        Share of the pixels that goes to the training part; the remaining
        ``1 - train_size_fraction`` goes to the test part.

    Returns
    -------
    X_train, X_test, target_train, target_test
        The four arrays produced by ``train_test_split`` with
        ``random_state=1011``.
    data
        Coordinates of all pixels in row-major order, one (column, row)
        pair per row.
    """
    _, width = image.shape
    target = image.reshape(-1)

    # A row-major flat index splits into (row, column) via divmod by the width.
    rows, cols = divmod(np.arange(len(target)), width)
    data = np.array([cols, rows]).T

    split = train_test_split(
        data,
        target,
        train_size=train_size_fraction,
        test_size=1 - train_size_fraction,
        random_state=1011,
    )
    return (*split, data)


def plot_prediction(original, prediction):
    """Draw the original image and a predicted image side by side.

    Parameters
    ----------
    original : 2-D array
        Reference image, shown in the left panel.
    prediction : array
        Predicted pixel values; reshaped to ``original.shape`` and shown in
        the right panel.
    """
    plt.figure(figsize=[20, 10])

    # Left panel: the reference image.
    plt.subplot(1, 2, 1)
    plt.imshow(original, cmap='gray')

    # Right panel: the flat prediction folded back into image shape.
    plt.subplot(1, 2, 2)
    restored = prediction.reshape(original.shape)
    plt.imshow(restored, cmap='gray')

In [None]:
# Keep only 0.7% of the pixels for training; X holds the coordinates of every
# pixel so that a fitted model can later predict the whole image.
X_train, X_test, target_train, target_test, X = make_train_test(cat_kit, train_size_fraction=0.007)

In [None]:
from sklearn.linear_model import LinearRegression as LR

In [None]:
# Baseline: linear regression of pixel value on pixel coordinates.
regressor = LR()
regressor.fit(X_train, target_train)

In [None]:
# Predict the value of every pixel of the image, not only the held-out ones.
prediction = regressor.predict(X)

In [None]:
# Show the original image next to the image rebuilt from the predictions.
plot_prediction(original=cat_kit, prediction=prediction)

# Task 1

Make the same prediction with 
* DecisionTreeRegressor. Tune "max_depth" with cross-validation over the set [3, 5, 10, 15]
* RandomForestRegressor. Use n_estimators=100. Don't forget to set the n_jobs param to use all of your laptop's power


Insert your MSE losses on https://goo.gl/forms/2wzmWffMXCnVnrFC2

In [None]:
from sklearn.tree import DecisionTreeRegressor as DTR
from sklearn.model_selection import GridSearchCV

In [None]:
# Task 1 template (decision tree): the three placeholders below must be filled
# in before this cell can run.
# `parameters`: the "max_depth" candidates listed in the task statement.
parameters = # your code
# `tree`: the base decision-tree regressor (DTR is imported above).
tree = # your code
# `regressor`: presumably a GridSearchCV built from `tree` and `parameters`;
# whatever is assigned must provide fit() and predict().
regressor = # your code

# Fit on the training pixels, predict every pixel and compare with the original.
regressor.fit(X_train, target_train)
prediction = regressor.predict(X)
plot_prediction(original=cat_kit, prediction=prediction)

In [None]:
from sklearn.ensemble import RandomForestRegressor as RFR

In [None]:
# your code

Wow! With a training set of only 0.7% of the pixels, trees are this powerful! Well, the reason lies mostly in the structure of the image, which is also why CNNs are so popular. Thanks to nature. For details: <a href='https://arxiv.org/abs/1608.08225'>Why does deep and cheap learning work so well?</a>

# Gradient boosting in nuts and bolts

Minimizing a loss function is an optimization task, and gradient boosting is one possible way to carry out this optimization greedily.

$$
\begin{aligned}
& b_n(x) := \text{a base algorithm from family of the algorithms $\mathcal{A}$} \\
& \gamma_n := \text{scale or a weight of a base algorithm} \\
& a_N(x) = \sum\limits_{n=0}^{N}\gamma_n b_n(x) := \text{target composition}
\end{aligned}
$$


### Algorithm for regression with some loss function $L(y, z)$ and the dataset $(x_i, y_i)_{i=1}^l$

1. Init $b_0(x) = \text{mean}(y_{train})$, $\gamma_0 = 1$
2. For $n = 1, \dots, N$:
    * Subproblem: $\sum\limits_{i=1}^{l}L\left(y_i, a_{n-1}(x_i) + \gamma_n b_n(x_i)\right) \to \min\limits_{b_{n}, \gamma_n}$  
    * Solution of the subproblem:
    $$
    \begin{aligned}
    & s_i = - \left.\frac{\partial L(y_i, z)}{\partial z}\right|_{z = a_{n-1}(x_i)} \\
    & b_n(x) = \arg\min\limits_{b\in\mathcal{A}}\sum\limits_{i=1}^{l}(b(x_i) - s_i)^2 \\
    & \gamma_n = \text{line search or some other selection rule}
    \end{aligned}
    $$
return $a_N(x) = \sum\limits_{n=0}^{N}\gamma_n b_n(x)$

# Task 2

Write the MSE loss function and its gradient

In [None]:
# Task 2 stub: gradient of the MSE loss L(y, z) with respect to z, where y is
# the target and z is the current prediction (see the algorithm description).
def loss_mse_grad(y, z):
    # your code

# Task 2 stub: value of the MSE loss L(y, z) for targets y and predictions z.
def loss_mse(y, z):
   # your code

# Task 3

Implement boosting.

Insert your MSE losses on https://goo.gl/forms/nUH4YaKOPpZwG8sB3

In [None]:
from copy import deepcopy
from tqdm import tqdm
from scipy.optimize import golden
from IPython import display


# Task 3 stub: evaluate the boosted composition (the weighted sum of the stored
# models) at `point`. Both branches below still have to be written.
def naive_boosting_predict(list_models, list_weights, point):
    prediction = 0
    for k in range(len(list_models)):
        gamma = list_weights[k]
        b = list_models[k]
        if k > 0:
            # Entries after the first are the deep-copied regressors appended
            # by naive_boosting_fit.
            # your code
        else:
            # Entry 0 is the value np.mean(y_train) stored by
            # naive_boosting_fit, not a regressor object.
           # your code
    return prediction


def naive_boosting_fit(N, X_train, y_train, regressor, loss_grad, loss, verbose=False, X_all=None, y_all=None):
    """Fit a boosted composition step by step (exercise template).

    The composition starts from the constant ``np.mean(y_train)`` taken with
    weight 0.5. Each of the ``N`` iterations is expected to fit ``regressor``
    and to define ``weight_func`` (the part marked "your code"); the weight
    of the new model is then chosen by ``scipy.optimize.golden`` and a deep
    copy of ``regressor`` is stored.

    Parameters
    ----------
    N : int
        Number of boosting iterations.
    X_train, y_train
        Training features and targets.
    regressor
        Base model; a deep copy of it is appended at every iteration.
    loss_grad, loss
        Callables for the loss gradient and the loss value, intended for the
        code you add inside the loop.
    verbose : bool
        If True, draw the current prediction for ``X_all`` on every second
        iteration.
    X_all, y_all
        Required when ``verbose`` is True: the points to predict and an array
        whose ``shape`` the prediction is reshaped to for display.

    Returns
    -------
    list_models, list_weights
        Two lists of equal length; element 0 holds the initial constant and
        its weight.

    Raises
    ------
    ValueError
        If ``verbose`` is True but ``X_all`` or ``y_all`` is missing.
    """
    # Fail early with a clear message instead of an AttributeError mid-training.
    if verbose and (X_all is None or y_all is None):
        raise ValueError("verbose=True requires both X_all and y_all")

    b_0 = np.mean(y_train)
    # NOTE(review): the algorithm description in this notebook initialises
    # gamma_0 = 1; confirm that 0.5 is intended here.
    gamma_0 = 0.5

    list_models = [b_0]
    list_weights = [gamma_0]

    # Open a figure only when something will be drawn, so a silent run neither
    # leaves an empty figure behind nor touches matplotlib at all.
    fig = plt.figure() if verbose else None

    for n in range(N):
        # your code

        gamma = golden(weight_func)

        # for imaging during training
        if verbose and n % 2 == 0:
            # The snapshot is taken before this iteration's model is appended.
            all_predict = naive_boosting_predict(list_models, list_weights, X_all)
            plt.ion()
            plt.imshow(all_predict.reshape(*y_all.shape), cmap='gray')
            plt.pause(0.05)
            fig.canvas.draw()

        list_models.append(deepcopy(regressor))
        list_weights.append(gamma)

    return list_models, list_weights

In [None]:
# Task 3 template: choose a base regressor, fit the boosted ensemble
# (presumably with naive_boosting_fit) and predict every pixel in X
# (presumably with naive_boosting_predict).
regressor_tree = # your code
models, weights = # your code
prediction = # your code

In [None]:
# Compare the boosted prediction with the original image.
plot_prediction(original=cat_kit, prediction=prediction)

In [None]:
# Second run of the same Task 3 template, presumably with different settings
# (the notebook does not say which); the same three placeholders apply.
regressor_tree = # your code
models, weights = # your code
prediction = # your code

In [None]:
# Compare the prediction from the second run with the original image.
plot_prediction(original=cat_kit, prediction=prediction)