# resources

https://en.wikipedia.org/wiki/Decision_tree - the name of the model already tells you most of what you need to understand about it

https://en.wikipedia.org/wiki/Ensemble_learning - usually we don't use a single tree but an ensemble of many trees

https://en.wikipedia.org/wiki/Boosting_(machine_learning) - a type of ensemble learning; we will discuss it today

http://xgboost.readthedocs.io/en/latest/ - the XGBoost documentation; read it beforehand

# imports 

In [None]:
import xgboost as xgb

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from ipywidgets import interact
from functools import lru_cache

In [None]:
# Figure size passed as `figsize` to plt.figure() in plot_data().
FIGSIZE = (15, 8)

def ground_truth(x):
    """Evaluate the noise-free target curve at ``x``.

    The curve is ``x*sin(x) + 5*sin(2x) + 2*sin(3x)``. ``x`` may be a scalar
    or a NumPy array; the arithmetic is applied element-wise.
    """
    growing_wave = x * np.sin(x)
    double_freq_wave = 5 * np.sin(2 * x)
    triple_freq_wave = 2 * np.sin(3 * x)
    return growing_wave + double_freq_wave + triple_freq_wave

def gen_data(n_samples=200, seed=15, noise_scale=2, test_size=0.2, split_seed=3):
    """Generate a noisy 1-D regression dataset and split it into train/test.

    Features are drawn uniformly from [0, 10]; targets are ``ground_truth``
    evaluated at those points plus Gaussian noise.

    Parameters
    ----------
    n_samples : int
        Total number of points generated before splitting.
    seed : int
        Seed given to ``np.random.seed``. Note that this reseeds NumPy's
        *global* random state, exactly as the original hard-coded call did.
    noise_scale : float
        Standard deviation of the Gaussian noise added to the targets.
    test_size : float
        Forwarded to ``train_test_split`` as ``test_size``.
    split_seed : int
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``; the feature arrays are
        column vectors (reshaped to ``(-1, 1)`` before the split).
    """
    np.random.seed(seed)
    # Column vector: one feature per sample.
    X = np.random.uniform(0, 10, size=n_samples).reshape(-1, 1)
    noise = np.random.normal(scale=noise_scale, size=n_samples)
    y = ground_truth(X.reshape(-1)) + noise

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=split_seed
    )
    return X_train, X_test, y_train, y_test

# Module-level dataset shared by plot_data() and train_xgb(): 100 generated
# samples, split into train and test parts by gen_data().
X_train, X_test, y_train, y_test = gen_data(100)


# 500 evenly spaced x positions on [0, 10], used to draw smooth curves.
x_plot = np.linspace(0, 10, 500)

def plot_data(alpha=0.4, s=20):
    """Draw the ground-truth curve and the train/test samples on a new figure.

    Opens a fresh figure of size ``FIGSIZE``, plots ``ground_truth`` over
    ``x_plot`` and scatters the module-level train points (default colour)
    and test points (red). The figure is not shown here; callers add their
    own artists and call ``plt.show()`` themselves.

    Parameters
    ----------
    alpha : float
        Opacity applied to the curve and to both scatter plots.
    s : float
        Marker size passed to both scatter plots.
    """
    # The figure and line handles are not needed afterwards, so they are not
    # bound to local names.
    plt.figure(figsize=FIGSIZE)
    plt.plot(x_plot, ground_truth(x_plot), alpha=alpha, label='ground truth')

    plt.scatter(X_train, y_train, s=s, alpha=alpha)
    plt.scatter(X_test, y_test, s=s, alpha=alpha, color='red')
    plt.xlim((0, 10))
    plt.ylim((-15, 20))
    plt.ylabel('y')
    plt.xlabel('x')
    
# Show the raw data (ground-truth curve plus train/test points) before any
# model is fitted.
plot_data()
plt.show()

In [None]:
@lru_cache(maxsize=None)
def fib(n):
    """Return the n-th Fibonacci number, with fib(0) == 0 and fib(1) == 1.

    Any ``n`` below 2 is returned unchanged. Results are memoized without a
    size limit, so each distinct ``n`` is computed only once.
    """
    return n if n < 2 else fib(n - 1) + fib(n - 2)

@lru_cache(maxsize=None)
def train_xgb(params):
    """Fit an ``XGBRegressor`` on the module-level training data.

    ``params`` is an iterable of ``(name, value)`` pairs rather than a dict,
    because ``lru_cache`` needs a hashable argument; callers pass
    ``tuple(some_dict.items())``. The result of ``fit`` is cached per
    distinct ``params`` with no size limit.
    """
    keyword_args = dict(params)
    regressor = xgb.XGBRegressor(**keyword_args)
    return regressor.fit(X_train, y_train)

In [None]:
# Bare expression with no assignment: as the last line of a notebook cell it
# only displays the class object. The models trained in this file use
# xgb.XGBRegressor, not this classifier.
xgb.XGBClassifier

In [None]:
# Candidate values for each XGBoost hyper-parameter, unpacked into `interact`
# as keyword arguments. The two tree-shape parameters take the Fibonacci
# numbers fib(2)..fib(19) (1 up to 4181); the rest are evenly spaced grids.
interact_params = dict(
    n_estimators=list(map(fib, range(2, 20))),
    max_depth=list(map(fib, range(2, 20))),
    gamma=np.linspace(0, 1, num=10),
    subsample=np.linspace(1, 0.5, num=8),
    learning_rate=np.linspace(0.1, 2, num=10),
    reg_alpha=np.linspace(0, 1, num=10),
    reg_lambda=np.linspace(1, 0, num=10),
    min_child_weight=np.linspace(1, 10, num=9),
)

In [None]:
def pretty_format_params(params):
    """Return a compact string rendering of a parameter dict for plot labels.

    Each value is converted to a short string and the resulting dict is
    passed through ``str``:

    * integers (Python ``int`` or NumPy integer) are shown as-is, e.g. ``'5'``
      rather than ``'5.00'``;
    * other numbers are rounded to two decimals;
    * anything that does not support the ``.2f`` format (strings, ``None``)
      falls back to ``str(value)`` instead of raising.

    Parameters
    ----------
    params : dict
        Mapping of parameter name to value.

    Returns
    -------
    str
        The ``str()`` of a dict mapping each name to its formatted value.
    """
    def _single_pretty_format(param):
        # Counts such as n_estimators / max_depth read better without ".00".
        if isinstance(param, (int, np.integer)):
            return str(param)
        try:
            return f'{param:.2f}'
        except (TypeError, ValueError):
            # Non-numeric values reject the float format spec.
            return str(param)

    return str({k: _single_pretty_format(v) for k, v in params.items()})

In [None]:
@interact(**interact_params)
def plot(**params):
    """Plot the data together with the prediction of a model fitted on ``params``.

    Registered with ``interact`` using the value grids in ``interact_params``;
    the selected values arrive here as keyword arguments. The fitted model
    comes from ``train_xgb``, which caches by parameter combination.
    """
    plot_data()

    # train_xgb is memoized, so it needs a hashable view of the parameters.
    cache_key = tuple(params.items())
    model = train_xgb(cache_key)

    # The model expects a 2-D feature matrix, hence the added column axis.
    predictions = model.predict(x_plot[:, np.newaxis])
    legend_text = pretty_format_params(params)

    plt.plot(x_plot, predictions, label=legend_text, color='g', alpha=0.9, linewidth=3)
    plt.legend(loc='upper left')
    plt.show()