In [None]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
import utils
import sklearn
from sklearn import tree
import matplotlib.pyplot as plt

In [None]:
# Toy dataset: one feature column (ages 10, 20, ..., 80) and one target value per row.
features = np.arange(10, 90, 10).reshape(-1, 1)
labels = np.asarray([7, 5, 7, 1, 2, 1, 5, 4])

In [None]:
# Scatter plot of the raw data, drawn through an explicit Axes object.
fig, ax = plt.subplots()
ax.scatter(features, labels)
ax.set_xlabel("Age")
ax.set_ylabel("Days per week")
plt.show()

# Fitting a decision tree

In [None]:
# Fit a shallow regression tree (depth at most 2) on the age / days-per-week data.
decision_tree_regressor = DecisionTreeRegressor(max_depth=2)
# Kept as the last expression so the fitted estimator is shown as the cell output.
decision_tree_regressor.fit(features, labels)

In [None]:
# Show the fitted decision tree.
# NOTE(review): `utils` is a project-local module not visible here; presumably this renders the tree structure.
utils.display_tree(decision_tree_regressor)

In [None]:
# Plot the decision tree regressor together with the data.
# NOTE(review): presumably draws the model's predictions over the scatter of (features, labels) - confirm in utils.
utils.plot_regressor(decision_tree_regressor, features, labels)

# Gradient boosting

In [None]:
# First weak learner: a constant model that predicts the mean label for every age.
# The mean is computed from the data (it equals 4 here) instead of being hard-coded,
# so the line stays correct if `labels` is edited.
x = np.linspace(0, 85, 2)
mean_label = labels.mean()
plt.plot(x, np.full_like(x, mean_label))
# The points are drawn once; the original drew the identical scatter twice.
plt.scatter(features, labels, color='blue')

In [None]:
# Train a small boosted ensemble: 4 trees of depth at most 2, each scaled by a learning rate of 0.8.
gradient_boosting_regressor = GradientBoostingRegressor(
    n_estimators=4,
    max_depth=2,
    learning_rate=0.8,
).fit(features, labels)
# Cell output: the ensemble's predictions on the training points.
gradient_boosting_regressor.predict(features)

In [None]:
# Plot the fitted gradient boosting model together with the data.
# NOTE(review): behaviour of utils.plot_regressor is defined outside this notebook - confirm in utils.
utils.plot_regressor(gradient_boosting_regressor, features, labels)

In [None]:
# Replay the fitted ensemble one weak learner at a time.
# Starting from the labels minus their mean, each learner's prediction is scaled by the
# model's learning rate and added to a running total; the residuals left over are what
# the next learner is shown against.
predictions_estimators = []                      # raw (unscaled) predictions of each weak learner
predictions = np.zeros(len(labels))              # running sum of the scaled predictions
centered_labels = labels - labels.mean()
residuals = [centered_labels]                    # residuals[k] = targets before learner k+1
# estimators_ holds one tree per boosting stage in column 0.
for i, weak_learner in enumerate(gradient_boosting_regressor.estimators_[:, 0]):
    print("\n"+"*"*50+"\n")
    print("Weak learner", i+1)
    preds = weak_learner.predict(features)
    predictions_estimators.append(preds)
    print("Residuals to predict:", residuals[-1])
    print("Predictions:", preds)
    # Use the learning rate the model was trained with rather than a hard-coded copy of it.
    predictions += preds*gradient_boosting_regressor.learning_rate
    # White (invisible) points at the centered labels.
    # NOTE(review): presumably this keeps the axis range identical across the plots - confirm.
    plt.scatter(features, centered_labels, color='white')
    utils.plot_regressor(weak_learner, features, residuals[-1])
    plt.show()
    residuals.append(centered_labels-predictions)
    print("New residuals:", residuals[-1])

Predictions of the model when only the first i weak learners are used

In [None]:
# Refit the ensemble with 1, 2, ..., n_estimators trees and plot each intermediate model.
# Hyperparameters are read from the model fitted above so the two cannot drift apart.
for i in range(1, gradient_boosting_regressor.n_estimators + 1):
    print("Up to weak learner number", i)
    gb_intermediate = GradientBoostingRegressor(
        max_depth=gradient_boosting_regressor.max_depth,
        n_estimators=i,
        learning_rate=gradient_boosting_regressor.learning_rate,
    )
    gb_intermediate.fit(features, labels)
    # The original also stored gb_intermediate.predict(features) in `predictions`, which was
    # never read and overwrote the running total computed in the previous cell; it is dropped.
    utils.plot_regressor(gb_intermediate, features, labels)

In [None]:
# Draw every tree of the boosted ensemble, one figure per boosting stage.
# The loop variable is deliberately not called `tree`: that name is the sklearn.tree module
# imported at the top of the notebook, and rebinding it would hide the module in later cells.
for stage in gradient_boosting_regressor.estimators_:
    # Each stage is a length-1 row holding the regression tree for that stage.
    sklearn.tree.plot_tree(stage[0], rounded=True)
    plt.show()

In [None]:
# Show the tree of the first boosting stage (index 0) via the project-local utils helper.
utils.display_tree(gradient_boosting_regressor.estimators_[0][0])

In [None]:
# Show the tree of the second boosting stage (index 1) via the project-local utils helper.
utils.display_tree(gradient_boosting_regressor.estimators_[1][0])

In [None]:
# Show the tree of the third boosting stage (index 2) via the project-local utils helper.
utils.display_tree(gradient_boosting_regressor.estimators_[2][0])

In [None]:
# Show the tree of the fourth boosting stage (index 3) via the project-local utils helper.
utils.display_tree(gradient_boosting_regressor.estimators_[3][0])

# XGBoost

In [None]:
import xgboost
from xgboost import XGBRegressor

# Train a tiny XGBoost ensemble (3 trees, depth at most 2) on the same data.
xgboost_regressor = XGBRegressor(
    n_estimators=3,
    max_depth=2,
    learning_rate=0.7,
    reg_lambda=0,
    min_split_loss=1,
    random_state=0,
).fit(features, labels)
# Cell output: the model's score on the training data.
xgboost_regressor.score(features, labels)

In [None]:
# Plot the fitted XGBoost model together with the data.
# NOTE(review): behaviour of utils.plot_regressor is defined outside this notebook - confirm in utils.
utils.plot_regressor(xgboost_regressor, features, labels)

In [None]:
# Render the first tree (index 0) of the XGBoost ensemble as a Graphviz graph.
xgboost.to_graphviz(xgboost_regressor, num_trees=0)

In [None]:
# Render the second tree (index 1) of the XGBoost ensemble as a Graphviz graph.
xgboost.to_graphviz(xgboost_regressor, num_trees=1)

In [None]:
# Render the third tree (index 2) of the XGBoost ensemble as a Graphviz graph.
xgboost.to_graphviz(xgboost_regressor, num_trees=2)

In [None]:
# Cell output: the XGBoost model's predictions on the training points.
xgboost_regressor.predict(features)

# Calculations of similarity score

In [None]:
# Residuals of the labels with respect to a constant starting prediction of 0.5.
# NOTE(review): 0.5 is presumably XGBoost's initial prediction (base_score); the regressor
# above does not set it explicitly, so confirm it matches the installed XGBoost version.
residuals = labels-0.5
# Display the residuals as the cell output.
residuals

In [None]:
def score(l, lam=0):
    """Return the similarity score of a group of residuals.

    The score is the squared sum of the values divided by (number of values + lam).

    Args:
        l: Sequence of residuals (list or 1-D array).
        lam: Regularisation term added to the count in the denominator.

    Returns:
        0 for an empty group, otherwise sum(l)**2 / (len(l) + lam).
    """
    count = len(l)
    if count == 0:
        # An empty side of a split contributes nothing.
        return 0
    total = sum(l)
    return total ** 2 / (count + lam)

In [None]:
# Similarity score of all residuals taken as one group (no split), with no regularisation.
score(residuals, lam=0)

In [None]:
# Try every split position i: the first i residuals go left, the rest go right.
# i = 0 gives an empty left side, i.e. the "no split" baseline.
lam = 0
for i in range(len(residuals)):
    left = residuals[:i]
    right = residuals[i:]
    print(left, right)
    # Score both sides with the same `lam` as the total below; the original used the
    # default here, so the two printed lines would disagree for any non-zero lam.
    left_score = score(left, lam)
    right_score = score(right, lam)
    print(left_score, right_score)
    print(left_score+right_score)
    print()

In [None]:
# Residuals (labels - 0.5) split after the third point:
# the first three values go to the left child, the remaining five to the right child.
left_tree = [6.5, 4.5, 6.5]
right_tree = [0.5, 1.5, 0.5, 4.5, 3.5]

In [None]:
# Repeat the split search inside the left child.
# `lam` is the regularisation value set in the earlier split-search cell.
residuals = left_tree
# Score of the whole node, using the same `lam` as the split scores below.
print(score(residuals, lam))
for i in range(len(residuals)):
    left = residuals[:i]
    right = residuals[i:]
    print(left, right)
    # Score both sides with `lam` so this line always agrees with the total below.
    left_score = score(left, lam)
    right_score = score(right, lam)
    print(left_score, right_score)
    print(left_score+right_score)
    print()

In [None]:
# Repeat the split search inside the right child.
# `lam` is the regularisation value set in the earlier split-search cell.
residuals = right_tree
print(residuals)
for i in range(len(residuals)):
    left = residuals[:i]
    right = residuals[i:]
    print(left, right)
    # Score both sides with `lam` so this line always agrees with the total below.
    left_score = score(left, lam)
    right_score = score(right, lam)
    print(left_score, right_score)
    print(left_score+right_score)
    print()