In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, log_loss
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as pt
import seaborn as sns
from sklearn.model_selection import train_test_split

In [3]:
from sklearn.tree import DecisionTreeRegressor

def loss_calc(y_true,y_pred):
    """Return the halved mean squared error between targets and predictions.

    The value is ``(1 / len(y_true)) * 0.5 * sum((y_true - y_pred) ** 2)``.
    The subtraction follows NumPy broadcasting rules, so both inputs should
    already share the same shape; the sum runs over every entry of the
    broadcast difference.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Current model predictions.

    Returns
    -------
    scalar
        The scaled sum of squared residuals.
    """
    residual = y_true - y_pred
    squared_total = np.sum(np.square(residual))
    # Same left-to-right arithmetic as 1/n * 0.5 * total.
    scale = (1/len(y_true))*0.5
    return scale*squared_total

def gradient_calc(y_true,y_pred):
    """Return the negated residual ``-(y_true - y_pred)``.

    This is the per-sample derivative of ``0.5 * (y_true - y_pred) ** 2``
    with respect to ``y_pred``.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Current model predictions.

    Returns
    -------
    Same type as ``y_true - y_pred``
        Element-wise gradient.
    """
    residual = y_true - y_pred
    return -residual

def tree_creator(r_state,X,y):
    """Fit and return the shallow regression tree used as one boosting stage.

    Parameters
    ----------
    r_state : int or None
        Forwarded to ``DecisionTreeRegressor`` as ``random_state``.
    X : array-like
        Training features handed to ``fit``.
    y : array-like
        Training targets handed to ``fit`` (the boosting loop passes the
        negative gradient here).

    Returns
    -------
    DecisionTreeRegressor
        The fitted tree.
    """
    # The split criterion is deliberately left at the estimator default
    # (mean squared error). Spelling it 'mse' is rejected by
    # scikit-learn >= 1.2, and the replacement name 'squared_error' is not
    # recognised before 1.0, so relying on the default works on both.
    d_tree = DecisionTreeRegressor(random_state = r_state,
                                   max_depth = 2, min_samples_split = 5,
                                   min_samples_leaf = 5, max_features = 3)
    d_tree.fit(X,y)
    return d_tree

def predict_grad_boost(models_tray,alpha,test_x, train_y):
    """Predict with a boosted ensemble: training mean plus scaled stage outputs.

    Parameters
    ----------
    models_tray : sequence
        Fitted stage models, each exposing ``predict(test_x)``.
    alpha : float
        Learning rate applied to every stage's output.
    test_x : array-like
        Samples to predict; ``len(test_x)`` gives the number of rows.
    train_y : array-like
        Training targets; their overall mean is the starting prediction.

    Returns
    -------
    numpy.ndarray of shape (len(test_x), 1)
        The accumulated predictions.
    """
    n_samples = len(test_x)
    base_value = float(np.mean(np.asarray(train_y)))

    # Allocate one row per test sample up front. Starting from a (1, 1)
    # array cannot work because an in-place += never grows its left operand
    # to the (n, 1) shape of a stage's predictions.
    final_pred = np.full((n_samples, 1), base_value, dtype=float)

    for model in models_tray:
        stage_pred = np.asarray(model.predict(test_x), dtype=float).reshape(-1,1)
        final_pred += alpha*stage_pred

    return final_pred

In [None]:
def grad_boost_train(train_x,train_y,alpha = 0.01, r_state = 100, n_iters = 101):
    """Train a gradient-boosted ensemble of regression trees.

    Starts from the mean of ``train_y`` and, for ``n_iters`` rounds, fits a
    tree (via ``tree_creator``) to the negative gradient of the loss and adds
    ``alpha`` times its prediction to the running estimate.

    Parameters
    ----------
    train_x : array-like
        Training features, passed unchanged to ``tree_creator``.
    train_y : array-like
        Training targets; flattened to one value per sample.
    alpha : float, default 0.01
        Learning rate applied to every tree's output.
    r_state : int, default 100
        Random state forwarded to ``tree_creator``.
    n_iters : int, default 101
        Number of boosting rounds (one tree per round).

    Returns
    -------
    model_tray : list
        The fitted trees, in training order.
    loss_counter : list
        Loss measured at the start of every round.
    initial_pred : numpy.ndarray of shape (n_samples,)
        The constant starting prediction (mean of ``train_y``).

    Raises
    ------
    ValueError
        If ``train_y`` is empty.
    """
    # Work on an (n, 1) column so the residuals line up one-to-one with the
    # predictions; a 1-D target against an (n, 1) prediction would broadcast
    # to an (n, n) matrix inside the loss and gradient.
    target = np.asarray(train_y, dtype=float).reshape(-1,1)
    n_samples = target.shape[0]
    if n_samples == 0:
        raise ValueError("train_y must contain at least one sample")

    # One copy of the mean per sample. Multiplying a one-element array by
    # n_samples would instead give a single value equal to mean * n_samples.
    initial_pred = np.full(n_samples, np.mean(target))

    # reshape returns a view, so copy: the in-place updates below must not
    # overwrite the starting prediction handed back to the caller.
    model_pred = initial_pred.reshape(-1,1).copy()

    model_tray = []
    loss_counter = []

    for epoch in range(n_iters):
        if epoch%100==0:
            print('#---------- Epoch number :',epoch,' -----------#')
        loss_counter.append(loss_calc(y_true = target,
                                      y_pred = model_pred))

        grads = gradient_calc(y_true = target,
                              y_pred = model_pred)

        # Each tree learns the negative gradient, so adding its output moves
        # the prediction downhill on the loss.
        tree_grad = tree_creator(r_state = r_state,
                                 X = train_x,
                                 y = (-grads).ravel())
        pred_m = np.asarray(tree_grad.predict(train_x)).reshape(-1,1)
        model_pred += alpha*pred_m
        model_tray.append(tree_grad)

    return model_tray,loss_counter, initial_pred

In [None]:
class GradientBoosting(object):
    """Gradient-boosted regression trees trained on the half squared error.

    The ensemble starts from the mean of the targets and adds
    ``learning_rate`` times the output of each tree, where every tree is
    fitted to the negative gradient of the loss at the current prediction.
    The loss and gradient come from this notebook's ``loss_calc`` and
    ``gradient_calc`` helpers.

    Parameters
    ----------
    n_estimators : int
        Number of trees (boosting rounds).
    learning_rate : float
        Factor applied to every tree's output.
    min_samples_split : int
        Forwarded to each ``DecisionTreeRegressor``.
    min_impurity : float
        Forwarded as ``min_impurity_decrease``.
    max_depth : int
        Forwarded to each ``DecisionTreeRegressor``.
    regression : bool
        Must be truthy; the classification path is not implemented.

    Raises
    ------
    NotImplementedError
        If ``regression`` is falsy.
    """

    def __init__(self,n_estimators, learning_rate, min_samples_split,
                min_impurity, max_depth,regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression

        if not self.regression:
            # sklearn's log_loss is a metric taking (y_true, y_pred); it
            # cannot be instantiated as a loss object with a gradient.
            # Fail loudly until a classification loss is written.
            raise NotImplementedError(
                "GradientBoosting currently supports regression only")

        # Callable loss(y_true, y_pred) used to score the current prediction.
        self.loss = loss_calc

        # Mean of the training targets; set by fit and reused by predict.
        self.initial_prediction = None

        # Split criterion is left at the estimator default (mean squared
        # error): the spelling 'mse' is rejected by scikit-learn >= 1.2.
        # NOTE(review): max_features is fixed at 3, so X presumably needs at
        # least three columns - confirm against the installed scikit-learn.
        self.trees = [
            DecisionTreeRegressor(max_depth = self.max_depth,
                                  min_samples_split = self.min_samples_split,
                                  min_impurity_decrease = self.min_impurity,
                                  max_features = 3)
            for _ in range(n_estimators)
        ]

    def fit(self,X,y):
        """Fit every tree in sequence on the negative gradient of the loss.

        Parameters
        ----------
        X : array-like
            Training features.
        y : array-like
            Training targets.

        Returns
        -------
        GradientBoosting
            ``self``, to allow call chaining.
        """
        y = np.asarray(y, dtype=float)
        self.initial_prediction = np.mean(y,axis = 0)
        y_pred = np.full(np.shape(y), self.initial_prediction, dtype=float)

        for tree in self.trees:
            gradient = gradient_calc(y_true = y, y_pred = y_pred)
            tree.fit(X, -gradient)
            # Bring the tree output back to the target's shape before the
            # in-place update (a column target yields 1-D predictions).
            update = np.asarray(tree.predict(X)).reshape(y_pred.shape)
            y_pred += self.learning_rate*update

        return self

    def predict(self,X):
        """Return the ensemble prediction for ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.initial_prediction is None:
            raise RuntimeError("fit must be called before predict")

        base = np.asarray(self.initial_prediction, dtype=float)
        y_pred = np.full((len(X),) + base.shape, base, dtype=float)

        for tree in self.trees:
            update = np.asarray(tree.predict(X)).reshape(y_pred.shape)
            y_pred += self.learning_rate*update

        return y_pred

In [4]:
# Leftover interactive help lookup (IPython `?` syntax); it only opens the
# pager and can be removed from the finished notebook.
?mean_squared_error

Signature:
mean_squared_error(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput='uniform_average',
    squared=True,
)
Docstring:
Mean squared error regression loss.

Read more in the :ref:`User Guide <mean_squared_error>`.

Parameters
----------
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
    Ground truth (correct) target values.

y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
    Estimated target values.

sample_weight : array-like of shape (n_samples,), default=None
    Sample weights.

multioutput : {'raw_values', 'uniform_average'} or array-like of shape             (n_outputs,), def