# Clash of the Regressors

In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import StackingRegressor

## Model Selection and scoring imports

In [2]:
import plotly.express as px
import plotly.graph_objects as go
import cufflinks as cf

import pandas as pd
import numpy as np

from sklearn.datasets import make_regression
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error as MSE


random_state = 42

## Non linear Training Data

In [7]:
m=1
c=200
noise = np.random.random(1001) - 0.5
x = np.linspace(0,10, 1001)
y = np.sin(2*x) + noise + c
y_true = np.sin(2*x) + c
X = x.reshape(-1,1)

In [34]:
fig1 = go.Figure()
fig1.add_trace(
    go.Scatter(x=x, y = y, opacity=0.2, mode='markers',  marker=go.scatter.Marker(color='green', size=5), name='Input')
)
fig1.add_trace(
    go.Scatter(x=x, y = y_true, opacity=0.5,  marker=go.scatter.Marker(color='black', size=1), name='True')
)

fig1.update_layout(height=700, legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)
print(X.shape, y.shape, X_train.shape, y_train.shape, X_test.shape, y_test.shape)
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

(1001, 1) (1001,) (800, 1) (800,) (201, 1) (201,)


### Linear Regression

In [None]:
linear_r = LinearRegression()

### Decision Tree Regressor

In [10]:
dtr = DecisionTreeRegressor(random_state=random_state)
params_dtr = {
    'criterion' : ['mse', 'friedman_mse', 'mae', 'poisson']
}
dtr_gridcv = GridSearchCV(dtr, param_grid=params_dtr, cv=cv, n_jobs=-1, verbose=3)
dtr_gridcv.fit(X_train,y_train)

Fitting 30 folds for each of 4 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:    1.3s finished


GridSearchCV(cv=RepeatedKFold(n_repeats=3, n_splits=10, random_state=1),
             estimator=DecisionTreeRegressor(random_state=42), n_jobs=-1,
             param_grid={'criterion': ['mse', 'friedman_mse', 'mae',
                                       'poisson']},
             verbose=3)

### Bagging Regressors

In [11]:
bg = BaggingRegressor()
params_bg = {
    'n_estimators' : [200, 500, 750, 1000],
    'base_estimator': [
                       DecisionTreeRegressor(random_state=random_state, max_depth=3)
                       ],
    'max_samples': [0.5, 0.7, 0.9]
    }
bagging_gridcv = GridSearchCV(estimator=bg, param_grid=params_bg, return_train_score=True, cv=cv, n_jobs =-1, verbose=3)
bagging_gridcv.fit(X_train,y_train)
print(f'Best Params for BaggingRegressor: {bagging_gridcv.best_params_}')

Fitting 30 folds for each of 12 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:   28.4s
[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:  1.5min finished


Best Params for BaggingRegressor: {'base_estimator': DecisionTreeRegressor(max_depth=3, random_state=42), 'max_samples': 0.5, 'n_estimators': 750}


### Adaboost Regressor

In [12]:
ada = AdaBoostRegressor(random_state=random_state)
params_ada = {
    'learning_rate' : np.linspace(0.1,1,10),
    'n_estimators' : [100,200,500,700,1000],
    'loss' : ['linear', 'square']
}
ada_gridcv = GridSearchCV(ada, param_grid=params_ada, cv=cv, n_jobs=-1, verbose=3)
ada_gridcv.fit(X_train, y_train)

Fitting 30 folds for each of 100 candidates, totalling 3000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:   11.6s
[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:   34.0s
[Parallel(n_jobs=-1)]: Done 496 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 1136 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 1552 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 2032 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 3000 out of 3000 | elapsed:  5.5min finished


GridSearchCV(cv=RepeatedKFold(n_repeats=3, n_splits=10, random_state=1),
             estimator=AdaBoostRegressor(random_state=42), n_jobs=-1,
             param_grid={'learning_rate': array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
                         'loss': ['linear', 'square'],
                         'n_estimators': [100, 200, 500, 700, 1000]},
             verbose=3)

In [13]:
print(f'The Best Parameters for AdaBoostRegressor: {ada_gridcv.best_params_}')

The Best Parameters for AdaBoostRegressor: {'learning_rate': 1.0, 'loss': 'square', 'n_estimators': 100}


### XGBoostREgressor

In [14]:
xgr = XGBRegressor()
params_xgr= {
    'n_estimators' : [100, 500, 750, 1000],
    'objective' : ['reg:squarederror'],
    'learning_rate': [0.1, 0.3, 0.5, 0.7, 0.9]
}
xgr_gridcv = GridSearchCV(xgr, param_grid=params_xgr, cv=cv, n_jobs=-1, verbose=3)
xgr_gridcv.fit(X_train, y_train)
print(f'The Best Parameters for XGBRegressor: {xgr_gridcv.best_params_}')

Fitting 30 folds for each of 20 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done 496 tasks      | elapsed:   17.1s


The Best Parameters for XGBRegressor: {'learning_rate': 0.1, 'n_estimators': 100, 'objective': 'reg:squarederror'}


[Parallel(n_jobs=-1)]: Done 600 out of 600 | elapsed:   20.8s finished


### LightGBM Regressor

In [15]:
lgr = LGBMRegressor()
params_lgr = {
    'learning_rate' : [0.01, 0.05, 0.1, 0.3, 0.5, 0.7],
    'n_estimators' : [100, 500, 700]
}
lgr_gridcv = GridSearchCV(lgr, param_grid=params_lgr, cv=cv, n_jobs=-1, verbose=3)
lgr_gridcv.fit(X_train, y_train)
print(f'The Best Parameters for LGBMRegressor: {lgr_gridcv.best_params_}')

Fitting 30 folds for each of 18 candidates, totalling 540 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 208 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 540 out of 540 | elapsed:   24.2s finished


The Best Parameters for LGBMRegressor: {'learning_rate': 0.01, 'n_estimators': 500}


### Models for Stacking and Voting

In [16]:
models_list = [
  (
     'BaggingRegressor', BaggingRegressor(
      n_estimators = bagging_gridcv.best_params_['n_estimators'],
      base_estimator = DecisionTreeRegressor(random_state=random_state, max_depth=3),
      max_samples = bagging_gridcv.best_params_['max_samples']
    )
  ),
  (
    'AdaBoostRegressor', AdaBoostRegressor(
    learning_rate = ada_gridcv.best_params_['learning_rate'], 
    n_estimators = ada_gridcv.best_params_['n_estimators'],
    loss = ada_gridcv.best_params_['loss']
    )
  ),
  (
    'XGBRegressor', XGBRegressor(
    n_estimators = xgr_gridcv.best_params_['n_estimators'],
    objective = xgr_gridcv.best_params_['objective'],
    learning_rate = xgr_gridcv.best_params_['learning_rate']
    )
  ),
  (
    'LGBMRegressor', LGBMRegressor(
    learning_rate = lgr_gridcv.best_params_['learning_rate'],
    n_estimators = lgr_gridcv.best_params_['n_estimators']
  ))
]

### Voting Regressor

In [17]:
voting_regressor = VotingRegressor(estimators = models_list)
voting_regressor.fit(X_train, y_train)

VotingRegressor(estimators=[('BaggingRegressor',
                             BaggingRegressor(base_estimator=DecisionTreeRegressor(max_depth=3,
                                                                                   random_state=42),
                                              max_samples=0.5,
                                              n_estimators=750)),
                            ('AdaBoostRegressor',
                             AdaBoostRegressor(loss='square',
                                               n_estimators=100)),
                            ('XGBRegressor',
                             XGBRegressor(objective='reg:squarederror')),
                            ('LGBMRegressor',
                             LGBMRegressor(learning_rate=0.01,
                                           n_estimators=500))])

### Stacking Regressor

In [18]:
stacking_regressor = StackingRegressor(estimators = models_list, final_estimator= LinearRegression(), cv=5)
stacking_regressor.fit(X_train, y_train)

StackingRegressor(cv=5,
                  estimators=[('BaggingRegressor',
                               BaggingRegressor(base_estimator=DecisionTreeRegressor(max_depth=3,
                                                                                     random_state=42),
                                                max_samples=0.5,
                                                n_estimators=750)),
                              ('AdaBoostRegressor',
                               AdaBoostRegressor(loss='square',
                                                 n_estimators=100)),
                              ('XGBRegressor',
                               XGBRegressor(objective='reg:squarederror')),
                              ('LGBMRegressor',
                               LGBMRegressor(learning_rate=0.01,
                                             n_estimators=500))],
                  final_estimator=LinearRegression())

### Cross Validation of Bagging Regressors

In [None]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(x=x, y = y, opacity=0.2, mode='markers',  marker=go.scatter.Marker(color='green', size=5), name='Input')
)
fig.add_trace(
    go.Scatter(x=x, y = y_true, opacity=0.5,  marker=go.scatter.Marker(color='black', size=1), name='True')
)
fig.add_trace(
    go.Scatter(x=x, y = dtr_gridcv.best_estimator_.predict(X), opacity=0.5,  mode='markers',marker=go.scatter.Marker(color='red', size=2), name='DecisionTree')
)
fig.add_trace(
    go.Scatter(x=x, y = bagging_gridcv.best_estimator_.predict(X), opacity=1,marker=go.scatter.Marker(color='yellow', size=2), name='BaggingRegressor')
)

fig.add_trace(
    go.Scatter(x=x, y = ada_gridcv.best_estimator_.predict(X), opacity=1, marker=go.scatter.Marker(color='pink', size=2), name='AdaBoostRegressor')
)

fig.add_trace(
    go.Scatter(x=x, y = xgr_gridcv.best_estimator_.predict(X), opacity=1, marker=go.scatter.Marker(color='red', size=2), name='XGBRegressor')
)

fig.add_trace(
    go.Scatter(x=x, y = lgr_gridcv.best_estimator_.predict(X), opacity=1, marker=go.scatter.Marker(color='green', size=2), name='LGBMRegressor')
)

fig.add_trace(
    go.Scatter(x=x, y = voting_regressor.predict(X), opacity=1, marker=go.scatter.Marker(color='black', size=2), name='VotingRegressor')
)

fig.add_trace(
    go.Scatter(x=x, y = stacking_regressor.predict(X), opacity=1, marker=go.scatter.Marker(color='blue', size=2), name='StackingRegressor')
)

fig.update_layout(height=700, legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

In [20]:
models_cv = [
  (
     'BaggingRegressor', BaggingRegressor(
      n_estimators = bagging_gridcv.best_params_['n_estimators'],
      base_estimator = DecisionTreeRegressor(random_state=random_state, max_depth=3),
      max_samples = bagging_gridcv.best_params_['max_samples']
    )
  ),
  (
    'AdaBoostRegressor', AdaBoostRegressor(
    learning_rate = ada_gridcv.best_params_['learning_rate'], 
    n_estimators = ada_gridcv.best_params_['n_estimators'],
    loss = ada_gridcv.best_params_['loss']
    )
  ),
  (
    'XGBRegressor', XGBRegressor(
    n_estimators = xgr_gridcv.best_params_['n_estimators'],
    objective = xgr_gridcv.best_params_['objective'],
    learning_rate = xgr_gridcv.best_params_['learning_rate']
    )
  ),
  (
    'LGBMRegressor', LGBMRegressor(
    learning_rate = lgr_gridcv.best_params_['learning_rate'],
    n_estimators = lgr_gridcv.best_params_['n_estimators']
    )
  ),
  (
    'VotingRegressor', VotingRegressor(estimators=models_list
    )
  ),
  (
    'StackingRegressor', StackingRegressor(estimators = models_list, final_estimator= LinearRegression(), cv=5
    )
  )

]

In [21]:
df_scores_regression = pd.DataFrame()
accuracy_scores_regression = []

for name, model in models_cv:
    model_scores = cross_val_score(model, X_train,y_train, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
    model.fit(X_train,y_train)
    accuracy_scores_regression.append(MSE(y_test, model.predict(X_test))**(1/2))
    df_scores_regression[name] = model_scores

df_scores_regression.iplot(kind='box', asFigure=True, showlegend=False, yTitle='-|Abs Error|', theme='ggplot', title='Regression: -|Abs Error|')

In [22]:
df_scores_regression.describe()

Unnamed: 0,BaggingRegressor,AdaBoostRegressor,XGBRegressor,LGBMRegressor,VotingRegressor,StackingRegressor
count,30.0,30.0,30.0,30.0,30.0,30.0
mean,-0.440021,-0.287146,-0.257991,-0.252357,-0.264312,-0.251308
std,0.034777,0.040536,0.018623,0.016247,0.018269,0.01633
min,-0.499083,-0.393617,-0.292813,-0.289165,-0.300215,-0.284214
25%,-0.466769,-0.311848,-0.271589,-0.263945,-0.278054,-0.265709
50%,-0.446588,-0.277474,-0.259555,-0.25139,-0.265456,-0.248367
75%,-0.414134,-0.258398,-0.242866,-0.240641,-0.255304,-0.237861
max,-0.362842,-0.2279,-0.230089,-0.221984,-0.217203,-0.222595


In [24]:
regression_models = [model_name for model_name, model in models_cv]
fig = px.bar(x=regression_models, y = accuracy_scores_regression, 
        title='Stacking Learners: Accuracy scores', text= accuracy_scores_regression)
fig.update_layout( yaxis_title="RMSE")