# Predictive Modeling: Boston Housing Dataset

First import the essential modules:

In [108]:
# essentials
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures

# model selection
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, RandomizedSearchCV

# pipeline
from sklearn.pipeline import Pipeline

# Ml Algorithms 
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import LinearSVR, SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
import xgboost

# warnings
import warnings
warnings.filterwarnings('ignore')

Load the dataset:

In [2]:
dataset = pd.read_csv('train.csv')

Take a look at the first five rows of the dataset:

In [3]:
dataset.head()

Unnamed: 0,ID,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
3,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2
4,7,0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43,22.9


Note that this dataset has two categorical features, 'chas' and 'rad'. 'chas' is already encoded as 0/1, so it can be used as is, but 'rad' has to be one-hot encoded so that the models do not treat its category codes as ordered magnitudes. We also rearrange the columns so that the target 'medv' comes last, which keeps things cleaner.

In [4]:
# One-hot encode the categorical 'rad' column and drop the original column.
# The dummy dtype is pinned to an integer type: recent pandas versions emit
# boolean dummies by default, which would turn `dataset.values` into an
# object-dtype array instead of a plain numeric matrix.
dataset = pd.concat(
    [dataset, pd.get_dummies(dataset['rad'], prefix='rad', dtype=np.uint8)],
    axis=1,
).drop('rad', axis=1)

In [5]:
# Reorder the columns: ID first, then the predictors, then the one-hot 'rad'
# columns, with the target 'medv' in the last position.
dataset = dataset.loc[:, [
    'ID',
    'crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'tax',
    'ptratio', 'black', 'lstat',
    'rad_1', 'rad_2', 'rad_3', 'rad_4', 'rad_5', 'rad_6', 'rad_7', 'rad_8', 'rad_24',
    'medv',
]]

In [6]:
dataset.columns

Index(['ID', 'crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'tax',
       'ptratio', 'black', 'lstat', 'rad_1', 'rad_2', 'rad_3', 'rad_4',
       'rad_5', 'rad_6', 'rad_7', 'rad_8', 'rad_24', 'medv'],
      dtype='object')

Now, we see that column 'rad' has gone, and new dummies features from rad have been added to our dataset.

Let's make the feature matrix and target vector, and convert it to numpy array:

In [7]:
# Feature matrix: every column except the first ('ID') and the last ('medv').
X = dataset.iloc[:, 1:-1].to_numpy()
# Target vector: the last column ('medv').
y = dataset.iloc[:, -1].to_numpy()

In [8]:
print(X[0])
print(y[0])

[6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01
 4.090e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00 1.000e+00 0.000e+00
 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00]
24.0


Now we are ready to use the machine learning algorithms. In this notebook, we will try the most common machine learning algorithms and look at their scores with the cross_val_score function. We will look for the best hyperparameters with the exhaustive search of the GridSearchCV class, and at the end we split the data into a training set and a validation set, train on the training set, and then look at the performance on the validation set.

---

## KNearest Neighbors

Here we will use KNeighborsRegressor. Note that this algorithm needs scaling first. But, for learning purposes we will try it without scaling.

In [9]:
# Baseline: default KNeighborsRegressor on the raw (unscaled) features, 5-fold CV, R2.
cvs_knn_default_no_scaling_score = cross_val_score(
    KNeighborsRegressor(), X, y, scoring='r2', cv=5
)
print(f"KNeighborsRegressor cross val score (default & no scaling): {cvs_knn_default_no_scaling_score}")
print(f"The mean score: {cvs_knn_default_no_scaling_score.mean():.3f}")

KNeighborsRegressor cross val score (default & no scaling): [-1.27743668e+00  1.26213407e-01 -5.08365911e-01 -1.12486164e-04
 -1.59147857e+00]
The mean score: -0.650


Its R2 score is negative, so horrible. Now let's see what happen if we scale it with StandardScaler and still use the default hyperparameters.

In [10]:
# Standardise the features, then fit a k-nearest-neighbours regressor.
# Scaling lives inside the pipeline so it is re-fitted on each CV training fold.
pipe_knn = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor()),
])

In [11]:
# Same default KNN, now with standardised features (5-fold CV, R2).
cvs_knn_default_score = cross_val_score(pipe_knn, X, y, scoring='r2', cv=5)
print(f"KNeighborsRegressor cross val score (default & scaled): {cvs_knn_default_score}")
print(f"The mean score: {cvs_knn_default_score.mean():.3f}")

KNeighborsRegressor cross val score (default & scaled): [-0.22459519  0.49962429 -0.1921519   0.22944539 -0.23294058]
The mean score: 0.016


Still awful. Let's find the best hyperparameters with GridSearchCV. Here we will tune only two hyperparameters: n_neighbors and weights.

In [12]:
# Search space for the 'knn' step of the pipeline.
param_grid = dict(
    knn__weights=['distance', 'uniform'],
    knn__n_neighbors=[*range(2, 21)],  # candidate k values: 2 through 20
)

# Exhaustive search over the grid, 5-fold CV, scored by R2.
grid_knn = GridSearchCV(
    estimator=pipe_knn,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
).fit(X, y)

In [13]:
grid_knn.best_score_

0.05875229208958241

In [14]:
grid_knn.best_params_

{'knn__n_neighbors': 20, 'knn__weights': 'uniform'}

In [15]:
# Tabulate the grid-search results and show the mean CV R2 for every
# (weights, n_neighbors) combination.
grid_knn_results = pd.DataFrame(grid_knn.cv_results_)
# Select the score column *before* aggregating: cv_results_ also contains
# non-numeric columns (e.g. the `params` dicts) that cannot be averaged.
grid_knn_results.groupby(['param_knn__weights', 'param_knn__n_neighbors'])['mean_test_score'].mean()

param_knn__weights  param_knn__n_neighbors
distance            2                        -0.031728
                    3                         0.038893
                    4                         0.038841
                    5                         0.037972
                    6                        -0.010672
                    7                        -0.055296
                    8                        -0.042408
                    9                        -0.052224
                    10                       -0.032289
                    11                       -0.013722
                    12                        0.008872
                    13                        0.037274
                    14                        0.039667
                    15                        0.014169
                    16                        0.026639
                    17                        0.033953
                    18                        0.037984
                    19

Result we get is pretty bad. Let's move to other model.

---

## SGDRegressor

Note that stochastic gradient descent needs scaled data to perform well. For learning purposes, we first leave the data unscaled and use the default hyperparameters:

In [16]:
# Baseline: default SGDRegressor on the raw (unscaled) features, 5-fold CV, R2.
sgd_default_no_scaling_score = cross_val_score(
    SGDRegressor(), X, y, scoring='r2', cv=5
)
print(f"SGDRegressor cross val score (default & no scaling): {sgd_default_no_scaling_score}")
print(f"The mean score: {sgd_default_no_scaling_score.mean():.3f}")

SGDRegressor cross val score (default & no scaling): [-4.92025341e+27 -1.16251318e+27 -6.13110029e+26 -2.43449545e+27
 -9.68773822e+27]
The mean score: -3763622057763393428187512832.000


Its R2 score is negative. Certainly we need to scale it first

In [17]:
# Standardise the features, then fit a linear model with stochastic gradient descent.
pipe_sgd = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('sgd', SGDRegressor()),
])

In [18]:
# Default SGDRegressor evaluated through the scaling pipeline (5-fold CV, R2).
# The label says "scaled" because pipe_sgd applies StandardScaler first.
sgd_default_score = cross_val_score(pipe_sgd, X, y, cv=5, scoring='r2') # scaled via the pipeline, default hyperparameters
print("SGDRegressor cross val score (default & scaled): {}".format(sgd_default_score))
print("The mean score: {:.3f}".format(sgd_default_score.mean()))

SGDRegressor cross val score (default & no scaling): [ 0.72137527  0.64710453  0.33841331 -0.0245005  -1.24371964]
The mean score: 0.088


See... the R2 score has improved.

There are several hyperparameters for SGD such as penalty, alpha, eta0, l1_ratio, early_stopping, loss, etc. But we will use three: penalty, alpha, eta0

In [19]:
# Logarithmic grid shared by the regularisation strength and the initial learning rate.
param_grid = dict(
    sgd__penalty=['l1', 'l2', 'elasticnet'],  # regularization penalty
    sgd__alpha=[0.001, 0.01, 0.1, 1, 10, 100, 1000],  # regularization parameter
    sgd__eta0=[0.001, 0.01, 0.1, 1, 10, 100, 1000],  # initial learning rate
)

# Exhaustive search over the grid, 3-fold CV, scored by R2.
grid_sgd = GridSearchCV(
    estimator=pipe_sgd,
    param_grid=param_grid,
    scoring='r2',
    cv=3,
).fit(X, y)

In [20]:
grid_sgd.best_score_

0.33060266278252504

In [21]:
grid_sgd.best_params_

{'sgd__alpha': 1, 'sgd__eta0': 0.1, 'sgd__penalty': 'elasticnet'}

In [22]:
pd.DataFrame(grid_sgd.cv_results_['mean_test_score']).describe()

Unnamed: 0,0
count,147.0
mean,-3.993321e+28
std,1.7786269999999997e+29
min,-1.608618e+30
25%,-7.550897e+26
50%,-7.291658e+22
75%,-0.9594631
max,0.3306027


Look at the standard deviation of the mean test scores: they are extremely dispersed. Much of this most likely comes from the grid itself, since very large initial learning rates (eta0) can make SGD diverge and produce astronomically negative R2 values. In addition, SGD is stochastic, so the scores change from run to run unless a random_state is fixed. The best score in this run is 0.33 (it will probably change if I run it again), which is better than KNeighborsRegressor.

---

## Linear Regression

Now we use the most popular ML algorithm in the linear model family (as far as I know). This model is simple: it does not need scaling (but we will scale it anyway to see whether there is any difference) and has no hyperparameters to tune.

In [23]:
# Ordinary least squares on the raw (unscaled) features, 3-fold CV, R2.
linreg_no_scaling_score = cross_val_score(
    LinearRegression(), X, y, scoring='r2', cv=3
)
print(f"Linear Regression cross val score (no scaling): {linreg_no_scaling_score}")
print(f"The mean score: {linreg_no_scaling_score.mean():.3f}")

Linear Regression cross val score (no scaling): [ 0.45163553  0.39253457 -2.65804985]
The mean score: -0.605


In [24]:
# Standardise the features, then fit ordinary least squares.
pipe_linreg = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('linreg', LinearRegression()),
])

In [25]:
# Linear regression evaluated through the scaling pipeline (3-fold CV, R2).
# The label says "scaled" because pipe_linreg applies StandardScaler first.
linreg_score = cross_val_score(pipe_linreg, X, y, cv=3, scoring='r2') # scaled via the pipeline
print("Linear Regression cross val score (scaled): {}".format(linreg_score))
print("The mean score: {:.3f}".format(linreg_score.mean()))

Linear Regression cross val score (no scaling): [ 0.44696893  0.40233584 -2.73421918]
The mean score: -0.628


Not much different from the unscaled result, right?

Let's see what happen if we use PolynomialFeatures to expand the features.

In [26]:
# Expand the features with all degree-2 polynomial and interaction terms
# (no constant column, since LinearRegression fits its own intercept).
poly = PolynomialFeatures(degree=2, include_bias=False)
# cv is passed explicitly (3 folds, like the other linear-model cells) so the
# result does not depend on the library's default number of folds.
linreg_poly_score = cross_val_score(LinearRegression(), poly.fit_transform(X), y, cv=3, scoring='r2')
print("Linear Regression cross val score (with Polynomial terms): {}".format(linreg_poly_score))
print("The mean score: {:.3f}".format(linreg_poly_score.mean()))

Linear Regression cross val score (with Polynomial terms): [ -11635.10957162   -3398.25411481 -162062.65521929]
The mean score: -59032.006


Certainly adding more features does not help.

---

## Ridge and Lasso

In this subsection, we won't scale the data first. Let's begin with Ridge

In [27]:
# Ridge with default hyperparameters on the unscaled features, 3-fold CV, R2.
ridge_default_score = cross_val_score(Ridge(), X, y, scoring='r2', cv=3)
print(f'Ridge Regression cross val score (default): {ridge_default_score}')
print(f"The mean score: {ridge_default_score.mean():.2f}")

Ridge Regression cross val score (default): [ 0.56288541  0.35648953 -1.17868545]
The mean score: -0.09


Now we use the feature matrix with expanded features (due to PolynomialFeatures)

In [28]:
# Default Ridge on the degree-2 polynomial expansion of X (3-fold CV, R2).
ridge_poly_default_score = cross_val_score(
    Ridge(),
    poly.fit_transform(X),
    y,
    scoring='r2',
    cv=3,
)
print(f'Ridge Regression cross val score (default and with polynomial terms): {ridge_poly_default_score}')
print(f"The mean score: {ridge_poly_default_score.mean():.2f}")

Ridge Regression cross val score (default and with polynomial terms): [  -50.09180674    -6.08025845 -1703.31683783]
The mean score: -586.50


Here we will use GridSearch to find the best parameter:

In [29]:
# Logarithmic grid for the Ridge regularisation strength.
param_grid = dict(alpha=[0.001, 0.01, 0.1, 1, 10, 100, 1000])

# Exhaustive search over the grid, 3-fold CV, scored by R2.
grid_ridge = GridSearchCV(
    estimator=Ridge(),
    param_grid=param_grid,
    scoring='r2',
    cv=3,
).fit(X, y)

In [30]:
grid_ridge.best_score_

0.3120057313941854

In [31]:
grid_ridge.best_params_

{'alpha': 100}

In [32]:
pd.DataFrame(grid_ridge.cv_results_)['mean_test_score']

0   -0.602560
1   -0.584480
2   -0.444172
3   -0.086437
4    0.131436
5    0.312006
6    0.161435
Name: mean_test_score, dtype: float64

Now, we move to Lasso

In [33]:
# Lasso with default hyperparameters on the unscaled features, 3-fold CV, R2.
lasso_default_score = cross_val_score(Lasso(), X, y, cv=3, scoring='r2')
# Label spelling corrected ("corss" -> "cross") to match the other report lines.
print('Lasso Regression cross val score (default): {}'.format(lasso_default_score))
print("The mean score: {:.2f}".format(lasso_default_score.mean()))

Lasso Regression corss val score (default): [ 0.49013108  0.04916419 -0.01947876]
The mean score: 0.17


We don't expand the features here, because our dataset probably already contains irrelevant features. We will see later. Let's use GridSearchCV to find the best parameter:

In [34]:
# Logarithmic grid for the Lasso regularisation strength.
param_grid = dict(alpha=[0.001, 0.01, 0.1, 1, 10, 100, 1000])

# Exhaustive search over the grid, 3-fold CV, scored by R2.
grid_lasso = GridSearchCV(
    estimator=Lasso(),
    param_grid=param_grid,
    scoring='r2',
    cv=3,
).fit(X, y)

In [35]:
grid_lasso.best_score_

0.17327216889423158

In [36]:
grid_lasso.best_params_

{'alpha': 1}

In [37]:
pd.DataFrame(grid_lasso.cv_results_)['mean_test_score']

0   -0.586315
1   -0.239002
2    0.010592
3    0.173272
4    0.068983
5   -0.489925
6   -0.695293
Name: mean_test_score, dtype: float64

Note that the difference between Ridge and Lasso is the way they regularize the model. Ridge shrinks all coefficients so that each feature has only a small influence, whereas Lasso can drive some coefficients exactly to zero, effectively ignoring those features.

## ElasticNet

ElasticNet is using the combination of the regularization power of Ridge and Lasso.

In [38]:
# ElasticNet with default hyperparameters on the unscaled features, 3-fold CV, R2.
elasticnet_default_score = cross_val_score(ElasticNet(), X, y, scoring='r2', cv=3)
print(f'ElasticNet cross val score (default): {elasticnet_default_score}')
print(f"The mean score: {elasticnet_default_score.mean():.2f}")

ElasticNet cross val score (default): [ 0.49295924  0.10315915 -0.03631311]
The mean score: 0.19


Let's use GridSearch to find the best parameters, in here we will use alpha and l1_ratio.

In [39]:
# l1_ratio sweeps from pure L2 (0) to pure L1 (1); alpha is searched on a log scale.
param_grid = dict(
    l1_ratio=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    alpha=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
)

# Exhaustive search over the grid, 3-fold CV, scored by R2.
grid_elasticnet = GridSearchCV(
    estimator=ElasticNet(),
    param_grid=param_grid,
    scoring='r2',
    cv=3,
).fit(X, y)

In [40]:
grid_elasticnet.best_score_

0.265582850154267

In [41]:
grid_elasticnet.best_params_

{'alpha': 1, 'l1_ratio': 0}

In [42]:
pd.DataFrame(grid_elasticnet.cv_results_).columns

Index(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time',
       'param_alpha', 'param_l1_ratio', 'params', 'split0_test_score',
       'split1_test_score', 'split2_test_score', 'mean_test_score',
       'std_test_score', 'rank_test_score', 'split0_train_score',
       'split1_train_score', 'split2_train_score', 'mean_train_score',
       'std_train_score'],
      dtype='object')

In [43]:
pd.DataFrame(grid_elasticnet.cv_results_).groupby('param_alpha')['mean_test_score'].mean()

param_alpha
0.001      -0.436813
0.010      -0.074493
0.100       0.154940
1.000       0.197544
10.000      0.071101
100.000    -0.416145
1000.000   -0.688539
Name: mean_test_score, dtype: float64

In [44]:
pd.DataFrame(grid_elasticnet.cv_results_).groupby('param_l1_ratio')['mean_test_score'].mean()

param_l1_ratio
0.0   -0.046273
0.1   -0.083719
0.2   -0.144526
0.3   -0.167523
0.4   -0.178143
0.5   -0.184231
0.6   -0.189489
0.7   -0.197203
0.8   -0.208242
0.9   -0.223333
1.0   -0.251098
Name: mean_test_score, dtype: float64

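Here the best l1_ratio is 0, which means the penalty is pure L2, so the model is essentially a Ridge regression. The best alpha is 1, which looks different from the alpha = 100 we found with Ridge, but the two values are not directly comparable: ElasticNet divides the squared-error term by 2 * n_samples, whereas Ridge does not, so an ElasticNet alpha corresponds to a Ridge alpha that is roughly n_samples times larger.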

---

## Linear SVR

Now we will use LinearSVR, which is more complex than LinearRegression or the other linear models above. Note that SVM algorithms need scaled data to give good results. But, as usual, we first try it without scaling.

In [59]:
# Baseline: default LinearSVR on the raw (unscaled) features, 5-fold CV, R2.
linsvr_default_no_scaling_score = cross_val_score(
    LinearSVR(), X, y, scoring='r2', cv=5
)
print(f'LinearSVR cross val score (default and no scaling): {linsvr_default_no_scaling_score}')
print(f"The mean score: {linsvr_default_no_scaling_score.mean():.2f}")

LinearSVR cross val score (default and no scaling): [ 0.13141627  0.60156649  0.44327683 -0.08346035 -0.78312707]
The mean score: 0.06


Now, we use the scaled data. Using pipeline

In [46]:
# Standardise the features, then fit a linear support vector regressor.
pipe_linsvr = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('linsvr', LinearSVR()),
])

In [58]:
# Default LinearSVR evaluated through the scaling pipeline (5-fold CV, R2).
linsvr_default_score = cross_val_score(pipe_linsvr, X, y, scoring='r2', cv=5)
print(f'LinearSVR cross val score (default): {linsvr_default_score}')
print(f"The mean score: {linsvr_default_score.mean():.2f}")

LinearSVR cross val score (default and no scaling): [ 0.56750153  0.68509431  0.23411804  0.03962725 -0.54107938]
The mean score: 0.20


It's searching time!

In [62]:
# Logarithmic grids for the epsilon and C hyperparameters of the 'linsvr' step.
param_grid = dict(
    linsvr__epsilon=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    linsvr__C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
)

# Exhaustive search over the grid, 5-fold CV, scored by R2.
grid_linsvr = GridSearchCV(
    estimator=pipe_linsvr,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
).fit(X, y)

In [63]:
grid_linsvr.best_score_

0.19984402892461908

In [65]:
grid_linsvr.best_params_

{'linsvr__C': 1, 'linsvr__epsilon': 0.001}

In [67]:
pd.DataFrame(grid_linsvr.cv_results_).groupby('param_linsvr__epsilon')['mean_test_score'].mean()

param_linsvr__epsilon
0.001       -2.587513
0.010       -2.699267
0.100       -2.656507
1.000       -2.702798
10.000      -3.525321
100.000    -10.727343
1000.000   -10.727343
Name: mean_test_score, dtype: float64

In [68]:
pd.DataFrame(grid_linsvr.cv_results_).groupby('param_linsvr__C')['mean_test_score'].mean()

param_linsvr__C
0.001      -10.539883
0.010       -8.973172
0.100       -3.190656
1.000       -3.038791
10.000      -3.098147
100.000     -3.137211
1000.000    -3.648232
Name: mean_test_score, dtype: float64

---

## SVR

Note that LinearSVR fits a purely linear model and does not use a kernel. Now we use SVR, which is the more general model: we can choose which kernel to use. Like LinearSVR, this model performs better with scaled data, so scaling is necessary.

In [73]:
# Baseline: default SVR on the raw (unscaled) features, 5-fold CV, R2.
svr_default_no_scaling_score = cross_val_score(SVR(), X, y, scoring='r2', cv=5)
print(f'SVR cross val score (default and no scaling): {svr_default_no_scaling_score}')
print(f"The mean score: {svr_default_no_scaling_score.mean():.2f}")

SVR cross val score (default and no scaling): [-0.0327477  -0.05107432 -1.42701088  0.01562126 -2.2448059 ]
The mean score: -0.75


In [70]:
# Standardise the features, then fit a kernel support vector regressor.
pipe_svr = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svr', SVR()),
])

In [71]:
# Default SVR evaluated through the scaling pipeline (5-fold CV, R2).
svr_default_score = cross_val_score(pipe_svr, X, y, scoring='r2', cv=5)
print(f'SVR cross val score (default): {svr_default_score}')
print(f"The mean score: {svr_default_score.mean():.2f}")

SVR cross val score (default): [ 0.4124707   0.40408661 -0.6261605   0.25580055 -1.34137876]
The mean score: -0.18


It's searching time. We will use the RBF kernel.

In [75]:
# Logarithmic grids for the C and gamma hyperparameters of the 'svr' step.
param_grid = dict(
    svr__C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    svr__gamma=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
)

# Exhaustive search over the grid, 5-fold CV, scored by R2.
grid_svr = GridSearchCV(
    estimator=pipe_svr,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
).fit(X, y)

In [76]:
grid_svr.best_score_

0.341379212579757

In [77]:
grid_svr.best_params_

{'svr__C': 1000, 'svr__gamma': 0.001}

In [84]:
pd.DataFrame(grid_svr.cv_results_).groupby('param_svr__C')['mean_test_score'].median()

param_svr__C
0.001      -0.768252
0.010      -0.767493
0.100      -0.760642
1.000      -0.709444
10.000     -0.639138
100.000    -0.814071
1000.000   -0.954943
Name: mean_test_score, dtype: float64

In [85]:
pd.DataFrame(grid_svr.cv_results_).groupby('param_svr__gamma')['mean_test_score'].median()

param_svr__gamma
0.001      -0.325278
0.010      -0.297308
0.100      -0.753644
1.000      -0.742281
10.000     -0.772604
100.000    -0.772698
1000.000   -0.772698
Name: mean_test_score, dtype: float64

---

## Random Forest Regressor

Now we use ensemble ML algorithms, the first one is RandomForestRegressor. 

In [86]:
# Random forest with default hyperparameters, 5-fold CV, R2.
rf_default_score = cross_val_score(RandomForestRegressor(), X, y, scoring='r2', cv=5)
print(f'RandomForestRegressor cross val score (default): {rf_default_score}')
print(f"The mean score: {rf_default_score.mean():.2f}")

RandomForestRegressor cross val score (default): [ 0.67294     0.84227784  0.75384568  0.49261092 -0.12311533]
The mean score: 0.53


Now, we will look for the best parameters for RandomForestRegressor.

In [89]:
# Candidate values sampled by the randomized search.
param_grid = {
    'n_estimators' : [50, 100, 150, 200, 250, 300],
    'max_depth' : list(range(2, 11)),
    'max_features' : list(range(3, 24, 3)) # 3, 6, ..., 21
}

# RandomizedSearchCV takes its search space through `param_distributions`
# (`param_grid` is the GridSearchCV keyword and is rejected here).
# 5-fold CV, scored by R2, with the default number of sampled candidates.
grid_rf = RandomizedSearchCV(RandomForestRegressor(), param_distributions=param_grid, cv=5, scoring='r2').fit(X, y)

In [90]:
grid_rf.best_score_

0.5830035779990217

In [91]:
grid_rf.best_params_

{'max_depth': 9, 'max_features': 15, 'n_estimators': 100}

In [95]:
# Score one hand-picked random-forest configuration with 5-fold CV (R2).
# NOTE(review): these hyperparameters differ from the best_params_ shown for
# grid_rf (max_depth=9, max_features=15, n_estimators=100) - confirm which
# configuration was meant to be evaluated here.
cross_val_score(RandomForestRegressor(n_estimators=250, max_depth=3, max_features=3), X, y, cv=5, scoring='r2')

array([ 0.38328332,  0.64878332,  0.26642751,  0.23414977, -0.318297  ])

---

## Gradient Boosting Regressor

Now, we will use GradientBoostingRegressor

In [97]:
# Gradient boosting with default hyperparameters, 5-fold CV, R2.
gbrt_default_score = cross_val_score(GradientBoostingRegressor(), X, y, cv=5, scoring='r2')
# The label names the estimator actually used (it previously read "GradientDescentRegressor").
print('GradientBoostingRegressor cross val score (default): {}'.format(gbrt_default_score))
print("The mean score: {:.2f}".format(gbrt_default_score.mean()))

GradientDescentRegressor cross val score (default): [0.73370407 0.86294788 0.74353686 0.41000636 0.22397996]
The mean score: 0.59


To search for the best parameters we will use RandomizedSearchCV, which makes the process faster but may miss the true best parameters.

In [102]:
# Candidate values sampled by the randomized search.
param_grid = dict(
    n_estimators=[50, 100, 150, 200, 250, 300],
    max_depth=[*range(2, 11)],
    max_features=[*range(3, 22)],
)

# Sample 15 candidates, 5-fold CV, scored by R2, using all available cores.
grid_gbrt = RandomizedSearchCV(
    estimator=GradientBoostingRegressor(),
    param_distributions=param_grid,
    n_iter=15,
    scoring='r2',
    cv=5,
    n_jobs=-1,
).fit(X, y)

In [103]:
grid_gbrt.best_score_

0.6132612982962498

In [109]:
grid_gbrt.best_params_

{'n_estimators': 200, 'max_features': 12, 'max_depth': 2}

---

## AdaBoostRegressor

In [110]:
# AdaBoost with a default GradientBoostingRegressor as its base learner, 5-fold CV, R2.
ada_boost_default_score = cross_val_score(AdaBoostRegressor(GradientBoostingRegressor()), X, y, cv=5, scoring='r2')
# The label names the model actually evaluated (it previously read "GradientDescentRegressor").
print('AdaBoostRegressor (GradientBoostingRegressor base) cross val score (default): {}'.format(ada_boost_default_score))
print("The mean score: {:.2f}".format(ada_boost_default_score.mean()))

GradientDescentRegressor cross val score (default): [0.74336185 0.86523414 0.67868487 0.37091278 0.3051344 ]
The mean score: 0.59


In [None]:
# AdaBoost with a default RandomForestRegressor as its base learner, 5-fold CV, R2.
# NOTE(review): the base learner was a copy of the previous cell's
# GradientBoostingRegressor; RandomForestRegressor is inferred from the
# variable name `ada_rf_default_score` - confirm.
ada_rf_default_score = cross_val_score(AdaBoostRegressor(RandomForestRegressor()), X, y, cv=5, scoring='r2')
# Report the scores computed in this cell, not those of the previous one.
print('AdaBoostRegressor (RandomForestRegressor base) cross val score (default): {}'.format(ada_rf_default_score))
print("The mean score: {:.2f}".format(ada_rf_default_score.mean()))