In [84]:
import numpy as np
import pandas as pd
from collections import namedtuple

# Lightweight record pairing a feature matrix (X) with its response vector (y).
Data = namedtuple('Data', ['X', 'y'])

# Seed NumPy's global generator so the synthetic samples are repeatable.
np.random.seed(seed=37)

def get_data(N=10000):
    """Draw ``N`` synthetic samples from a noisy two-feature linear model.

    The features are sampled from N(0, 1) and N(3, 1); the response is
    ``5.3 + 3.0 * x0 - 2.9 * x1`` plus N(0, 1) noise. All draws come from
    NumPy's global random state, in the order x0, x1, noise.

    Returns a ``Data`` tuple whose ``X`` has shape ``(N, 2)`` and whose
    ``y`` has shape ``(N,)``.
    """
    x0 = np.random.normal(0, 1, N)
    x1 = np.random.normal(3, 1, N)
    noise = np.random.normal(0, 1, N)

    # Noise-free part first, then the noise, to keep the summation order fixed.
    signal = 5.3 + (3.0 * x0) - (2.9 * x1)
    features = np.column_stack((x0, x1))
    return Data(features, signal + noise)

# Training set (default sample count).
T = get_data()

# Validation set: a smaller, independently drawn sample.
V = get_data(N=1000)

# Noise-free response implied by the generating coefficients for the
# validation inputs; printed only as a length sanity check.
y_true = 5.3 + (3.0 * V.X[:, 0]) - (2.9 * V.X[:, 1])
print(y_true.shape[0])

1000


In [85]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares baseline; fit() hands back the estimator, so
# construction and fitting are chained.
lr = LinearRegression().fit(T.X, T.y)
print(
    f'the intercept is {lr.intercept_}',
    f'the coef is {lr.coef_}',
    sep='\n',
)

the intercept is 5.286808205950892
the coef is [ 3.01028299 -2.89392723]


In [86]:
from sklearn.linear_model import Ridge

# Ridge regression with penalty strength alpha=5; fit() hands back the
# estimator, so construction and fitting are chained.
ridge = Ridge(alpha=5).fit(T.X, T.y)
print(
    f'the intercept is {ridge.intercept_}',
    f'the coef is {ridge.coef_}',
    sep='\n',
)

the intercept is 5.282492317608582
the coef is [ 3.00875291 -2.8924833 ]


In [87]:
from sklearn.linear_model import Lasso

# Lasso regression with penalty strength alpha=0.5; fit() hands back the
# estimator, so construction and fitting are chained.
lasso = Lasso(alpha=0.5).fit(T.X, T.y)
print(
    f'the intercept is {lasso.intercept_}',
    f'the coef is {lasso.coef_}',
    sep='\n',
)

the intercept is 3.7951396720147397
the coef is [ 2.50158793 -2.39488658]


In [88]:
from sklearn.linear_model import ElasticNet

# Elastic net with penalty strength alpha=0.5 (other settings left at the
# library defaults); fit() hands back the estimator, so the calls are chained.
en = ElasticNet(alpha=0.5).fit(T.X, T.y)
print(
    f'the intercept is {en.intercept_}',
    f'the coef is {en.coef_}',
    sep='\n',
)

the intercept is 2.9621918681897306
the coef is [ 2.19723338 -2.11621056]


In [89]:
from sklearn.linear_model import BayesianRidge

# Bayesian ridge regression with default settings; fit() hands back the
# estimator, so construction and fitting are chained.
brr = BayesianRidge().fit(T.X, T.y)
print(
    f'the intercept is {brr.intercept_}',
    f'the coef is {brr.coef_}',
    sep='\n',
)

the intercept is 5.286710685406411
the coef is [ 3.01024842 -2.8938946 ]


In [90]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 100 trees with a fixed seed for repeatable fits.
rf = RandomForestRegressor(random_state=37, n_estimators=100)
# fit(...) stays the cell's last expression so the notebook still echoes the estimator.
rf.fit(X=T.X, y=T.y)

In [91]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient-boosted trees with default settings and a fixed seed.
gbr = GradientBoostingRegressor(random_state=37)
# fit(...) stays the cell's last expression so the notebook still echoes the estimator.
gbr.fit(X=T.X, y=T.y)

In [92]:
from sklearn.ensemble import AdaBoostRegressor

# AdaBoost regressor with default settings and a fixed seed.
abr = AdaBoostRegressor(random_state=37)
# fit(...) stays the cell's last expression so the notebook still echoes the estimator.
abr.fit(X=T.X, y=T.y)

In [93]:
from sklearn.ensemble import BaggingRegressor

# Bagging regressor with default settings and a fixed seed.
bagr = BaggingRegressor(random_state=37)
# fit(...) stays the cell's last expression so the notebook still echoes the estimator.
bagr.fit(X=T.X, y=T.y)

In [94]:
from sklearn.neural_network import MLPRegressor

# Multi-layer perceptron regressor with default settings and a fixed seed.
mlp = MLPRegressor(random_state=37)
# fit(...) stays the cell's last expression so the notebook still echoes the estimator.
mlp.fit(X=T.X, y=T.y)

In [95]:
# Every fitted estimator, in the order the cells above created them.
models = [
    lr, ridge, lasso, en, brr,   # linear family
    rf, gbr, abr, bagr,          # tree ensembles
    mlp,                         # neural network
]

In [100]:
from sklearn.metrics import explained_variance_score

def get_score(model, X, y_true, metric=None):
    """Score a fitted model's predictions on ``X`` against ``y_true``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Inputs handed to ``model.predict``.
    y_true : array-like
        Reference targets, passed as the metric's first argument.
    metric : callable, optional
        Called as ``metric(y_true, y_pred)``. When omitted, the score is
        ``explained_variance_score``, the metric this cell reports. Passing a
        different callable avoids redefining this function per metric.

    Returns
    -------
    tuple
        ``(score, name)`` where ``name`` is the model's class name.
    """
    if metric is None:
        # Looked up at call time so the default stays this cell's metric.
        metric = explained_variance_score
    y_pred = model.predict(X)
    return metric(y_true, y_pred), type(model).__name__

def print_score(scores):
    """Print one ``<score to 5 decimals> : <name>`` line per ``(score, name)`` pair."""
    # Generator keeps formatting lazy: each line is printed as soon as it is built.
    rendered = (f'{score:.5f} : {name}' for score, name in scores)
    for line in rendered:
        print(line)

# Score every model on the validation set, then list the largest score first.
scores = [get_score(model, V.X, V.y) for model in models]
scores.sort(key=lambda pair: pair[0], reverse=True)
print_score(scores)

0.94408 : BayesianRidge
0.94408 : LinearRegression
0.94408 : Ridge
0.94380 : MLPRegressor
0.94192 : GradientBoostingRegressor
0.93510 : RandomForestRegressor
0.93079 : BaggingRegressor
0.91760 : AdaBoostRegressor
0.91650 : Lasso
0.87551 : ElasticNet


In [98]:
from sklearn.metrics import max_error

def get_score(model, X, y_true, metric=None):
    """Score a fitted model's predictions on ``X`` against ``y_true``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Inputs handed to ``model.predict``.
    y_true : array-like
        Reference targets, passed as the metric's first argument.
    metric : callable, optional
        Called as ``metric(y_true, y_pred)``. When omitted, the score is
        ``max_error``, the metric this cell reports. Passing a different
        callable avoids redefining this function per metric.

    Returns
    -------
    tuple
        ``(score, name)`` where ``name`` is the model's class name.
    """
    if metric is None:
        # Looked up at call time so the default stays this cell's metric.
        metric = max_error
    y_pred = model.predict(X)
    return metric(y_true, y_pred), type(model).__name__

# Score every model on the validation set, then list the smallest value first.
scores = [get_score(model, V.X, V.y) for model in models]
scores.sort(key=lambda pair: pair[0])
print_score(scores)

3.03046 : GradientBoostingRegressor
3.19835 : LinearRegression
3.19836 : BayesianRidge
3.19877 : Ridge
3.23994 : MLPRegressor
3.36372 : RandomForestRegressor
3.90285 : BaggingRegressor
4.38630 : AdaBoostRegressor
4.64630 : Lasso
5.57094 : ElasticNet


In [101]:
from sklearn.metrics import mean_absolute_error

def get_score(model, X, y_true, metric=None):
    """Score a fitted model's predictions on ``X`` against ``y_true``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Inputs handed to ``model.predict``.
    y_true : array-like
        Reference targets, passed as the metric's first argument.
    metric : callable, optional
        Called as ``metric(y_true, y_pred)``. When omitted, the score is
        ``mean_absolute_error``, the metric this cell reports. Passing a
        different callable avoids redefining this function per metric.

    Returns
    -------
    tuple
        ``(score, name)`` where ``name`` is the model's class name.
    """
    if metric is None:
        # Looked up at call time so the default stays this cell's metric.
        metric = mean_absolute_error
    y_pred = model.predict(X)
    return metric(y_true, y_pred), type(model).__name__

# Score every model on the validation set, then list the smallest value first.
scores = [get_score(model, V.X, V.y) for model in models]
scores.sort(key=lambda pair: pair[0])
print_score(scores)

0.80887 : LinearRegression
0.80887 : BayesianRidge
0.80888 : Ridge
0.81840 : MLPRegressor
0.82440 : GradientBoostingRegressor
0.87869 : RandomForestRegressor
0.90589 : BaggingRegressor
0.98418 : AdaBoostRegressor
0.99474 : Lasso
1.21903 : ElasticNet


In [103]:
from sklearn.metrics import mean_squared_error

def get_score(model, X, y_true, metric=None):
    """Score a fitted model's predictions on ``X`` against ``y_true``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Inputs handed to ``model.predict``.
    y_true : array-like
        Reference targets, passed as the metric's first argument.
    metric : callable, optional
        Called as ``metric(y_true, y_pred)``. When omitted, the score is
        ``mean_squared_error``, the metric this cell reports. Passing a
        different callable avoids redefining this function per metric.

    Returns
    -------
    tuple
        ``(score, name)`` where ``name`` is the model's class name.
    """
    if metric is None:
        # Looked up at call time so the default stays this cell's metric.
        metric = mean_squared_error
    y_pred = model.predict(X)
    return metric(y_true, y_pred), type(model).__name__

# Score every model on the validation set, then list the smallest value first.
scores = [get_score(model, V.X, V.y) for model in models]
scores.sort(key=lambda pair: pair[0])
print_score(scores)


1.04706 : LinearRegression
1.04706 : BayesianRidge
1.04706 : Ridge
1.06718 : MLPRegressor
1.08764 : GradientBoostingRegressor
1.21508 : RandomForestRegressor
1.29575 : BaggingRegressor
1.54414 : AdaBoostRegressor
1.56425 : Lasso
2.33287 : ElasticNet


In [104]:
from sklearn.metrics import median_absolute_error

def get_score(model, X, y_true, metric=None):
    """Score a fitted model's predictions on ``X`` against ``y_true``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Inputs handed to ``model.predict``.
    y_true : array-like
        Reference targets, passed as the metric's first argument.
    metric : callable, optional
        Called as ``metric(y_true, y_pred)``. When omitted, the score is
        ``median_absolute_error``, the metric this cell reports. Passing a
        different callable avoids redefining this function per metric.

    Returns
    -------
    tuple
        ``(score, name)`` where ``name`` is the model's class name.
    """
    if metric is None:
        # Looked up at call time so the default stays this cell's metric.
        metric = median_absolute_error
    y_pred = model.predict(X)
    return metric(y_true, y_pred), type(model).__name__

# Score every model on the validation set, then list the smallest value first.
scores = [get_score(model, V.X, V.y) for model in models]
scores.sort(key=lambda pair: pair[0])
print_score(scores)


0.65827 : BayesianRidge
0.65828 : LinearRegression
0.65934 : Ridge
0.66430 : GradientBoostingRegressor
0.66623 : MLPRegressor
0.75635 : RandomForestRegressor
0.77256 : BaggingRegressor
0.82732 : AdaBoostRegressor
0.83284 : Lasso
1.01466 : ElasticNet


In [105]:
from sklearn.metrics import r2_score

def get_score(model, X, y_true, metric=None):
    """Score a fitted model's predictions on ``X`` against ``y_true``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Inputs handed to ``model.predict``.
    y_true : array-like
        Reference targets, passed as the metric's first argument.
    metric : callable, optional
        Called as ``metric(y_true, y_pred)``. When omitted, the score is
        ``r2_score``, the metric this cell reports. Passing a different
        callable avoids redefining this function per metric.

    Returns
    -------
    tuple
        ``(score, name)`` where ``name`` is the model's class name.
    """
    if metric is None:
        # Looked up at call time so the default stays this cell's metric.
        metric = r2_score
    y_pred = model.predict(X)
    return metric(y_true, y_pred), type(model).__name__

# Score every model on the validation set, then list the largest score first.
scores = [get_score(model, V.X, V.y) for model in models]
scores.sort(key=lambda pair: pair[0], reverse=True)
print_score(scores)

0.94408 : LinearRegression
0.94408 : BayesianRidge
0.94407 : Ridge
0.94300 : MLPRegressor
0.94191 : GradientBoostingRegressor
0.93510 : RandomForestRegressor
0.93079 : BaggingRegressor
0.91753 : AdaBoostRegressor
0.91645 : Lasso
0.87540 : ElasticNet
