### Ridge Regression Model With Hyperparameter and Feature Selection ###

In [1]:
import numpy as np

from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error
from sklearn.feature_selection import RFECV
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge, LinearRegression, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from Utils import *

#### Loading the Training and Testing Data ###

In [2]:
train = np.load('../Models/train.npy')

X = train[0:,0:-1]
y = train[:, -1]

X_train = np.load('../Models/X_train.npy')
y_train = np.load('../Models/y_train.npy')
X_test = np.load('../Models/X_test.npy')
y_test = np.load('../Models/y_test.npy')

In [3]:
rfecv_mae = RFECV(estimator=LinearRegression(), step=1, cv=10, scoring=mae_scorer_gs)
rfecv_rmse = RFECV(estimator=LinearRegression(), step=1, cv=10, scoring=rmse_scorer_gs)

rfecv_mae = rfecv_mae.fit(X_train, y_train)
rfecv_rmse = rfecv_rmse.fit(X_train, y_train)

prediction_mae = rfecv_mae.predict(X_test)
prediction_rmse = rfecv_rmse.predict(X_test)

print 'MAE {0}'.format(absolute_error(y_test, prediction_mae))
print 'RMSE {0}'.format(root_mean_squared(y_test, prediction_rmse))

# best_estim = rfecv.estimator_

MAE 3.55857969394
RMSE 4.58429548429


In [4]:
parameters = {
    'alpha': [0.01, 0.1, 1.0, 10, 100],
    'normalize': [True, False],
    'fit_intercept': [True, False]
}

In [5]:
import pandas as pd
df_data = pd.read_csv("../feature_engineered_dataset.csv")
feature_names = df_data.columns
import operator

In [6]:
def get_best_estimator(estimator, step, cv, scoring, parameters):
    clf_mae = GridSearchCV(estimator=estimator, param_grid=parameters, cv=cv, scoring=scoring, n_jobs=-1, verbose=10)
    clf_mae.fit(X_train, y_train)
    return clf_mae.best_estimator_

In [7]:
ridge = Ridge()
best_ridge = get_best_estimator(ridge, 1, 10, mae_scorer_gs, parameters)

print best_ridge

Fitting 10 folds for each of 20 candidates, totalling 200 fits
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.324433, total=   0.0s
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.930391, total=   0.0s


[Parallel(n_jobs=-1)]: Batch computation too fast (0.0485s.) Setting batch_size=8.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0485s.) Setting batch_size=64.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.644707, total=   0.0s
[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.754051, total=   0.0s
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.739443, total=   0.0s
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.755577, total=   0.0s
[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.372890, total=   0.0s
[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.593802, total=   0.0s
[CV]  normalize=True, alpha=0.01, fit_intercept=True, score=-3.818839, total=   0.0s
[CV] normalize=True, alpha=0.01, fit_intercept=True ..................
[CV]  normalize=True, alpha=0.01, fit_intercept=Tr

[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:    1.0s finished


In [8]:
print model_cross_validation(best_ridge, X, y, mae_scorer_cv, 10)

3.6038503309


In [9]:
lin_params = {
}

linear = LinearRegression()
best_linear = get_best_estimator(linear, 1, 10, mae_scorer_gs, lin_params)

Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV] ................................ , score=-3.304049, total=   0.1s
[CV] ................................ , score=-3.896892, total=   0.1s
[CV]  ................................................................
[CV] ................................ , score=-3.768517, total=   0.0s
[CV]  ................................................................
[CV] ...........

[Parallel(n_jobs=-1)]: Batch computation too fast (0.0851s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done   3 out of  10 | elapsed:    0.1s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done   5 out of  10 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:    0.2s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.2s finished


In [10]:
print model_cross_validation(linear, X, y, mae_scorer_cv, 10)

3.61816601526


In [11]:
random_forest = RandomForestRegressor()

param_random_forest = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [5, 8, 15, 25, 30, None],
    'min_samples_split': [2, 5, 10, 15, 50],
    'min_samples_leaf': [1, 2, 5, 10],
    'max_features': ['log2', 'sqrt', None]
}

In [None]:
best_forest = get_best_estimator(random_forest, 1, 10, mae_scorer_gs, param_random_forest)

Fitting 10 folds for each of 1440 candidates, totalling 14400 fits
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.629583, total=   0.1s
[CV]  max_features=log2, min_samples_split=2, 

[Parallel(n_jobs=-1)]: Batch computation too fast (0.1750s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.4s


[CV] max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.787011, total=   0.5s
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.819878, total=   0.5s
[CV] max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.597183, total=   0.6s
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-4.017735, total=   0.6s
[CV] max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.722628, total=   0.5s
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, mi

[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.9s


[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.562880, total=   0.5s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.647332, total=   0.5s
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.572104, total=   1.0s
[CV] max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.835546, total=   1.0s
[CV] max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.

[Parallel(n_jobs=-1)]: Batch computation too slow (2.0261s.) Setting batch_size=1.


[CV]  max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.740081, total=   0.1s
[CV] max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-4.112665, total=   0.1s
[CV]  max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-4.027099, total=   0.1s
[CV] max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.932065, total=   0.1s
[CV] max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.839352,

[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.9s


[CV]  max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.646785, total=   0.1s
[CV]  max_features=log2, min_samples_split=5, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.634961, total=   0.1s
[CV] max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.816506, total=   2.4s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.689110, total=   2.5s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.534

[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    6.1s


[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.593822, total=   0.5s
[CV] max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-4.016589, total=   2.2s
[CV] max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.730721, total=   2.3s
[CV] max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.560140, total=   1.1s
[CV]  max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-4.060148, total=   1.1s
[CV] max_features=log2, min_samples_split=5, n_estimators=100, max_depth=

[Parallel(n_jobs=-1)]: Done  62 tasks      | elapsed:    8.1s


[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.537262, total=   2.2s
[CV] max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.572588, total=   2.3s
[CV] max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.661006, total=   1.1s
[CV] max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.533245, total=   1.1s
[CV] max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-

[Parallel(n_jobs=-1)]: Done  75 tasks      | elapsed:   11.1s


[CV] max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.623218, total=   2.2s
[CV]  max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.697822, total=   0.1s
[CV] max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.831341, total=   0.1s
[CV] max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.606746, total=   0.1s
[CV]  ma

[Parallel(n_jobs=-1)]: Done  88 tasks      | elapsed:   11.5s


[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.540042, total=   0.6s
[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-4.110099, total=   0.6s
[CV] max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.840534, total=   0.6s
[CV] max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-4.030875, total=   0.6s
[CV] max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-

[Parallel(n_jobs=-1)]: Done 101 tasks      | elapsed:   13.5s


[CV]  max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-4.009534, total=   1.1s
[CV] max_features=log2, min_samples_split=10, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.727690, total=   1.1s
[CV] max_features=log2, min_samples_split=10, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.740387, total=   1.1s
[CV] max_features=log2, min_samples_split=10, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.796335, total=   1.2s
[CV] max_features=log2, min_samples_split=10, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=1

[Parallel(n_jobs=-1)]: Done 116 tasks      | elapsed:   16.7s


[CV]  max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.817135, total=   0.1s
[CV] max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.722830, total=   2.1s
[CV] max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=1, score=-3.774803, total=   0.1s
[CV] max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.717562, total=   2.1s
[CV] max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=1, score

[Parallel(n_jobs=-1)]: Done 131 tasks      | elapsed:   17.8s


[CV]  max_features=log2, min_samples_split=15, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.636405, total=   0.6s
[CV] max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=15, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.536919, total=   0.6s
[CV] max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=15, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.560183, total=   0.5s
[CV]  max_features=log2, min_samples_split=15, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.890187, total=   0.6s
[CV] max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=10, n_estimators=200, max_depth=5, min_samples_leaf=1, sc

[Parallel(n_jobs=-1)]: Done 148 tasks      | elapsed:   20.4s


[CV]  max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.555994, total=   2.2s
[CV] max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-4.070397, total=   2.2s
[CV] max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-3.808607, total=   2.2s
[CV]  max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=1, score=-4.015764, total=   2.2s
[CV] max_features=log2, min_samples_split=50, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=50, n_estimators=10, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=50, n_estimators=10, max_depth=5, min_samples_leaf=1, s

[Parallel(n_jobs=-1)]: Done 165 tasks      | elapsed:   22.9s


[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.604506, total=   0.6s
[CV] max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-4.096856, total=   0.6s
[CV] max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.869317, total=   0.6s
[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-3.754715, total=   0.5s
[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=1, score=-4.031218, total=   0.6s
[CV] max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=50, n_estimators=50, max_depth

[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   25.4s


[CV]  max_features=log2, min_samples_split=50, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.666971, total=   1.1s
[CV] max_features=log2, min_samples_split=50, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=50, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.528551, total=   1.1s
[CV] max_features=log2, min_samples_split=50, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=50, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.821309, total=   1.1s
[CV]  max_features=log2, min_samples_split=50, n_estimators=100, max_depth=5, min_samples_leaf=1, score=-3.624310, total=   1.1s
[CV] max_features=log2, min_samples_split=50, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV] max_features=log2, min_samples_split=50, n_estimators=200, max_depth=5, min_samples_leaf=1 
[CV]  max_features=log2, min_samples_split=50, n_estimators=200, max_depth=5, min_samples_leaf=1

[Parallel(n_jobs=-1)]: Done 203 tasks      | elapsed:   28.4s


[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.544754, total=   0.6s
[CV] max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-4.020557, total=   0.6s
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.836028, total=   0.6s
[CV] max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV] max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.734712, total=   0.6s
[CV]  max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.767402, total=   0.6s
[CV] max_features=log2, min_samples_split=2, n_estimators=50, max_depth=5, min_s

[Parallel(n_jobs=-1)]: Done 224 tasks      | elapsed:   31.2s


[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=2, score=-3.608891, total=   1.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=2, score=-3.543620, total=   1.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=2, score=-3.848963, total=   1.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=2, score=-3.609144, total=   1.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=2, score=-

[Parallel(n_jobs=-1)]: Done 245 tasks      | elapsed:   34.3s


[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.598869, total=   0.6s
[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-4.013486, total=   0.6s
[CV] max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-4.067755, total=   0.6s
[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.798665, total=   0.6s
[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.741686, total=   0.6s
[CV] max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV] max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV] max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_s

[Parallel(n_jobs=-1)]: Done 268 tasks      | elapsed:   37.5s


[CV]  max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=2, score=-3.564962, total=   2.2s
[CV] max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=2, score=-3.801777, total=   2.2s
[CV] max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=2, score=-4.076536, total=   2.2s
[CV] max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=2, score=-4.025626, total=   2.2s
[CV] max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=10, n_estimators=10, max_depth=5, min_samples_leaf=2, score=-

[Parallel(n_jobs=-1)]: Done 291 tasks      | elapsed:   40.7s


[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.593996, total=   0.6s
[CV] max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.548374, total=   0.6s
[CV] max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.858654, total=   0.6s
[CV] max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=10, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.615703, total=   0.6s
[CV] max_features=log2, min_samples_split=10, n_estimators=100, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=5, n_estimators=200, max_depth=5, min_samples_leaf=2, sco

[Parallel(n_jobs=-1)]: Done 316 tasks      | elapsed:   45.3s


[CV]  max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=2, score=-3.694554, total=   0.1s
[CV] max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=2, score=-3.778588, total=   0.1s
[CV] max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=2, score=-3.673557, total=   0.1s
[CV] max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=10, n_estimators=200, max_depth=5, min_samples_leaf=2, score=-3.715728, total=   2.2s
[CV] max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=15, n_estimators=10, max_depth=5, min_samples_leaf=2, score=

[Parallel(n_jobs=-1)]: Done 341 tasks      | elapsed:   47.8s


[CV]  max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=2, score=-3.805554, total=   1.2s
[CV] max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=2, score=-3.992058, total=   1.2s
[CV] max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=2, score=-3.747865, total=   1.2s
[CV] max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=2, score=-3.742934, total=   1.3s
[CV] max_features=log2, min_samples_split=15, n_estimators=200, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=15, n_estimators=100, max_depth=5, min_samples_leaf=2

[Parallel(n_jobs=-1)]: Done 368 tasks      | elapsed:   51.6s


[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.594119, total=   0.6s
[CV] max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-4.097491, total=   0.6s
[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.902294, total=   0.6s
[CV] max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV] max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=2 
[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.733438, total=   0.6s
[CV]  max_features=log2, min_samples_split=50, n_estimators=50, max_depth=5, min_samples_leaf=2, score=-3.998642, total=   0.6s
[CV] max_features=log2, min_samples_split=50, n_estimators=50, max_depth

[Parallel(n_jobs=-1)]: Done 395 tasks      | elapsed:   56.7s


[CV]  max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=5, score=-3.734743, total=   0.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=5, score=-3.787798, total=   0.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=50, n_estimators=200, max_depth=5, min_samples_leaf=2, score=-3.744601, total=   2.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, min_samples_leaf=5, score=-3.746237, total=   0.1s
[CV]  max_features=log2, min_samples_split=50, n_estimators=200, max_depth=5, min_samples_leaf=2, score=-3.738733, total=   2.2s
[CV] max_features=log2, min_samples_split=2, n_estimators=10, max_depth=5, m

[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:   59.8s


[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=5, score=-3.624365, total=   1.2s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=5, score=-3.540669, total=   1.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=5, score=-3.857733, total=   1.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=2, n_estimators=100, max_depth=5, min_samples_leaf=5, score=-3.595089, total=   1.1s
[CV] max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=5, score=-

[Parallel(n_jobs=-1)]: Done 453 tasks      | elapsed:  1.1min


[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=5, score=-3.678944, total=   0.6s
[CV] max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=5, score=-3.546597, total=   0.6s
[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=5, score=-3.849672, total=   0.6s
[CV] max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=5 
[CV] max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=5, n_estimators=50, max_depth=5, min_samples_leaf=5, score=-3.583743, total=   0.6s
[CV] max_features=log2, min_samples_split=5, n_estimators=100, max_depth=5, min_samples_leaf=5 
[CV]  max_features=log2, min_samples_split=2, n_estimators=200, max_depth=5, min_samples_leaf=5, score=-3.56

In [41]:
print model_cross_validation(best_forest, X, y, mae_scorer_cv, 10)

3.83106378686


In [18]:
# tree = DecisionTreeRegressor()

# param_tree = {
#     'estimator__n_estimators': [1, 5, 10, 50, 100, 300, 500, 1000],
#     'estimator__max_depth': [5, 8, 15, 25, 30, None],
#     'estimator__min_samples_split': [2, 5, 10, 15, 100],
#     'estimator__min_samples_leaf': [1, 2, 5, 10],
#     'estimator__max_features': ['log2', 'sqrt', None, 10]
# }

# best_knn = get_best_estimator(tree, 1, 10, mae_scorer_gs, param_random_forest)

In [19]:
# print model_cross_validation(best_knn.estimator_, X, y, mae_scorer_cv, 10)

In [None]:
svr = SVR()

svr_parameters = {
    'C': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0],
    'kernel': ['rbf', 'poly'],
    'epsilon': [0.0, 0.1, 0.2, 0.5, 1.0]
}


best_svr = get_best_estimator(svr, 1, 10, mae_scorer_gs, svr_parameters)



In [18]:
print model_cross_validation(svr, X, y, mae_scorer_cv, 10)

5.80977421933


In [21]:
bagging = BaggingRegressor(base_estimator=best_ridge, n_estimators=50, random_state=42)

In [22]:
bagging.fit(X_train, y_train)

BaggingRegressor(base_estimator=Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=50, n_jobs=1, oob_score=False,
         random_state=42, verbose=0, warm_start=False)

In [23]:
print model_cross_validation(bagging, X, y, mae_scorer_cv, 10)

3.60498900667
