In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
import sklearn

In [3]:
data = pd.read_csv('a.csv')

In [4]:
data.head()

Unnamed: 0,state,Locationdesc,country_region,last_update,lat,long,confirmed,deaths,recovered,active,...,percent_american_indian_alaska_native,percent_asian,percent_native_hawaiian_other_pacific_islander,percent_hispanic,percent_non_hispanic_white,percent_not_proficient_in_english,percent_female,percent_rural,hospital_bed_occupancy_rate,icu_bed_occupancy_rate
0,KY,Kentucky,US,4/13/20 13:14,37.6681,-84.6701,2018,113,607.0,1905,...,0.300152,1.629352,0.090815,3.847751,84.288141,1.024374,50.731581,41.619527,0.6,0.65
1,OH,Ohio,US,4/13/20 13:14,40.3888,-82.7649,6604,253,,6351,...,0.287148,2.457106,0.059832,3.942678,78.669127,1.038759,50.975265,22.076099,0.59,0.64
2,WI,Wisconsin,US,4/13/20 13:14,44.2685,-89.6165,3341,144,,3197,...,1.162298,3.029929,0.055783,6.948487,81.067152,1.418583,50.249606,29.846179,0.58,0.55
3,OR,Oregon,US,4/13/20 13:14,44.572,-122.0709,1527,52,,1475,...,1.814083,4.829584,0.45336,13.276882,75.292104,2.985884,50.42624,18.968362,0.63,0.6
4,MO,Missouri,US,4/13/20 13:14,38.4561,-92.2884,4272,118,,4154,...,0.571603,2.139019,0.156453,4.297185,79.300417,0.972306,50.908568,29.563827,0.6,0.58


In [5]:
cor = data.corr()
cor = abs(cor['mortality_rate'])
print(cor[cor > 0.3])

deaths                                  0.312537
recovered                               0.311262
incident_rate                           0.316695
mortality_rate                          1.000000
hospitalization_rate                    0.497373
CLASS04_TOPIC08__FOBTFS_RESP209_BO1     0.449853
CLASS05_TOPIC02_AGE_RESP013_BO1         0.390366
CLASS05_TOPIC61_VETERAN3_RESP046_BO1    0.369961
CLASS05_TOPIC61_VETERAN3_RESP054_BO1    0.369961
suicide_rate_age_adjusted               0.325317
crude_rate                              0.302356
Name: mortality_rate, dtype: float64


In [6]:
data.drop([33, 47], inplace=True) # Get rid of Guam/Puerto Rico
y = data['mortality_rate'] # Labels
states = data['state'] # If we want to look a state up later
data.drop(columns=['mortality_rate', 'Locationdesc', 'country_region', 'last_update', 'lat', 'long', 'confirmed', 'deaths',
                  'recovered', 'active', 'people_tested', 'people_hospitalized', 'testing_rate', 'incident_rate', 'hospitalization_rate',
                  'state'], inplace=True)
data.fillna(data.mean(), inplace=True)


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

scaled = StandardScaler().fit_transform(data)
X = pd.DataFrame(scaled, columns=data.columns)

In [8]:
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import LinearSVR
from sklearn.metrics import make_scorer

def evaluate_model(model, param_dict, passes=10):
    min_test_err = 1e10
    best_hyperparams = {}
    corr_full_err = None
    scorer = make_scorer(mean_squared_error, greater_is_better=False)
    for i in range(passes):
        print('Pass {}/10 for model {}'.format(i + 1, model))
        X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2)
        
        default_model = model()
        model_gs = GridSearchCV(default_model, param_dict, cv=3, n_jobs=16, verbose=1, scoring=scorer)
        model_gs.fit(X_train, y_train)
        optimal_model = model(**model_gs.best_params_)
        optimal_model.fit(X_train, y_train)
        y_pred = optimal_model.predict(X_test)
        err = mean_squared_error(y_test, y_pred)
        
        full_y_pred=optimal_model.predict(data)
        full_err = mean_squared_error(full_y_pred, y)
        #print('MSE for {}: {}'.format(model, err))
        if err < min_test_err:
            min_test_err = err
            best_hyperparams = model_gs.best_params_
            corr_full_err = full_err
    print('Model {} with hyperparams {} yielded test error {} - overall error {}'.format(model, best_hyperparams, min_test_err, corr_full_err))
   

In [93]:
     
    
evaluate_model(LassoCV, {'eps': [0.001, 0.002, 0.003], 
                                'n_alphas':[200, 400, 600],
                                'tol': [0.001, 0.005, 0.01],
                                'max_iter': [4000, 7000]})

#evaluate_model(Ridge, {'alpha' : [(0.1, 0.3, 0.7, 1.0, 2.0, 5.0)]})

evaluate_model(KNeighborsRegressor, {'n_neighbors' : np.arange(1, 10)})



Pass 1/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    9.7s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   58.2s finished


Pass 2/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    3.3s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   16.5s finished


Pass 3/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    5.7s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   30.3s finished


Pass 4/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    4.4s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   26.7s finished


Pass 5/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    9.2s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   52.7s finished


Pass 6/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    2.5s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   13.5s finished


Pass 7/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    9.2s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   42.2s finished


Pass 8/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    4.1s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   25.2s finished


Pass 9/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    5.3s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   31.7s finished


Pass 10/10 for model <class 'sklearn.linear_model.coordinate_descent.LassoCV'>
Fitting 3 folds for each of 54 candidates, totalling 162 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    4.9s
[Parallel(n_jobs=16)]: Done 162 out of 162 | elapsed:   30.6s finished


Model <class 'sklearn.linear_model.coordinate_descent.LassoCV'> with hyperparams {'eps': 0.003, 'max_iter': 4000, 'n_alphas': 600, 'tol': 0.001} yielded test error 0.656226228958609 - overall error 1.5179652135494348
Pass 1/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits
Pass 2/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Pass 3/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Pass 4/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Pass 5/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Pass 6/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Pass 7/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Pass 8/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Pass 9/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Pass 10/10 for model <class 'sklearn.neighbors.regression.KNeighborsRegressor'>
Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


Model <class 'sklearn.neighbors.regression.KNeighborsRegressor'> with hyperparams {'n_neighbors': 7} yielded test error 0.8909413879436343 - overall error 1.3352701937221834


[Parallel(n_jobs=16)]: Done  24 out of  27 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=16)]: Done  27 out of  27 | elapsed:    0.1s finished


In [None]:

evaluate_model(GradientBoostingRegressor, {
                                'learning_rate': [0.1, 0.05, 0.02], 
                                'n_estimators': [100, 200, 400, 800],
                                'max_depth': [1, 2, 3, 4, 5],
                                'max_features' : ['auto', 'sqrt', 'log2']})

evaluate_model(DecisionTreeRegressor, {'splitter': ['best', 'random'], 
                                'criterion': ['mse', 'friedman_mse', 'mae'],
                                'max_depth': [None, 2, 3, 4, 5],
                                'max_features' : ['auto', 'sqrt', 'log2']})

evaluate_model(RandomForestRegressor, {'n_estimators': [100, 200, 400, 800], 
                                'max_depth': [None, 2, 3, 4, 5],
                            'min_samples_split': [2, 3, 4],
                                'max_features' : ['auto', 'sqrt', 'log2']})

evaluate_model(MLPRegressor, {'hidden_layer_sizes': [(100,) * 3, (100,) * 10, (100,) * 30, (100,) * 100]})




Pass 1/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.2s finished


Pass 2/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s


Pass 3/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.0s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.3s


Pass 4/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.5s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    5.8s finished


Pass 5/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s


Pass 6/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.1s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s


Pass 7/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.5s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.1s finished


Pass 8/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s


Pass 9/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.0s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s


Pass 10/10 for model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.0s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s


Model <class 'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'> with hyperparams {'learning_rate': 0.02, 'max_depth': 1, 'max_features': 'log2', 'n_estimators': 100} yielded test error 1.0002394671953485 - overall error 0.9273150666345432
Pass 1/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:    6.0s finished


Pass 2/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Pass 3/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Pass 4/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Pass 5/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Pass 6/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Pass 7/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Pass 8/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Pass 9/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Pass 10/10 for model <class 'sklearn.tree.tree.DecisionTreeRegressor'>
Fitting 3 folds for each of 90 candidates, totalling 270 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished


Model <class 'sklearn.tree.tree.DecisionTreeRegressor'> with hyperparams {'criterion': 'mae', 'max_depth': 2, 'max_features': 'sqrt', 'splitter': 'best'} yielded test error 1.4901349356289784 - overall error 1.0353395284896112
Pass 1/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done 270 out of 270 | elapsed:    0.2s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.2s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.9s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   27.9s finished


Pass 2/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.9s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   27.9s finished


Pass 3/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.9s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   28.0s finished


Pass 4/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.9s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   27.7s finished


Pass 5/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.9s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   28.2s finished


Pass 6/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.9s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   28.0s finished


Pass 7/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.9s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   28.1s finished


Pass 8/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.8s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   27.8s finished


Pass 9/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.9s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   28.0s finished


Pass 10/10 for model <class 'sklearn.ensemble.forest.RandomForestRegressor'>
Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    8.8s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:   21.3s
[Parallel(n_jobs=16)]: Done 540 out of 540 | elapsed:   27.9s finished


Model <class 'sklearn.ensemble.forest.RandomForestRegressor'> with hyperparams {'max_depth': 2, 'max_features': 'log2', 'min_samples_split': 2, 'n_estimators': 100} yielded test error 0.7155784118963757 - overall error 0.8293184113474716
Pass 1/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    0.3s remaining:    0.2s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    1.8s finished


Pass 2/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    1.4s remaining:    1.0s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    3.8s finished


Pass 3/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    1.1s remaining:    0.8s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    4.1s finished


Pass 4/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    0.6s remaining:    0.4s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    4.5s finished


Pass 5/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    0.9s remaining:    0.6s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    3.4s finished


Pass 6/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    0.5s remaining:    0.3s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    2.8s finished


Pass 7/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    0.4s remaining:    0.3s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    3.3s finished


Pass 8/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    1.3s remaining:    0.9s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    3.5s finished


Pass 9/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    1.1s remaining:    0.8s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    3.4s finished


Pass 10/10 for model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=16)]: Done   7 out of  12 | elapsed:    0.5s remaining:    0.3s
[Parallel(n_jobs=16)]: Done  12 out of  12 | elapsed:    2.5s finished


Model <class 'sklearn.neural_network.multilayer_perceptron.MLPRegressor'> with hyperparams {'hidden_layer_sizes': (100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100)} yielded test error 1.2899468478922822 - overall error 1.5390458738020465
Pass 1/10 for model <class 'sklearn.svm.classes.SVR'>
Fitting 3 folds for each of 2 candidates, totalling 6 fits


[Parallel(n_jobs=16)]: Done   3 out of   6 | elapsed:    5.1s remaining:    5.1s
[Parallel(n_jobs=16)]: Done   6 out of   6 | elapsed:   18.7s finished


In [13]:
evaluate_model(LinearSVR, {'tol': [1e-4, 1e-5, 1e-6, 1e-7],
                          'C' : [0.5, 1, 3, 5, 10],
                          'max_iter': [1000, 2000, 4000, 8000]}, passes=10)

Pass 1/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s


Pass 2/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.1s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    1.8s finished


Pass 3/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s


Pass 4/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.2s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.2s finished


Pass 5/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s


Pass 6/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.1s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.2s finished


Pass 7/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s


Pass 8/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.4s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s


Pass 9/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.2s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s


Pass 10/10 for model <class 'sklearn.svm.classes.LinearSVR'>
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.0s finished
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.1s


Model <class 'sklearn.svm.classes.LinearSVR'> with hyperparams {'C': 1, 'max_iter': 4000, 'tol': 1e-07} yielded test error 1.3201363665805965 - overall error 1.5351629003173854


[Parallel(n_jobs=16)]: Done 240 out of 240 | elapsed:    2.0s finished
