# Models score

In [1]:
import numpy as np
import os
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

np.set_printoptions(suppress=True)

In [2]:
datasets = []
data_list = os.listdir("./datasets/")
data_list.sort()
for data_str in data_list:
    datasets.append(np.genfromtxt("./datasets/"+data_str, delimiter=","))

In [3]:
from sklearn.model_selection import cross_val_predict, KFold, GridSearchCV
from sklearn.metrics import mean_squared_error, make_scorer
skf_cv = KFold(10, True)
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)
scaler = StandardScaler()

We will use following models:
1. Multilayer Perceptron Network
2. k Nearest Neighbors
3. Random Forest
4. Bayesian Ridge Regression
5. Support Vector Machine for Regression case
5. Gradient Boosting on Regression Decision Trees

In [5]:
mlp_scores = [0.0 for i in range(len(data_list))]

mlp_pgrid = {'hidden_layer_sizes':[(5, 10, 10, 5)],
             'activation': ['relu', 'logistic'],
             'solver': ['lbfgs', 'sgd'],
             'max_iter': [7000]}

mlp_gs = GridSearchCV(MLPRegressor(), param_grid=mlp_pgrid, 
                      scoring=mse_scorer, cv=skf_cv)

for idx, dataset in enumerate(datasets):
    scaler.fit(dataset[:, :-1])
    mlp_gs.fit(scaler.transform(dataset[:, :-1]), dataset[:, -1:].ravel())
    mtscore = np.array(mlp_gs.cv_results_['mean_test_score'])
    mlp_scores[idx] = np.abs(mtscore).min()

KeyboardInterrupt: 

In [None]:
mlp_scores

In [10]:
rf_scores = [0.0 for i in range(len(data_list))]

rf_pgrid = {'n_estimators':[1000],
             'criterion': ['mse'],
             'max_depth': [5, 10, 20, 50]}

rf_gs = GridSearchCV(RandomForestRegressor(), param_grid=rf_pgrid, 
                      scoring=mse_scorer, cv=skf_cv)

for idx, dataset in enumerate(datasets):
    rf_gs.fit(dataset[:, :-1], dataset[:, -1:].ravel())
    mtscore = np.array(rf_gs.cv_results_['mean_test_score'])
    rf_scores[idx] = np.abs(mtscore).min()

In [11]:
rf_scores

[0.17837583582779573,
 2939.4199997889959,
 1.1024208514750431,
 11.44356113213381,
 0.113462764922512,
 0.12452228898426322,
 0.2982016829268293,
 4653.2723361916132,
 0.033864358523725831,
 0.16179966286625147,
 7.4283370419598054,
 248.18891952172774,
 1717.5307213381561,
 244.80906752907728,
 1613.7541415235964,
 0.33158959099868474,
 0.35581575745202126,
 28.553733303079213,
 2.7085835255869767e-07,
 2.6001217041323571e-07]

In [8]:
knn_scores = [0.0 for i in range(len(data_list))]

knn_pgrid = {'n_neighbors': [x for x in range(3, 50)],
             'weights': ['uniform', 'distance'],
             'metric': ['euclidean', 'manhattan', 'chebyshev']}

knn_gs = GridSearchCV(KNeighborsRegressor(), param_grid=knn_pgrid, 
                      scoring=mse_scorer, cv=skf_cv)

for idx, dataset in enumerate(datasets):
    knn_gs.fit(dataset[:, :-1], dataset[:, -1:].ravel())
    mtscore = np.array(knn_gs.cv_results_['mean_test_score'])
    knn_scores[idx] = np.abs(mtscore).min()

In [9]:
knn_scores

[0.72912118598439613,
 3250.4082694796348,
 63.273715293151916,
 12.474602155840932,
 0.10395,
 0.099150389769641759,
 1.2975804009238912,
 4072.375209165476,
 0.046042833748524574,
 0.17356507059082288,
 14.463169580085951,
 244.36042038417614,
 1803.2878989453729,
 255.04132311354772,
 1698.9267318521686,
 0.40667141473590063,
 0.42991992236347754,
 2.5398921875515037,
 3.3696334521139294e-06,
 3.5174708526879215e-06]

In [6]:
br_scores = [0.0 for i in range(len(data_list))]

br_pgrid = {'normalize': [True], 'n_iter':[3000],
            'alpha_1': [1e-7, 5e-7, 1e-6, 5e-6, 1e-5, 5e-5],
            'alpha_2': [1e-7, 5e-7, 1e-6, 5e-6, 1e-5, 5e-5],
            'lambda_1': [1e-7, 5e-7, 1e-6, 5e-6, 1e-5, 5e-5],
            'lambda_1': [1e-7, 5e-7, 1e-6, 5e-6, 1e-5, 5e-5]}

br_gs = GridSearchCV(BayesianRidge(), param_grid=br_pgrid, 
                     scoring=mse_scorer, cv=skf_cv)

for idx, dataset in enumerate(datasets):
    br_gs.fit(dataset[:, :-1], dataset[:, -1:].ravel())
    mtscore = np.array(br_gs.cv_results_['mean_test_score'])
    br_scores[idx] = np.abs(mtscore).min()

In [7]:
br_scores

[1.3195980228364903,
 4693.1871678035195,
 82.314798191719419,
 7.296782093461414,
 0.11011567632388865,
 0.15205150344185775,
 0.68379059663027741,
 4053.3500644291375,
 0.059468248062771602,
 0.1522721900765644,
 11.499504728830246,
 285.42768088188478,
 1943.4783659032746,
 278.64266512229813,
 1889.1627124897952,
 0.42550439961725228,
 0.56798303666404359,
 89.599473771424584,
 3.1669306855249308e-06,
 3.1684426455837468e-06]

In [8]:
svr_scores = [0.0 for i in range(len(data_list))]

svr_pgrid = {'kernel': ['rbf', 'poly', 'sigmoid'],
            'C': [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1, 5, 10, 50],
            'gamma': [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2],
            'shrinking': [False, True], 'max_iter': [15000]}

svr_gs = GridSearchCV(SVR(), param_grid=svr_pgrid, 
                      scoring=mse_scorer, cv=skf_cv)

for idx, dataset in enumerate(datasets):
    scaler.fit(dataset[:, :-1])
    svr_gs.fit(scaler.transform(dataset[:, :-1]), dataset[:, -1:].ravel())
    mtscore = np.array(svr_gs.cv_results_['mean_test_score'])
    svr_scores[idx] = np.abs(mtscore).min()

































In [9]:
svr_scores

[0.44473821521850071,
 9752.3763858268267,
 36.736890563792429,
 1.7544398985800755,
 0.10205670434265114,
 0.10355834606166808,
 0.34930666205069094,
 4178.8721943500896,
 0.035777739660353365,
 0.15108165428149153,
 7.1182748392373378,
 244.32167028782851,
 1678.6961782643968,
 240.68099370178768,
 1611.5952862219867,
 0.39061464853232281,
 0.47679508306958246,
 82.596217455518769,
 5.6250000000000107e-05,
 5.6250000000000107e-05]

In [6]:
gb_scores = [0.0 for i in range(len(data_list))]

gb_pgrid = {'criterion': ['mse'], 
            'loss': ['ls', 'lad', 'huber'],
            'n_estimators':[1000],
             'max_depth': [5, 10, 20, 50]}

gb_gs = GridSearchCV(GradientBoostingRegressor(), param_grid=gb_pgrid, 
                     scoring=mse_scorer, cv=skf_cv)

for idx, dataset in enumerate(datasets):
    gb_gs.fit(dataset[:, :-1], dataset[:, -1:].ravel())
    mtscore = np.array(gb_gs.cv_results_['mean_test_score'])
    gb_scores[idx] = np.abs(mtscore).min()

In [7]:
gb_scores

[0.13349213000786195,
 5833.713712916523,
 0.87047956171700214,
 11.0920619302264,
 0.1501135168270773,
 0.15806098463443227,
 0.28264734168240446,
 4190.099301951941,
 0.048196974300717517,
 0.17866074261309386,
 7.2976846779091549,
 247.76681110902879,
 1744.0839861398983,
 244.43288259231011,
 1586.4292551519377,
 0.32717524976843809,
 0.34389699446938155,
 26.970237768457636,
 3.040136231234398e-07,
 2.9048389504707437e-07]