In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import utils

# Assemble the dataset through the project's `utils` selector pipeline.
# NOTE(review): the literals 5, 1970 and 35 are forwarded unchanged; what each
# one controls is defined inside `utils` -- confirm before tuning them.
years = utils.Groups(5)
Groups = utils.CombinedDataSelector(years).ConstructNoFilter()
ds = utils.DataSelector(5, Groups)
dfs = utils.DataFromSelector(ds, 1970, 35, utils.conn)
data = dfs.constructAll()

# data[0] is used as the model inputs and data[1] as the targets; both are
# converted to NumPy arrays before splitting.
features = np.array(data[0])
targets = np.array(data[1])

# Hold out 10% of the samples for evaluation; the fixed random_state makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.1,
    random_state=42,
)

In [2]:
from models import LinRegWithPoly, RidgeRegCVWithPoly, LassoCVWithPoly

In [3]:
# Linear regression wrapper given polynomial degrees 1-3 as candidates;
# fit on the training split, then scored on the held-out split.
lr_degrees = [1, 2, 3]
lr_model = LinRegWithPoly(degree_range=lr_degrees)
lr_model.fit(X_train, y_train)
lr_model.evaluate(X_test, y_test)

(0.08011885176934885,
 {'val_neg_mean_squared_error': -0.5045287419425678,
  'val_neg_root_mean_squared_error': -0.7103018667739568,
  'val_r2': 0.08011885176934885})

In [4]:
# Degree of the 'poly' step inside lr_model's best estimator.
lr_poly_step = lr_model.best_estimator_.named_steps['poly']
lr_poly_step.degree

1

In [5]:
# Ridge regression wrapper: polynomial degrees 1-3 and ten log-spaced alpha
# candidates between 10**-2 and 10**1.
ridge_degrees = [1, 2, 3]
ridge_alphas = np.logspace(-2, 1, 10)
ridge = RidgeRegCVWithPoly(degree_range=ridge_degrees, alphas=ridge_alphas)
ridge.fit(X_train, y_train)
ridge.evaluate(X_test, y_test)

(0.24591426853390908,
 {'val_neg_mean_squared_error': -0.41359465420638436,
  'val_neg_root_mean_squared_error': -0.6431132514622789,
  'val_r2': 0.24591426853390908})

In [6]:
# Degree of the 'poly' step inside ridge's best estimator.
ridge_poly_step = ridge.best_estimator_.named_steps['poly']
ridge_poly_step.degree

3

In [7]:
# alpha_ attribute of the 'model' step inside ridge's best estimator
# (presumably the regularisation strength chosen by cross-validation -- confirm).
ridge_model_step = ridge.best_estimator_.named_steps['model']
ridge_model_step.alpha_

0.046415888336127774

In [8]:
# Lasso wrapper with a single polynomial degree (3) and ten log-spaced alpha
# candidates between 10**-10 and 10**1.
lasso_alphas = np.logspace(-10, 1, 10)
lasso = LassoCVWithPoly(degree=3, alphas=lasso_alphas)
lasso.fit(X_train, y_train)
lasso.evaluate(X_test, y_test)

(0.21207019355652268,
 {'val_neg_mean_squared_error': -0.4321571702481515,
  'val_neg_root_mean_squared_error': -0.6573866215920061,
  'val_r2': 0.21207019355652268})

In [9]:
# alpha_ attribute of the 'model' step; unlike lr_model/ridge, the step is
# read directly from `lasso.named_steps` rather than via best_estimator_.
lasso_model_step = lasso.named_steps['model']
lasso_model_step.alpha_

0.00012915496650148855

In [10]:
from models import KNNRegressor

In [11]:
# K-nearest-neighbours regressor built with the wrapper's default settings,
# fit on the training split and scored on the held-out split.
knn = KNNRegressor()
knn.fit(X_train, y_train)
knn.evaluate(X_test, y_test)

(0.15007800052337372,
 {'val_neg_mean_squared_error': -0.46615813137387413,
  'val_neg_root_mean_squared_error': -0.682757739885733,
  'val_r2': 0.15007800052337372})

In [12]:
# Show the cross-validation results stored on the fitted knn object.
# NOTE(review): this displays the whole dict of arrays; a tabular or trimmed
# view would be easier to read.
knn.cv_results_

{'mean_fit_time': array([0.00337334, 0.00289378, 0.00285802, 0.00292273, 0.0028758 ,
        0.0028615 , 0.00300293, 0.00291119, 0.00308332, 0.00295205,
        0.00296216, 0.00296574, 0.00292258, 0.00303049]),
 'std_fit_time': array([0.00073453, 0.00038554, 0.00032876, 0.0004283 , 0.00036668,
        0.00033982, 0.00044715, 0.00040008, 0.00042547, 0.00044524,
        0.0004081 , 0.00041451, 0.00033495, 0.00040492]),
 'mean_score_time': array([0.01350598, 0.01533856, 0.01701159, 0.01990113, 0.02117944,
        0.02261572, 0.02367048, 0.02456803, 0.02558208, 0.02483454,
        0.02565718, 0.02705321, 0.0265707 , 0.0271297 ]),
 'std_score_time': array([0.00156405, 0.00172968, 0.00159999, 0.00212429, 0.00115   ,
        0.00165505, 0.00162382, 0.00153804, 0.00147045, 0.00063067,
        0.0012051 , 0.00184697, 0.00137825, 0.00102459]),
 'param_n_neighbors': masked_array(data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
              mask=[False, False, False, False, False, False, Fal

In [13]:
from models import SVMRegression

In [14]:
# Support-vector regression wrapper with log-spaced candidate values:
# six for C between 10**0.5 and 10**1.5, six for epsilon between 10**-2 and 10**0.
svr_C_grid = np.logspace(0.5, 1.5, 6)
svr_epsilon_grid = np.logspace(-2, 0, 6)
svr = SVMRegression(C_range=svr_C_grid, epsilon_range=svr_epsilon_grid)
svr.fit(X_train, y_train)
# NOTE(review): in the recorded run the first element of the returned tuple
# equals val_neg_mean_squared_error, whereas for the other models in this
# notebook it equals val_r2 -- check SVMRegression.evaluate for consistency.
svr.evaluate(X_test, y_test)

(-0.4406338306942377,
 {'val_neg_mean_squared_error': -0.4406338306942377,
  'val_neg_root_mean_squared_error': -0.6638025539979774,
  'val_r2': 0.19661513719187507})

In [15]:
# Show the cross-validation results stored on the fitted svr object.
# NOTE(review): this displays the whole dict of arrays; a tabular or trimmed
# view would be easier to read.
svr.cv_results_

{'mean_fit_time': array([0.25427599, 0.19287047, 0.14472094, 0.11179795, 0.09741378,
        0.00129552, 0.29445281, 0.23020754, 0.16695662, 0.12176619,
        0.10224199, 0.00132332, 0.3508853 , 0.27814088, 0.19521651,
        0.13972197, 0.10889907, 0.00127726, 0.43429337, 0.33817401,
        0.23469996, 0.16068439, 0.11640654, 0.00124145, 0.53793302,
        0.41060619, 0.29403801, 0.18788977, 0.1272984 , 0.00133324,
        0.69098277, 0.5363955 , 0.35862174, 0.22577095, 0.14493637,
        0.00133443]),
 'std_fit_time': array([4.62292035e-03, 1.43173795e-03, 3.71553488e-03, 1.49717574e-03,
        2.08916147e-03, 1.67870479e-04, 3.09662493e-03, 1.61252704e-03,
        1.65313780e-03, 1.89261842e-03, 1.58495781e-03, 1.20984836e-04,
        5.08285836e-03, 8.46231563e-03, 4.12888643e-03, 5.10461486e-03,
        3.39728102e-03, 1.10496803e-04, 8.79683916e-03, 1.02130459e-02,
        3.75464560e-03, 4.98450231e-03, 2.76898123e-03, 8.01114808e-05,
        1.12619748e-02, 8.52223254e-0

In [16]:
# Show the best estimator stored on the fitted svr object (its repr lists the
# C and epsilon values it was built with).
svr.best_estimator_

SVR(C=7.943282347242816, epsilon=0.15848931924611143)

In [17]:
from models import DecTreeReg, RandomForestReg

In [18]:
# Decision-tree regressor built with the wrapper's default settings,
# fit on the training split and scored on the held-out split.
decTree = DecTreeReg()
decTree.fit(X_train, y_train)
decTree.evaluate(X_test, y_test)

(-0.048549410707328544,
 {'val_neg_mean_squared_error': -0.575099637672042,
  'val_neg_root_mean_squared_error': -0.7583532406946264,
  'val_r2': -0.048549410707328544})

In [19]:
# Random-forest regressor built with the wrapper's default settings,
# fit on the training split and scored on the held-out split.
randForest = RandomForestReg()
randForest.fit(X_train, y_train)
randForest.evaluate(X_test, y_test)

(0.5118245330068778,
 {'val_neg_mean_squared_error': -0.26775040958607466,
  'val_neg_root_mean_squared_error': -0.5174460450965633,
  'val_r2': 0.5118245330068778})