In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import linear_model, kernel_ridge
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier

from sklearn import preprocessing
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score

In [None]:
# Dummy data for setup
#
# `datasets.load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell uses the bundled diabetes regression dataset instead. It ships
# with scikit-learn (no download) and is only a placeholder until real data
# is wired in.

from sklearn import datasets
from sklearn.model_selection import GridSearchCV

# Number of leading rows used for training; every remaining row is validation.
N_TRAIN = 400

X, y = datasets.load_diabetes(return_X_y=True)

# Contiguous, unshuffled split: the first N_TRAIN rows train, the rest validate.
X_train, X_val = X[:N_TRAIN, :], X[N_TRAIN:, :]
y_train, y_val = y[:N_TRAIN], y[N_TRAIN:]

In [None]:
'''To Consider
- Normalize features for some models.
- Do we need a validation set outside of GridSearchCV? Yes, because GridSearchCV uses different loss functions for each model.
- Should we use time-based folds?
- Should we set a standard scoring function in GridSearchCV? Otherwise it defers to the individual regressors.
'''

In [None]:
def lasso_covid(X, y, alphas=(.01, .1, 1), cv=5, n_jobs=4, scoring=None):
    """Grid-search a Lasso regressor over its regularisation strength.

    Parameters
    ----------
    X, y : training features and targets, passed straight to
        ``GridSearchCV.fit``.
    alphas : iterable of candidate ``alpha`` values. Defaults to the grid
        that was previously hard-coded.
    cv : cross-validation spec forwarded to ``GridSearchCV`` (an int fold
        count or a splitter object).
    n_jobs : number of parallel jobs forwarded to ``GridSearchCV``.
    scoring : single scoring metric forwarded to ``GridSearchCV``. ``None``
        keeps the previous behaviour of deferring to the estimator's own
        ``score`` method.

    Returns
    -------
    The fitted ``GridSearchCV`` object. ``refit=True`` is always used, so the
    winning model is available as ``best_estimator_``.
    """
    search = GridSearchCV(
        linear_model.Lasso(),
        {'alpha': tuple(alphas)},
        scoring=scoring,
        n_jobs=n_jobs,
        cv=cv,
        refit=True,
    )
    search.fit(X, y)
    return search

def ridge_covid(X, y, alphas=(.01, .1, 1), cv=5, n_jobs=4, scoring=None):
    """Grid-search a Ridge regressor over its regularisation strength.

    Parameters
    ----------
    X, y : training features and targets, passed straight to
        ``GridSearchCV.fit``.
    alphas : iterable of candidate ``alpha`` values. Defaults to the grid
        that was previously hard-coded.
    cv : cross-validation spec forwarded to ``GridSearchCV`` (an int fold
        count or a splitter object).
    n_jobs : number of parallel jobs forwarded to ``GridSearchCV``.
    scoring : single scoring metric forwarded to ``GridSearchCV``. ``None``
        keeps the previous behaviour of deferring to the estimator's own
        ``score`` method.

    Returns
    -------
    The fitted ``GridSearchCV`` object. ``refit=True`` is always used, so the
    winning model is available as ``best_estimator_``.
    """
    search = GridSearchCV(
        linear_model.Ridge(),
        {'alpha': tuple(alphas)},
        scoring=scoring,
        n_jobs=n_jobs,
        cv=cv,
        refit=True,
    )
    search.fit(X, y)
    return search

def kernel_ridge_covid(X, y, alphas=(.01, .1, 1),
                       kernels=('linear', 'rbf', 'laplacian'),
                       cv=5, n_jobs=4, scoring=None):
    """Grid-search a kernel ridge regressor over alpha and kernel type.

    Parameters
    ----------
    X, y : training features and targets, passed straight to
        ``GridSearchCV.fit``.
    alphas : iterable of candidate ``alpha`` values. Defaults to the grid
        that was previously hard-coded.
    kernels : iterable of kernel names to try. The default previously
        contained the misspelling ``'lapacian'``, which is not a valid kernel
        name, so every candidate using it failed to fit and the Laplacian
        kernel was never actually evaluated.
    cv : cross-validation spec forwarded to ``GridSearchCV`` (an int fold
        count or a splitter object).
    n_jobs : number of parallel jobs forwarded to ``GridSearchCV``.
    scoring : single scoring metric forwarded to ``GridSearchCV``. ``None``
        keeps the previous behaviour of deferring to the estimator's own
        ``score`` method.

    Returns
    -------
    The fitted ``GridSearchCV`` object. ``refit=True`` is always used, so the
    winning model is available as ``best_estimator_``.
    """
    param_grid = {
        'alpha': tuple(alphas),
        'kernel': tuple(kernels),
    }
    search = GridSearchCV(
        kernel_ridge.KernelRidge(),
        param_grid,
        scoring=scoring,
        n_jobs=n_jobs,
        cv=cv,
        refit=True,
    )
    search.fit(X, y)
    return search

In [None]:
# Validation function

def evaluate(reg_list, X_val, y_val):
    """Score fitted regressors on a validation set.

    For every estimator in ``reg_list`` the MSE, MAE and r2 against
    ``y_val`` are printed (same layout as before) and collected.

    Parameters
    ----------
    reg_list : iterable of fitted estimators exposing ``predict``.
    X_val : validation features passed to each estimator's ``predict``.
    y_val : true validation targets.

    Returns
    -------
    pandas.DataFrame with one row per estimator and the columns
    ``model``, ``MSE``, ``MAE`` and ``r2``. It is empty when ``reg_list``
    is empty.
    """
    columns = ["model", "MSE", "MAE", "r2"]
    rows = []
    for reg in reg_list:
        # Predict once and reuse it for all three metrics.
        y_pred = reg.predict(X_val)
        # Use the class name as the label; slicing the repr produced
        # truncated labels such as "Lasso(al".
        name = type(reg).__name__

        mse = mean_squared_error(y_val, y_pred)
        mae = mean_absolute_error(y_val, y_pred)
        r2 = r2_score(y_val, y_pred)

        print("Validation Scores for {}\n".format(name) + "*" * 30)
        print("MSE: {:2.4f}".format(mse))
        print("MAE: {:2.4f}".format(mae))
        print("r2: {:2.4f}\n".format(r2))

        rows.append({"model": name, "MSE": mse, "MAE": mae, "r2": r2})

    return pd.DataFrame(rows, columns=columns)

In [None]:
# Run the cross-validated grid search for each model family on the training split.
lasso = lasso_covid(X=X_train, y=y_train)
ridge = ridge_covid(X=X_train, y=y_train)
kridge = kernel_ridge_covid(X=X_train, y=y_train)

In [None]:
# Pull the refit winner out of each grid search, in lasso / ridge / kernel-ridge order
reg_list = [search.best_estimator_ for search in (lasso, ridge, kridge)]

# Score every winner on the held-out validation split
evaluate(reg_list=reg_list, X_val=X_val, y_val=y_val)