In [None]:
from exp.run import run_experiment
from exp.mappings import alg_map
from exp.train import train_model
import pandas as pd
import json
import numpy as np

### Hyper-parameter Experiments

In [None]:
"""
Example of Cartesian Product of Hyper-parameters for Linear Regression

"lr": {"fit_intercept": [False, True], "normalize": [False, True]}

Cartesian Product: {fit_intercept} x {normalize}

Hyper-parameter choices:
"fit_intercept": False, "normalize": False
"fit_intercept": True, "normalize": False
"fit_intercept": False, "normalize": True
"fit_intercept": True, "normalize": True
"""

# Search space for every algorithm, keyed by the algorithm's short name.
# Each inner dict maps a hyper-parameter name to the list of candidate values.
#
# Several candidate lists repeat a value on purpose (e.g. five "cyclic" to one
# "random"). NOTE(review): presumably this weights the random search toward
# that value -- confirm against how run_experiment samples from these lists.
#
# The helpers below return a *fresh* list on every call so that no two entries
# share the same list object.


def _alpha_grid():
    """Return a new list with the regularisation strengths reused by most entries."""
    return [.000001, .00001, .0001, .001, .01, .1, 1.0, 10, 100]


def _both_flags():
    """Return a new ``[False, True]`` list for on/off switches."""
    return [False, True]


def _rarely_true(false_count):
    """Return ``false_count`` copies of False followed by one True."""
    return [False] * false_count + [True]


def _selection_choices():
    """Return five "cyclic" entries followed by one "random" entry."""
    return ["cyclic"] * 5 + ["random"]


def _l1_ratio_grid():
    """Return a new list with the l1_ratio candidates."""
    return [.01, .99, .2, .4, .6, .8]


params = {}

params["lr"] = {
    "fit_intercept": _both_flags(),
    "normalize": _both_flags(),
}

params["ridge"] = {
    "alpha": _alpha_grid(),
    "fit_intercept": _both_flags(),
    "normalize": _both_flags(),
}

params["lasso"] = {
    "alpha": _alpha_grid(),
    "fit_intercept": _both_flags(),
    "normalize": _both_flags(),
    "positive": _rarely_true(5),
    "selection": _selection_choices(),
}

params["mtlasso"] = {
    "alpha": _alpha_grid(),
    "fit_intercept": _both_flags(),
    "normalize": _both_flags(),
    "selection": _selection_choices(),
}

params["elastic"] = {
    "alpha": _alpha_grid(),
    "fit_intercept": _both_flags(),
    "normalize": _both_flags(),
    "positive": _rarely_true(5),
    "l1_ratio": _l1_ratio_grid(),
    "selection": _selection_choices(),
}

params["lars"] = {
    "fit_intercept": _both_flags(),
    "normalize": _both_flags(),
    "fit_path": [False],
    "n_nonzero_coefs": [10, 100, 500, 1000, 10000, np.inf],
}

params["llars"] = {
    "alpha": _alpha_grid(),
    "fit_intercept": _both_flags(),
    "normalize": _both_flags(),
    "fit_path": [False],
    "positive": _rarely_true(5),
}

params["omp"] = {
    "fit_intercept": _both_flags(),
    "normalize": _both_flags(),
    "n_nonzero_coefs": [10, 100] + [None] * 3,
}

params["sgdreg"] = {
    "loss": ["squared_loss"] * 3
    + ["huber", "epsilon_insensitive", "squared_epsilon_insensitive"],
    "penalty": ["none", "l2", "l1", "elasticnet"],
    "alpha": _alpha_grid(),
    "l1_ratio": _l1_ratio_grid(),
    "fit_intercept": _both_flags(),
    "learning_rate": ["constant"] + ["optimal"] * 3 + ["invscaling", "adaptive"],
    "eta0": [1.0, 10.0, .1, .01, .001, .0001],
    "early_stopping": _rarely_true(4),
}

params["pareg"] = {
    "C": [.001, .01, .1] + [1.0] * 3 + [10.0, 100.0],
    "loss": ["epsilon_insensitive", "squared_epsilon_insensitive"],
    "epsilon": [.01, .05] + [.1] * 3 + [.5],
    "early_stopping": _rarely_true(4),
}

# Disabled entry, kept for reference:
# "tsreg": {"fit_intercept": [False, True]},

params["hreg"] = {
    "epsilon": [1.1, 1.2] + [1.35] * 4 + [1.5, 1.6, 1.8, 2.0, 2.5],
    "alpha": _alpha_grid(),
    "fit_intercept": _both_flags(),
}

params["kreg"] = {
    "alpha": _alpha_grid(),
    "kernel": ["linear", "linear", "poly", "rbf", "sigmoid"],
    "gamma": [None] * 4 + [.001, .0001, .01, .1],
}

### Run Experiment

In [None]:
# Settings for the experiment run; tune them here.
num_searches = 20          # forwarded to run_experiment as num_searches
n_fold = 10                # forwarded to run_experiment and train_model as n_fold
save_results = "exp.csv"   # passed to run_experiment and later read back with pd.read_csv

In [None]:
# Run one random-search experiment per algorithm in the search space.
# NOTE(review): score_df is rebound on every pass, so after the loop it holds
# only what the final run_experiment call returned -- confirm that is intended.
for alg, alg_params in params.items():
    print(alg)
    score_df = run_experiment(
        n_fold=n_fold,
        alg=alg,
        alg_params=alg_params,
        search_type="random",
        num_searches=num_searches,
        save_results=save_results,
    )

### Display models ranked by CV scores

In [None]:
# Order the score table by its CV-score column (ascending, the pandas default)
# and render it.
# NOTE(review): the column name looks like it encodes n_fold=10 -- confirm it
# still matches if n_fold is changed.
cv_score_column = "cv_score_n_folds_10_shuffle_True_rs_None"
score_df = score_df.sort_values(by=cv_score_column, axis=0)
display(score_df)

### Load results from CSV file and reproduce models ranked by CV scores

In [None]:
# Read the saved results back from disk and order them by the CV-score column
# (ascending, the pandas default).
score_df = (
    pd.read_csv(save_results)
    .sort_values(by="cv_score_n_folds_10_shuffle_True_rs_None", axis=0)
)

In [None]:
# Render the score table currently bound to score_df.
display(score_df)

### Load best model from CSV File

In [None]:
# Take the first row of the score table. The table was sorted ascending by the
# CV-score column, so this is the row with the smallest value in that column.
# NOTE(review): this treats a lower score as better -- confirm for the metric used.
best = score_df.iloc[0]
display(best)

# Pull the fields needed to rebuild the model out of the selected row.
alg = best["alg"]
params_json = best["params_json"]
fs = best["feature_set"]

print("alg: {}".format(alg))
print("params_json: {}".format(params_json))
print("feature_set: {}".format(fs))

# Look up the alg_map entry for this algorithm name (raises KeyError if the
# name is not registered). The value is not used further in this cell.
alg_cls = alg_map[alg]

# Decode the stored hyper-parameters into their own name. `params` is the
# search-space dict the experiment loop iterates over; rebinding it here would
# break that loop if it were re-run after this cell.
best_params = json.loads(params_json)

# Train the selected algorithm with the recovered hyper-parameters.
train_model(params=best_params, fs=fs, n_fold=n_fold, alg=alg)