In [None]:
import sys
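# One-time setup: uncomment the next line to install pygam and tsfresh from
# conda-forge into the environment this kernel runs in (sys.prefix).
# !conda install --yes --prefix {sys.prefix} -c conda-forge pygam tsfresh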

In [None]:
from time import perf_counter
from time import time

import numpy as np
import pygam
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

from basic_models import validate_models
from feature_importance import perm_feature_importance
from load_data import load_learnable
from model_evaluation import model_residuals

In [None]:
# Candidate regressors as (label, unfitted estimator) pairs.
#
# The linear models used to be built with ``normalize=True``. That argument was
# deprecated in scikit-learn 1.0 and removed in 1.2, so on current releases the
# constructors raise ``TypeError``. Scaling is now an explicit pipeline step,
# which works on old and new versions alike;
# ``StandardScaler(with_mean=False)`` is the replacement named in
# scikit-learn's deprecation notice. Ordinary least squares predictions are
# unaffected by the rescaling; the regularised BayesianRidge fit may differ
# slightly from the old ``normalize=True`` behaviour.
# NOTE(review): 'LR' and 'BRidge' are now Pipeline objects -- confirm that the
# helpers consuming ``models`` only rely on fit/predict and do not read
# ``coef_`` directly from the estimator.
#
# The tree-based estimators are seeded so that re-running the notebook
# reproduces the same fits.
#
# The 'XGB-*' labels refer to scikit-learn's GradientBoostingRegressor, not the
# xgboost package; the labels are kept unchanged in case other code keys on them.
SEED = 0
models = [
    ('LR', make_pipeline(StandardScaler(with_mean=False), LinearRegression())),
    ('BRidge', make_pipeline(StandardScaler(with_mean=False), BayesianRidge())),
    ('DT-5', DecisionTreeRegressor(max_depth=5, random_state=SEED)),
    ('DT-10', DecisionTreeRegressor(max_depth=10, random_state=SEED)),
    ('RF-5', RandomForestRegressor(max_depth=5, random_state=SEED)),
    ('RF-10', RandomForestRegressor(max_depth=10, random_state=SEED)),
    ('XGB-5', GradientBoostingRegressor(max_depth=5, random_state=SEED)),
    ('XGB-10', GradientBoostingRegressor(max_depth=10, random_state=SEED)),
    ('Extra', ExtraTreesRegressor(random_state=SEED)),
]

In [None]:
# load_learnable returns four objects, unpacked here as X, y, features, times.
# NOTE(review): the keyword flags are project-specific; see
# load_data.load_learnable for what each one selects.
X, y, features, times = load_learnable(
    remove_att=False,
    difference=True,
    lookback=1,
)

In [None]:
# Positional hold-out split expressed as two complementary boolean row masks:
# the final 2*N rows are the test set, every row before them is training data.
# N is the row count floor-divided by the number of distinct values in `times`.
# NOTE(review): this presumably relies on rows being ordered by time with the
# same number of rows per time value -- confirm against load_learnable.
n_rows = X.shape[0]
N = n_rows // np.unique(times).size
test = np.zeros(n_rows, dtype=bool)
test[-2 * N:] = True
train = ~test

In [None]:
# Run validate_models on the training rows only and record the elapsed time in
# dt1 (seconds). perf_counter() replaces time() because it is monotonic and
# high-resolution, so the interval cannot be skewed by system clock adjustments.
t = perf_counter()
best_mdls = validate_models(X[train], y[train], times[train], models=models)
dt1 = perf_counter() - t
print('Validation took {:.3f} seconds'.format(dt1))

In [None]:
# Call model_residuals with the models returned by validation and record the
# elapsed time in dt2 (seconds). perf_counter() replaces time() because it is
# monotonic, so the interval cannot be skewed by system clock adjustments.
# NOTE(review): the full X, y, times are passed here (not only the held-out
# rows) -- confirm whether model_residuals does its own split internally.
t_r = perf_counter()
model_residuals(X, y, times, features, models=best_mdls)
dt2 = perf_counter() - t_r
print('Residuals took {:.3f} seconds'.format(dt2))

In [None]:
# Call perm_feature_importance with the training rows as the first pair and the
# held-out rows as the second, and record the elapsed time in dt3 (seconds).
# perf_counter() replaces time() because it is monotonic, so the interval
# cannot be skewed by system clock adjustments.
# NOTE(review): this passes the original `models` list rather than `best_mdls`
# from the validation step -- confirm that is intended.
t_fi = perf_counter()
trained_mdls = perm_feature_importance((X[train], y[train]), (X[test], y[test]), features, models=models)
dt3 = perf_counter() - t_fi
print('Model training and feature importances took {:.3f} seconds'.format(dt3))
# Total wall time of the three timed stages; dt1 and dt2 come from the two
# preceding timing cells, so those must have been run first.
print('In total, pipeline took {:.3f} minutes'.format((dt1 + dt2 + dt3)/60))