In [None]:
import numpy as np
import pandas as pd

import model as mdl
import viz

# Notebook-wide pandas display settings: floats rendered with 8 decimals,
# and up to 200 rows / 200 columns shown before truncation.
pd.set_option('display.float_format', '{:.8f}'.format)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)

In [None]:
# Load the pickled training frame, then replace every missing value with 0.
# NOTE: unpickling can execute arbitrary code - only load trusted files.
train = pd.read_pickle('tmp/data/train.pkl')
train = train.fillna(0)

# Per-column summary of the null mask, computed after the fill above.
train.isnull().describe().T

In [None]:
# Keep only the columns whose summary statistics have a non-zero mean AND a
# non-zero standard deviation; everything else is dropped from `train`.
# NOTE(review): a column with mean exactly 0 but real variance is dropped too -
# confirm that is intended.
describe = (
    train.describe()
    .T
    .loc[lambda stats: stats['mean'].ne(0) & stats['std'].ne(0)]
)
features = describe.index.tolist()
train = train[features]

In [None]:
# Pass the training frame to the project's `viz.correlation` helper
# (implemented in the local `viz` module, not shown here).
viz.correlation(train)

In [None]:
# Build the modelling data through the project helper.
# NOTE(review): the keyword is called `path`, yet an in-memory DataFrame is
# passed - confirm `mdl.get_data` accepts a frame here.
data = mdl.get_data(path=train)
# `data` must unpack into exactly four parts; the bundle itself is what the
# experiment cells below hand to `mdl.run_experiment`.
X_train, y_train, X_val, y_val = data

## Import Models To Train

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet, SGDRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import LinearSVR, SVR

## Baseline Linear Regression

In [None]:
# Baseline: ordinary least squares with no preprocessing step.
model = LinearRegression()
pipeline = Pipeline(steps=[('model', model)])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Ordinary least squares on standardised features.
scaler = StandardScaler()
model = LinearRegression()
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Ordinary least squares on standardised features reduced by PCA
# (n_components=0.99, i.e. a variance-fraction target rather than a count).
scaler = StandardScaler()
pca = PCA(n_components=0.99, random_state=42)
model = LinearRegression()
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

## Lasso Regression (L1 Regularization)

In [None]:
# Lasso with the library-default alpha on standardised features.
scaler = StandardScaler()
model = Lasso(random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Lasso with the library-default alpha on standardised, PCA-reduced features.
scaler = StandardScaler()
pca = PCA(n_components=0.99, random_state=42)
model = Lasso(random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Lasso with a weaker penalty (alpha=0.1) on standardised features.
scaler = StandardScaler()
model = Lasso(alpha=0.1, random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Lasso (alpha=0.1) on standardised, PCA-reduced features.
scaler = StandardScaler()
pca = PCA(n_components=0.99, random_state=42)
model = Lasso(alpha=0.1, random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

## Ridge Regression (L2 Regularization)

In [None]:
# Ridge with the library-default alpha and no preprocessing step.
model = Ridge(random_state=42)
pipeline = Pipeline(steps=[('model', model)])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Ridge with the library-default alpha on standardised features.
scaler = StandardScaler()
model = Ridge(random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Ridge with the library-default alpha on standardised, PCA-reduced features.
scaler = StandardScaler()
pca = PCA(n_components=0.99, random_state=42)
model = Ridge(random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Ridge with a weaker penalty (alpha=0.1) and no preprocessing step.
model = Ridge(alpha=0.1, random_state=42)
pipeline = Pipeline(steps=[('model', model)])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Ridge (alpha=0.1) on standardised features.
scaler = StandardScaler()
model = Ridge(alpha=0.1, random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Ridge (alpha=0.1) on standardised, PCA-reduced features.
scaler = StandardScaler()
pca = PCA(n_components=0.99, random_state=42)
model = Ridge(alpha=0.1, random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

## ElasticNet Regression (L1 & L2 Regularization)

In [None]:
# ElasticNet with library-default settings on standardised features.
scaler = StandardScaler()
model = ElasticNet(random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# ElasticNet with library-default settings on standardised, PCA-reduced features.
scaler = StandardScaler()
pca = PCA(n_components=0.99, random_state=42)
model = ElasticNet(random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

## SGDRegressor

In [None]:
# SGD regressor with library-default settings and no scaling step in the pipeline.
model = SGDRegressor(random_state=42)
pipeline = Pipeline(steps=[('model', model)])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# SGD regressor with library-default settings on standardised features.
scaler = StandardScaler()
model = SGDRegressor(random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# SGD regressor with library-default settings on standardised, PCA-reduced features.
scaler = StandardScaler()
pca = PCA(n_components=0.99, random_state=42)
model = SGDRegressor(random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

## RandomForestRegressor

In [None]:
# Random forest with library-default hyper-parameters, all cores (n_jobs=-1),
# and no preprocessing step.
model = RandomForestRegressor(n_jobs=-1, random_state=42)
pipeline = Pipeline(steps=[('model', model)])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Random forest with library-default hyper-parameters on standardised features.
scaler = StandardScaler()
model = RandomForestRegressor(n_jobs=-1, random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Random forest with library-default hyper-parameters on standardised,
# PCA-reduced features.
scaler = StandardScaler()
pca = PCA(n_components=0.99, random_state=42)
model = RandomForestRegressor(n_jobs=-1, random_state=42)
pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('model', model),
])

# Empty parameter dict, single experiment.
params, n_experiments = {}, 1

mdl.run_experiment(data, pipeline, params, n_experiments)

In [None]:
# Random-forest hyper-parameter search on standardised, PCA-reduced features.
scaler = StandardScaler()
pca = PCA(0.99, random_state=42)
# `criterion` is intentionally left at its default (squared error). The explicit
# 'mse' spelling was renamed to 'squared_error' in scikit-learn 1.0 and removed
# in 1.2, so omitting it selects the same loss on every version.
model = RandomForestRegressor(
    n_jobs=-1,
    random_state=42
)

pipeline = Pipeline([('scaler', scaler),
                     ('pca', pca),
                     ('model', model)])

# Search space. Keys use the `<step>__<parameter>` form so each value is routed
# to the 'model' step of the pipeline.
params = {'model__n_estimators' : list(range(100, 200, 10)),
          'model__min_samples_leaf' : list(range(2, 21)),
          # floats here are fractions of the sample count, not absolute counts
          'model__min_samples_split' : [0.05, 0.1, 0.15, 0.2, 0.25, 0.3],
          'model__max_depth' : list(range(4, 10)),
          # candidates currently left out of the search:
          #'model__min_weight_fraction_leaf' : [0.0],
          #'model__max_features' : range(10, len(X_train.columns)),
          #'model__max_leaf_nodes' : [None],
          # NOTE(review): warm_start only matters when an already-fitted forest
          # is refit - confirm it is worth searching over.
          'model__warm_start' : [True, False]
         }

n_experiments = 1

# run experiment
mdl.run_experiment(data, pipeline, params, n_experiments)