In [1]:
import papermill as pm
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNet as en
from sklearn.preprocessing import MinMaxScaler

In [2]:
notebook = r"D:\gitClones\nteract_models\optimize\projects\the_dalles\train_test\the_dalles_2014-12-31_00_00_00.ipynb"

In [3]:
nb = pm.read_notebook(notebook)
nb_df = nb.dataframe
data = nb_df[nb_df['name']=='data']['value'].values[0]

In [4]:
train = pd.DataFrame(data['train']).drop('index', axis = 1)
test = pd.DataFrame(data['test']).drop('index', axis = 1)

train = train[train['tdg_f']>110]
test = test[test['tdg_f']>110]

In [5]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, reset_output
from bokeh.layouts import gridplot
from scripts.theme import theme
from bokeh.io import curdoc
output_notebook()

def qq(y,y_hat):
    y_min = y.min()
    y_max = y.max()
    p = figure()
    p.line([y_min,y_max], [y_min,y_max], line_width=2, color = 'black')
    p.circle(y, y_hat, size=1, alpha=0.5)
    p.xaxis.axis_label = "y (%)"
    p.yaxis.axis_label = "y hat (%)"
    return p


In [6]:
# Create interaction term with spill and forebay tdg
train.head()

Unnamed: 0,h_t,p_atm,q_p,q_s,tdg_f,tdg_tw,temp_water
620,23.832217,100791.432,2.330473,1.611226,110.05291,114.229249,9.672222
624,24.267906,100658.11,3.485798,2.39277,110.066225,114.511873,9.7
625,24.319968,100658.11,3.477303,2.389938,110.198675,114.643799,9.7
626,24.325039,100658.11,3.440491,2.387106,110.066225,115.699208,9.711111
627,24.357938,100524.788,3.477303,2.387106,110.344828,116.116248,9.7


In [7]:
#interactions 
from itertools import combinations
x_columns = [x for x in train.columns if x not in  ['tdg_tw', 'h_t']]
doubles = [x for x in combinations(x_columns,2)]
triples = [x for x in combinations(x_columns,3)]
quads = [x for x in combinations(x_columns,4)]
fives = [x for x in combinations(x_columns,5)]
all_cols = [x for x in combinations(x_columns,6)]

interactions = doubles+triples+quads+fives+all_cols

In [8]:
for interaction in interactions:
    name = '-'.join(interaction)
    train[name] = 1
    test[name] = 1
    for term in interaction:
        train[name] = train[name] * train[term]
        test[name] = test[name] * test[term]

In [9]:
df_scaler = pd.concat([train, test], ignore_index = True)
scaler = MinMaxScaler()
scaler.fit(df_scaler)
MinMaxScaler(copy=True, feature_range=(0, 1))

MinMaxScaler(copy=True, feature_range=(0, 1))

In [10]:

train_scaled = pd.DataFrame(columns = train.columns, data = scaler.transform(train))

test_scaled = pd.DataFrame(columns = test.columns, data = scaler.transform(test))

In [11]:
x_columns = [x for x in train.columns if x != 'tdg_tw']
x_train = train_scaled[x_columns]

y_train = train['tdg_tw']


x_test = test_scaled[x_columns]
y_test = test['tdg_tw']

In [12]:
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca.fit(x_train)
x_train = pca.transform(x_train)


pca = PCA(n_components=2)
pca.fit(x_test)
x_test = pca.transform(x_test)

In [13]:
params = dict(alpha=1, l1_ratio=.5, fit_intercept=True, normalize=False, precompute=False, max_iter=1000, copy_X=True, tol=0.0001, warm_start=False, positive=False, random_state=0, selection='cyclic')
model = en(**params)
model.fit(x_train, y_train)

ElasticNet(alpha=1, copy_X=True, fit_intercept=True, l1_ratio=0.5,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=0, selection='cyclic', tol=0.0001, warm_start=False)

In [14]:
model.coef_

array([1.49083168, 0.        ])

In [15]:
y_hat = model.predict(x_test)

In [16]:
sum(abs(y_test-y_hat))/len(y_hat)

1.5139187632439726

In [17]:
sum(y_test-y_hat)**2/len(y_hat)

2683.745846780684

In [18]:
p = qq(y_test,y_hat)
show(p)

In [19]:
#svr
from sklearn.svm import SVR

svr = SVR(kernel = 'poly', degree = 3, C = .5, epsilon = .001)
svr.fit(x_train, y_train.values) 
y_hat = svr.predict(x_test)


In [20]:
sum(abs(y_test-y_hat))/len(y_hat)

1.2193437759929784

In [21]:
sum(y_test-y_hat)**2/len(y_hat)

811.4236012308397

In [22]:
p = qq(y_test,y_hat)
show(p)

In [23]:
# NN
from sklearn.neural_network import MLPRegressor

nn = MLPRegressor(max_iter=1000)
nn.fit(x_train,y_train)
y_hat = nn.predict(x_test)

In [24]:
sum(abs(y_test-y_hat))/len(y_hat)

1.9328557076546478

In [25]:
sum(y_test-y_hat)**2/len(y_hat)

2510.187366065561

In [26]:
p = qq(y_test,y_hat)
show(p)