In [None]:
'''
Data munging libraries

'''
import random

import numpy as np
import pandas as pd
import statsmodels.api as sm

import joblib
'''
Visualization Libraries

'''
import seaborn as sns
%matplotlib inline
#%matplotlib notebook
# Pandas display options: show up to 100 columns and use a display precision of 2.
pd.set_option('display.max_columns', 100)
pd.set_option('display.precision', 2) 
from bokeh.plotting import figure,  show, gridplot
from bokeh.io import output_notebook
from bokeh.layouts import row, column

'''
ML libraries

'''

from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score;
from sklearn.model_selection import  train_test_split ;
from sklearn.pipeline import Pipeline;

In [None]:
# Configure Bokeh so that show() renders figures inline in the notebook.
output_notebook()

# Define the file name of the model data

In [None]:
# Relative path of the model-data file.
file_name_model_data = "../model_data/condenser_model_data.gzip"

In [None]:
# Read the gzip-compressed CSV into a DataFrame, decoding text as ISO-8859-2.
model_df = pd.read_csv(
    file_name_model_data,
    compression="gzip",
    encoding="ISO-8859-2",
)

In [None]:
# Display summary statistics of the loaded frame as a quick sanity check.
model_df.describe()

In [None]:
# Names of the model-data columns used below as model inputs and target.
drybulb = 'Amb db'                                          # presumably ambient dry-bulb temperature; confirm
baro = "SITE AMBIENT CONDITIONS BARO PRESS XMTR {Avg}"      # barometric pressure column
rh = 'SITE AMBIENT CONDITIONS REL HUMIDITY {Avg}'           # relative humidity column
watertemp = 'CND CIRC WTR INLET TEMPERATURE {Avg}'          # circulating-water inlet temperature column

In [None]:
def model(layers=(40,40),es=True,n_iter=200,tol=0.0001,patience=10,random=2301):
    """Build an unfitted pipeline: standard scaling followed by an MLP regressor.

    Parameters
    ----------
    layers : tuple of int
        Forwarded to ``MLPRegressor`` as ``hidden_layer_sizes``.
    es : bool
        Forwarded as ``early_stopping``.
    n_iter : int
        Forwarded as ``max_iter``.
    tol : float
        Forwarded as ``tol``.
    patience : int
        Forwarded as ``n_iter_no_change``.
    random : int
        Forwarded as ``random_state``. The default (2301) is the value that
        used to be hard-coded here, so existing calls behave exactly as
        before; the parameter name mirrors ``model_pca``.

    Returns
    -------
    Pipeline
        Two steps named ``'scaler'`` (``StandardScaler``) and ``'estimator'``
        (``MLPRegressor``).
    """
    estimator = MLPRegressor(
        hidden_layer_sizes=layers,
        early_stopping=es,
        tol=tol,
        max_iter=n_iter,
        random_state=random,
        n_iter_no_change=patience,
    )
    return Pipeline([('scaler', StandardScaler()), ('estimator', estimator)])

In [None]:
def model_pca(pca_frac=None, layers=(40, 40), es=True, n_iter=200, tol=0.0001, patience=10, random=2301):
    """Build an unfitted pipeline: standard scaling, PCA, then an MLP regressor.

    Parameters
    ----------
    pca_frac
        Forwarded to ``PCA`` as ``n_components``.
    layers : tuple of int
        Forwarded to ``MLPRegressor`` as ``hidden_layer_sizes``.
    es : bool
        Forwarded as ``early_stopping``.
    n_iter : int
        Forwarded as ``max_iter``.
    tol : float
        Forwarded as ``tol``.
    patience : int
        Forwarded as ``n_iter_no_change``.
    random : int
        Used as ``random_state`` for both the PCA and the MLP step.

    Returns
    -------
    Pipeline
        Three steps named ``'scaler'``, ``'pca'`` and ``'estimator'``.
    """
    scaler = StandardScaler()
    reducer = PCA(n_components=pca_frac, random_state=random)
    regressor = MLPRegressor(
        hidden_layer_sizes=layers,
        early_stopping=es,
        tol=tol,
        max_iter=n_iter,
        random_state=random,
        n_iter_no_change=patience,
    )
    stages = [
        ('scaler', scaler),
        ('pca', reducer),
        ('estimator', regressor),
    ]
    return Pipeline(stages)

In [None]:
# Keep only rows where every column used for modelling is strictly positive.
# Each comparison needs its own parentheses: `&` binds tighter than `>`, so the
# previous `model_df[baro] > 0 & (model_df[rh] > 0)` was evaluated as
# `model_df[baro] > (0 & ...)` and the relative-humidity condition was never applied.
valid_rows = (
    (model_df[drybulb] > 0)
    & (model_df[watertemp] > 0)
    & (model_df[baro] > 0)
    & (model_df[rh] > 0)
)
model_df = model_df[valid_rows].reset_index(drop=True)

In [None]:
# Feature matrix: dry-bulb, relative-humidity and barometric-pressure columns (in that order).
X = model_df[[drybulb, rh, baro]]
# Target: the circulating-water inlet temperature column.
y = model_df[watertemp]

In [None]:
# Build the scaler + MLP pipeline (two hidden layers of 80 units, max_iter 500,
# n_iter_no_change 20) and fit it on all filtered rows; fit() returns the
# fitted pipeline, which is kept as `circ_`.
circ_ = model(layers=(80, 80), n_iter=500, patience=20).fit(X, y)

In [None]:
# Predictions for the same rows the pipeline was fitted on (in-sample).
pred_temp = circ_.predict(X)

# New figure with a title and axis labels: actual values on x, predictions on y.
p1 = figure(
    plot_width=350,
    plot_height=350,
    title="actual vs predicted circ temp",
    x_axis_label='actual',
    y_axis_label='predicted',
)
p1.scatter(y, pred_temp)
show(p1);

In [None]:
# Residual plot: actual minus predicted water temperature, per row.
features = model_df[[drybulb, rh, baro]]
pred = circ_.predict(features)
err = model_df[watertemp] - pred

output_notebook()
t1 = figure(
    title="Residual Plot",
    x_axis_label='Index',
    y_axis_label='Error',
    plot_width=350,
    plot_height=350,
)
row_positions = list(range(len(err)))
t1.scatter(row_positions, err)
show(t1)

# Square root of the residual variance.
print(np.sqrt(np.var(err)))
# Mean absolute error relative to the actual value (a fraction, not multiplied by 100).
print(np.mean(np.abs(err / model_df[watertemp])))

In [None]:
# Relative path the model bundle is written to.
pkl = "../../../pickles/condenser.pkl"

# Bundle with a single entry: a descriptive key mapped to the fitted pipeline.
models = {
    'condenser<circ_water_temp><db|rh|baro>': circ_,
}

# Serialize the bundle through an explicit binary file handle, then echo the
# name of the file that was written.
with open(pkl, "wb") as f:
    joblib.dump(models, f)
    print(f.name)