In [None]:
'''
Data munging libraries

'''
import random

import numpy as np
import pandas as pd
import statsmodels.api as sm

import joblib
'''
Visualization Libraries

'''
import seaborn as sns
%matplotlib inline
#%matplotlib notebook
# Show up to 100 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 100)
# Display floating-point values with a precision of 2.
pd.set_option('display.precision', 2) 
from bokeh.plotting import figure,  show, gridplot
from bokeh.io import output_notebook
from bokeh.layouts import row, column

'''
ML libraries

'''

from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score;
from sklearn.model_selection import  train_test_split ;
from sklearn.pipeline import Pipeline;

# Define file name of model_data

In [None]:
# Relative path of the model data file; it is read below as a gzip-compressed CSV.
file_name_model_data = "../model_data/aux_model_data.gzip"

In [None]:
# Read the gzip-compressed CSV into the base frame used by every later cell.
model_df = pd.read_csv(
    file_name_model_data,
    compression='gzip',
    encoding="ISO-8859-2",
    low_memory=False,
)

In [None]:
# Summary statistics of the freshly loaded frame.
model_df.describe()

In [None]:
# List the available column labels (the constants defined next must match these).
model_df.columns

In [None]:
# Column labels used to index model_df and the frames derived from it.

# GT3 / GT4 "Generator Watts Max Selected": average and standard deviation.
mw1='GT3 Generator Watts Max Selected {Avg}'
mw2='GT4 Generator Watts Max Selected {Avg}'
std1='GT3 Generator Watts Max Selected {StdDev}'
std2='GT4 Generator Watts Max Selected {StdDev}'
# STG gross MW: average and standard deviation.
stgmw='STG gross mw {Avg}'
stgmwstd='STG gross mw {StdDev}'
# NOTE(review): chiller1 and chiller2 hold the SAME label ("CHILLER PLANT 2 ..."),
# so any condition that tests both only ever tests one column. This looks like a
# copy-paste slip - confirm the intended label for chiller1 against model_df.columns.
chiller1='CHILLER PLANT 2 PLC ENABLE {Sample}'
chiller2='CHILLER PLANT 2 PLC ENABLE {Sample}'
# NOTE(review): both flow labels use tag FE-501 and refer to GT 8 / GT 9 while the
# generator labels refer to GT3 / GT4 - verify these against the data set.
coldflow1='FE-501 CHW FLOW to GT 8 Inlet {Avg}'
coldflow2='FE-501 CHW FLOW to GT 9 Inlet {Avg}'
# Chilled-water supply ("Out of Chiller") and return ("Into Chiller") temperatures.
tempin1='Chiller 3 TT-501A CHW Supply Temperature (Out of Chiller) {Avg}'
tempin2='Chiller 4 TT-502A CHW Supply Temperature (Out of Chiller) {Avg}'
tempout1='Chiller 3 TT-501B CHW Return Temperature (Into Chiller) {Avg}'
tempout2='Chiller 4 TT-502B CHW Return Temperature (Into Chiller) {Avg}'
netmw='Total Plant Net MW {Avg}'
# Labels below name columns that this notebook creates itself in later cells.
gross='Gross Power'
aux='Aux Load'
dt1='GT 3 delta T'
dt2='GT 4 delta T'
gt1on='GT3 Online'
gt2on='GT4 Online'
mode='Mode of Operation'

In [None]:
def noneg(x, threshold=2):
    """Replace a reading that falls below ``threshold`` with 0.

    Despite the name, this does more than remove negatives: every value
    strictly less than ``threshold`` (2 by default) is zeroed.

    Parameters
    ----------
    x : scalar
        A single value, e.g. one element handed over by ``Series.apply``.
    threshold : scalar, default 2
        Cut-off; values strictly below it become 0.

    Returns
    -------
    scalar
        0 when ``x < threshold``, otherwise ``x`` unchanged. NaN compares
        False against any threshold and is therefore returned as-is.
    """
    return 0 if x < threshold else x

In [None]:
# Zero out sub-threshold readings on both GT columns and the STG column.
for _col in (mw1, mw2, stgmw):
    model_df[_col] = model_df[_col].apply(noneg)

In [None]:
# Keep rows where STG gross MW is above 20 and its standard deviation is at most 3.
stg_loaded = model_df[stgmw].gt(20)
stg_steady = model_df[stgmwstd].le(3)
mw_df = model_df.loc[stg_loaded & stg_steady].reset_index(drop=True)

In [None]:
# Aux load = (GT3 + GT4 + STG output) minus total plant net MW.
generation_total = mw_df[mw1] + mw_df[mw2] + mw_df[stgmw]
mw_df[aux] = generation_total - mw_df[netmw]

In [None]:
# Summary statistics of the derived aux load next to plant net MW.
mw_df[[aux,netmw]].describe()

In [None]:
# Cast the chiller enable flags to int one column at a time. chiller1 and
# chiller2 can hold the same label, and assigning through a list that repeats
# a label is fragile, so de-duplicate (order preserved) before casting.
for _flag_col in dict.fromkeys((chiller1, chiller2)):
    mw_df[_flag_col] = mw_df[_flag_col].astype(int)

In [None]:
# Keep only the rows where neither chiller enable flag is set.
chillers_off = mw_df[chiller1].eq(0) & mw_df[chiller2].eq(0)
aux_df = mw_df.loc[chillers_off].reset_index(drop=True)

In [None]:
# Absolute difference between chilled-water return and supply temperature.
aux_df[dt1] = (aux_df[tempout1] - aux_df[tempin1]).abs()
aux_df[dt2] = (aux_df[tempout2] - aux_df[tempin2]).abs()

In [None]:
# Drop rows where either delta T is 4 or more.
small_delta_t = aux_df[dt1].lt(4) & aux_df[dt2].lt(4)
aux_df = aux_df.loc[small_delta_t].reset_index(drop=True)

In [None]:
# Gross power is the sum of both GT outputs and the STG output.
gt_output = aux_df[mw1] + aux_df[mw2]
aux_df[gross] = gt_output + aux_df[stgmw]

In [None]:
# Scatter of aux load against gross power (regression fit disabled).
sns.lmplot(
    data=aux_df,
    x=gross,
    y=aux,
    height=8,
    fit_reg=False,
    ci=None,
    scatter_kws={"s": 25},
    line_kws={"color": "black", "linewidth": 4},
)

In [None]:
# Keep rows whose aux load lies strictly between 2 and 25.
aux_in_range = aux_df[aux].gt(2) & aux_df[aux].lt(25)
aux_df = aux_df.loc[aux_in_range].reset_index(drop=True)

In [None]:
# Same scatter as before, redrawn after the aux-load range filter.
sns.lmplot(
    data=aux_df,
    x=gross,
    y=aux,
    height=8,
    fit_reg=False,
    ci=None,
    scatter_kws={"s": 25},
    line_kws={"color": "black", "linewidth": 4},
)

In [None]:
# Flag a GT as online (1) whenever its MW column is non-zero, else 0.
# A vectorised comparison replaces the per-element lambda; NaN != 0 is True,
# so missing values still map to 1 exactly as before, and the result dtype
# is int64 even when the frame is empty.
aux_df[gt1on] = aux_df[mw1].ne(0).astype("int64")
aux_df[gt2on] = aux_df[mw2].ne(0).astype("int64")

In [None]:
# Mode of operation = number of GTs flagged online (sum of the two 0/1 flags).
aux_df[mode] = aux_df[gt1on].add(aux_df[gt2on])

In [None]:
# def adjust_aux(modaux,add1=1,add2=0):
#     if modaux[0]>1:
#         newaux=modaux[1]-add2
#     else:
#         newaux=modaux[1]+add1
#     return newaux

In [None]:
#aux_df['newaux']=aux_df[[mode,aux]].apply(lambda x:adjust_aux(x,add1=1,add2=1.5),axis=1)

In [None]:
#aux_df['newaux']

In [None]:
#sns.lmplot(x=gross, y='newaux', data=aux_df, height=8,fit_reg=False,scatter_kws={"s": 25},
  #         line_kws={"color":"black","linewidth":4},ci=None)

In [None]:
def model_pca(pca_frac=None,layers=(40,40),es=True,n_iter=200,tol=0.0001,patience=10,random=2301):
    """Build an unfitted StandardScaler -> PCA -> MLPRegressor pipeline.

    Parameters
    ----------
    pca_frac : passed to ``PCA(n_components=...)``; ``None`` defers to
        scikit-learn's default.
    layers : tuple, passed as ``hidden_layer_sizes`` of the MLP.
    es : bool, passed as ``early_stopping`` of the MLP.
    n_iter : int, passed as ``max_iter`` of the MLP.
    tol : float, passed as ``tol`` of the MLP.
    patience : int, passed as ``n_iter_no_change`` of the MLP.
    random : seed used as ``random_state`` for both PCA and the MLP.
        Note that inside this function the name shadows the ``random`` module.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps are named ``'scaler'``, ``'pca'`` and ``'estimator'``.
    """
    scaler = StandardScaler()
    reducer = PCA(n_components=pca_frac, random_state=random)
    regressor = MLPRegressor(
        hidden_layer_sizes=layers,
        early_stopping=es,
        tol=tol,
        max_iter=n_iter,
        random_state=random,
        n_iter_no_change=patience,
    )
    steps = [('scaler', scaler), ('pca', reducer), ('estimator', regressor)]
    return Pipeline(steps)

In [None]:
# 1x1 model: rows with exactly one GT online (mode == 1) and aux load above 3.5
is_1x1 = (aux_df[mode] == 1) & (aux_df[aux] > 3.5)
aux1x1df = aux_df[is_1x1].reset_index(drop=True)

X = aux1x1df[[mw1, mw2, stgmw]].values
y = aux1x1df[aux].values

aux1x1_ = model_pca(n_iter=500)
aux1x1_.fit(X, y)
# Predictions are made on the same rows the pipeline was fitted on.
aux_pred = aux1x1_.predict(X)

In [None]:
# Scatter the current targets (y) against the current predictions (aux_pred).
output_notebook()
# `width` / `height` replace `plot_width` / `plot_height`, which Bokeh 3.0 removed.
t1 = figure(width=350, height=350, title="actua vs pred",
            x_axis_label='actual', y_axis_label='Pred')
t1.scatter(y, aux_pred)

show(t1)

In [None]:
# 2x1 model: rows with both GTs online (mode == 2) and aux load of at least 7.5
is_2x1 = (aux_df[mode] == 2) & (aux_df[aux] >= 7.5)
aux2x1df = aux_df[is_2x1].reset_index(drop=True)

X = aux2x1df[[mw1, mw2, stgmw]].values
y = aux2x1df[aux].values

aux2x1_ = model_pca(n_iter=500)
aux2x1_.fit(X, y)
# Predictions are made on the same rows the pipeline was fitted on.
aux_pred = aux2x1_.predict(X)

In [None]:
# Scatter the current targets (y) against the current predictions (aux_pred).
output_notebook()
# `width` / `height` replace `plot_width` / `plot_height`, which Bokeh 3.0 removed.
t1 = figure(width=350, height=350, title="actua vs pred",
            x_axis_label='actual', y_axis_label='Pred')
t1.scatter(y, aux_pred)

show(t1)

In [None]:
## Persist both fitted pipelines in a single joblib file, keyed by model tag

pkl = "../../../pickles/auxload.pkl"

models = {}
models['1x1<aux_load><ctg3_mw|ctg4_mw|stg1_mw>'] = aux1x1_
models['2x1<aux_load><ctg3_mw|ctg4_mw|stg1_mw>'] = aux2x1_

with open(pkl, "wb") as f:
    joblib.dump(models, f)
    # Echo the path that was written.
    print(f'{f.name}')