In [None]:
'''
Data munging libraries

'''
import random

import numpy as np
import pandas as pd
import statsmodels.api as sm

import joblib
'''
Visualization Libraries

'''
import seaborn as sns
%matplotlib inline
#%matplotlib notebook
pd.set_option('display.max_columns', 100)
pd.set_option('display.precision', 2) 
from bokeh.plotting import figure,  show, gridplot
from bokeh.io import output_notebook
from bokeh.layouts import row, column

'''
ML libraries

'''

from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score;
from sklearn.model_selection import  train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline;

In [None]:
output_notebook()

# Define file name of model_data

In [None]:
file_name_model_data = "../model_data/hrsg_model_data_new.gzip"

In [None]:
model_df=pd.read_csv(file_name_model_data, compression='gzip' ,encoding="ISO-8859-2")

In [None]:
model_df.columns

In [None]:
mw1='GT3 Generator Watts Max Selected {Avg}'
mw2='GT4 Generator Watts Max Selected {Avg}'
mw3='STG gross mw {Avg}'
baro = 'SITE AMBIENT CONDITIONS BARO PRESS XMTR {Avg}'
fuel='GT3 Fuel Energy Flow MMBTU'
exh = 'GT3 Exhaust Temp Median Corrected By Average {Avg}'
# std1 = 'GT3 Generator Watts Max Selected {StdDev}'
# std2 = 'GT4 Generator Watts Max Selected {StdDev}'
# std3 = 'STG gross mw {StdDev}'
hpflow1 = 'HRSG 3 HP STEAM FLOW {Avg}'
hppress1 = 'HRSG 3 HP STEAM OUTLET PRESSURE {Avg}'
hptemp1 = 'HRSG 3 HP STEAM OUTLET TEMPERATURE {Avg}'
lpflow1 = 'HRSG 3 LP STEAM FLOW {Avg}'
lppress1 ='HRSG 3 LP STEAM OUTLET PRESSURE TRANSMITTER {Avg}'
lptemp1 = 'HRSG 3 LP STEAM OUTLET TEMPERATURE {Avg}'
dbfuel1 = 'HRSG3_DB_MMBTU'
#ipflow1 = 'HRSG 8 COLD REHEAT STEAM FLOW DUALSEL {Avg}'

crhflow1 = "HRSG 3 COLD REHEAT STEAM FLOW DUALSEL {Avg}"
crhpress1b = 'CRH 3001 PRESSURE B {Avg}'
crhpress1a = 'CRH 3001 PRESSURE A {Avg}'
crhpress = "CRH Press"
crhtemp1 = 'CRH TO HRSG 3001 THERMOCOUPLE {Avg}'

ipflow1 = "HRSG 3 DUALSEL IP STEAM FLOW {Avg}"

hrhpress1a = "HRH 3001 PRESSURE A {Avg}"
hrhpress1b = "HRH 3001 PRESSURE B {Avg}"
hrhpress= "HRH Press"
hrhtemp1 = "HRSG 3001 HRH MAIN LINE THERMOCOUPLE {Avg}"
hrhflow1 = 'HRH 3 Flow'



sh1a = "HRSG 3 HP SUPERHEATER 1 OUTLET THERMOCOUPLE A {Avg}"
sh1b = "HRSG 3 HP SUPERHEATER 1 OUTLET THERMOCOUPLE B {Avg}"
sh1c = "HRSG 3 HP SUPERHEATER 1 OUTLET THERMOCOUPLE C {Avg}"
sh1d = "HRSG 3 HP SUPERHEATER 1 OUTLET THERMOCOUPLE D {Avg}"
shmax = "HP Max Superheat"
time='TimeStamp'

In [None]:
# HRSG HP/IP/LP (Flow, P, T) = function of (MW, GT fuel, exhaust temp, duct-firing fuel)
hrsgdf = model_df[[mw1,mw2,fuel,exh,hpflow1,hppress1,hptemp1,lpflow1,lppress1,lptemp1,dbfuel1,
                  crhflow1, crhpress1a,crhpress1b, crhtemp1,  
                   ipflow1, hrhpress1a,hrhpress1b, hrhtemp1,  
                  sh1a, sh1b, sh1c, sh1d]] 

In [None]:
hrsgdf=hrsgdf.astype('float64')

In [None]:
# Summary statistics of duct-burner fuel for rows above the -5 cut-off,
# selected in a single .loc step (row mask + column) instead of chained indexing.
hrsgdf.loc[hrsgdf[dbfuel1] > -5, dbfuel1].describe()

In [None]:
hrsgdf = hrsgdf[(hrsgdf[hpflow1]>100)
                &(hrsgdf[mw1]>80)&
                (hrsgdf[dbfuel1]>-5)
               ].reset_index(drop=True)

In [None]:
# Row-wise maximum of the four HP superheater-1 outlet thermocouples (A-D).
# Stacking gives a (4, n_rows) array; reducing over axis 0 yields one value per row.
sh_readings = np.array([hrsgdf[tc].values for tc in (sh1a, sh1b, sh1c, sh1d)])
hrsgdf[shmax] = sh_readings.max(axis=0)


In [None]:
hrsgdf[hrhflow1]=hrsgdf[crhflow1]+hrsgdf[ipflow1]

In [None]:
hrsgdf[crhpress]=(hrsgdf[crhpress1a]+hrsgdf[crhpress1b])/2
hrsgdf[hrhpress]=(hrsgdf[hrhpress1a]+hrsgdf[hrhpress1b])/2

In [None]:
hrsgdf[[exh,fuel,dbfuel1,mw1]].describe()

In [None]:
# Screen HP steam-flow outliers with RANSAC: fit hpflow1 as a linear function of
# (mw1, dbfuel1) and flag the rows whose residual is within the threshold as inliers.
hrsghp_clean = hrsgdf.copy()
hp_flow_features = hrsghp_clean[[mw1, dbfuel1]].values
# `loss` is deliberately left at its default, which is the absolute-error loss.
# The explicit spelling 'absolute_loss' was removed in scikit-learn 1.2 (renamed to
# 'absolute_error'), while older releases do not know the new name; omitting the
# argument selects the same loss on every version.
ransac_fl = RANSACRegressor(LinearRegression(),
                            max_trials=100,
                            min_samples=50,
                            residual_threshold=25,
                            random_state=2301)
ransac_fl = ransac_fl.fit(hp_flow_features, hrsghp_clean[hpflow1].values)
hp_flow_pred = ransac_fl.predict(hp_flow_features)
inlier_mask = ransac_fl.inlier_mask_
# Store the inlier flag and the RANSAC prediction next to the measurements.
hrsghp_clean = hrsghp_clean.assign(normal_hpflow=inlier_mask, hpflow=hp_flow_pred)
# Actual vs. predicted HP flow, coloured by inlier flag.
sns.lmplot(x=hpflow1, y='hpflow', data=hrsghp_clean, hue="normal_hpflow", height=6, fit_reg=False,
           scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None)

In [None]:
# residual plot
pred1=hp_flow_pred
y=hrsghp_clean[hpflow1].values
err1 = y - pred1


output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)


print(np.sqrt(np.var(err1)))
print(np.mean(np.abs((y - pred1) / y)))


In [None]:
hrsghp_clean=hrsghp_clean[hrsghp_clean['normal_hpflow']==True]

In [None]:
def model(layers=(40,40),es=True,n_iter=200,tol=0.0001,patience=10):
    """Build an unfitted pipeline: standard scaling followed by an MLP regressor.

    Parameters
    ----------
    layers : tuple of int
        Forwarded to ``MLPRegressor`` as ``hidden_layer_sizes``.
    es : bool
        Forwarded as ``early_stopping``.
    n_iter : int
        Forwarded as ``max_iter``.
    tol : float
        Forwarded as ``tol``.
    patience : int
        Forwarded as ``n_iter_no_change``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps ``'scaler'`` and ``'estimator'``; the MLP seed is fixed at 2301.
    """
    mlp = MLPRegressor(
        hidden_layer_sizes=layers,
        early_stopping=es,
        tol=tol,
        max_iter=n_iter,
        random_state=2301,
        n_iter_no_change=patience,
    )
    steps = [
        ('scaler', StandardScaler()),
        ('estimator', mlp),
    ]
    return Pipeline(steps)

In [None]:
def model_pca(pca_frac=None,layers=(40,40),es=True,n_iter=200,tol=0.0001,patience=10,random=2301):
    """Build an unfitted pipeline: standard scaling, PCA, then an MLP regressor.

    Parameters
    ----------
    pca_frac : optional
        Forwarded to ``PCA`` as ``n_components``.
    layers : tuple of int
        Forwarded to ``MLPRegressor`` as ``hidden_layer_sizes``.
    es : bool
        Forwarded as ``early_stopping``.
    n_iter : int
        Forwarded as ``max_iter``.
    tol : float
        Forwarded as ``tol``.
    patience : int
        Forwarded as ``n_iter_no_change``.
    random : int
        Seed used as ``random_state`` for both the PCA and the MLP.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps ``'scaler'``, ``'pca'`` and ``'estimator'``.
    """
    reducer = PCA(n_components=pca_frac, random_state=random)
    mlp = MLPRegressor(
        hidden_layer_sizes=layers,
        early_stopping=es,
        tol=tol,
        max_iter=n_iter,
        random_state=random,
        n_iter_no_change=patience,
    )
    steps = [
        ('scaler', StandardScaler()),
        ('pca', reducer),
        ('estimator', mlp),
    ]
    return Pipeline(steps)

In [None]:
def model_lm_pca(pca_frac=None,random=2301):
    """Build an unfitted pipeline: standard scaling, PCA, then ordinary least squares.

    Parameters
    ----------
    pca_frac : optional
        Forwarded to ``PCA`` as ``n_components``.
    random : int
        Forwarded to ``PCA`` as ``random_state``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps ``'scaler'``, ``'pca'`` and ``'estimator'``.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=pca_frac, random_state=random)),
        ('estimator', LinearRegression()),
    ]
    return Pipeline(steps)

In [None]:
def model_poly_pca(pca_frac=None,random=2301):
    """Build an unfitted pipeline: scaling, PCA, polynomial expansion, linear regression.

    ``PolynomialFeatures`` is created with its default settings.

    Parameters
    ----------
    pca_frac : optional
        Forwarded to ``PCA`` as ``n_components``.
    random : int
        Forwarded to ``PCA`` as ``random_state``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps ``'scaler'``, ``'pca'``, ``'quadratic'`` and ``'estimator'``.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=pca_frac, random_state=random)),
        ('quadratic', PolynomialFeatures()),
        ('estimator', LinearRegression()),
    ]
    return Pipeline(steps)

In [None]:
# Screen HP steam-pressure outliers with RANSAC: fit hppress1 as a linear function
# of (mw1, dbfuel1) and flag rows within the residual threshold as inliers.
hp_press_features = hrsghp_clean[[mw1, dbfuel1]].values
# `loss` is deliberately left at its default, which is the absolute-error loss.
# The explicit spelling 'absolute_loss' was removed in scikit-learn 1.2 (renamed to
# 'absolute_error'), while older releases do not know the new name; omitting the
# argument selects the same loss on every version.
ransac_p = RANSACRegressor(LinearRegression(),
                           max_trials=100,
                           min_samples=50,
                           residual_threshold=10,
                           random_state=2301)
ransac_p = ransac_p.fit(hp_press_features, hrsghp_clean[hppress1].values)
hp_press_pred = ransac_p.predict(hp_press_features)
inlier_mask = ransac_p.inlier_mask_
# Store the inlier flag and the RANSAC prediction next to the measurements.
hrsghp_clean = hrsghp_clean.assign(normal_hppress=inlier_mask, hp_press_pred=hp_press_pred)
# Actual vs. predicted HP pressure, coloured by THIS cell's inlier flag.
# The previous version plotted HP flow on x and used hue="normal_hpflow", which is
# constant here because the frame was already filtered to HP-flow inliers.
sns.lmplot(x=hppress1, y='hp_press_pred', data=hrsghp_clean, hue="normal_hppress", height=6, fit_reg=False,
           scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None)

In [None]:
x1 =hrsghp_clean[[mw1,exh,dbfuel1]]
y1 = hrsghp_clean[[hppress1,hptemp1]]
hp_lm = model_poly_pca()
hp_lm = hp_lm.fit(x1, y1)

In [None]:
# Scatter of measured HP steam pressure against measured HP steam flow.
# Title and axis labels describe what is actually plotted (both axes are
# measurements; nothing here is a model prediction).
p1 = figure(plot_width=350, plot_height=350, title="HP press vs flow", x_axis_label='flow',
            y_axis_label='pressure')

p1.scatter(hrsghp_clean[hpflow1], hrsghp_clean[hppress1]);
show(p1)

In [None]:
hp2x1df=hrsghp_clean[(hrsghp_clean[mw1]>50)&(hrsghp_clean[mw2]>50)
                     &(hrsghp_clean[hpflow1]>800)&
                    (hrsghp_clean[hppress1]<2000)].reset_index(drop=True)
x1=hp2x1df[hpflow1].values.reshape(-1,1)
y1=hp2x1df[hppress1]
hp2x1press=LinearRegression()
hp2x1press=hp2x1press.fit(x1,y1)

In [None]:
# residual plot

pred1=hp2x1press.predict(x1)
y=y1
err1 = y - pred1


output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)


print(np.sqrt(np.var(err1)))
print(np.mean(np.abs((y - pred1) / y)))


In [None]:
X_hp, y_hp_fl=hrsghp_clean[[mw1,exh,dbfuel1]],hrsghp_clean[hpflow1]

In [None]:
#hp_flow_=model_pca(layers=(300,300,300),n_iter=1000,patience=40)
hp_flow_=model_lm_pca()
hp_flow_=hp_flow_.fit(X_hp,y_hp_fl)

In [None]:
pred_hp_fl=hp_flow_.predict(X_hp)
'''
creating a new plot with a title and axis labels. 
'''
p1 = figure(plot_width=350, plot_height=350,title="actual vs predicted hp flow", x_axis_label='actual', 
            y_axis_label='predicted')

p1.scatter(y_hp_fl, pred_hp_fl);
show(p1)

In [None]:
# Residuals (actual - predicted) of the HP flow model on the rows it was fitted on.
# A leading bare `hrsghp_clean[[...]]` expression was dropped: it was not the last
# statement of the cell, so it displayed nothing and only built a throw-away frame.
pred1 = pred_hp_fl
y = y_hp_fl
err1 = y - pred1
output_notebook()
t1 = figure(plot_width=350, plot_height=350, title="Residual Plot", x_axis_label='Index',
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)

In [None]:
pred2x1flow=hp_flow_.predict(hp2x1df[[mw1,exh,dbfuel1]])
y2x1flow=hp2x1df[hpflow1]
err2x1flow = y2x1flow - pred2x1flow
output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
t1.scatter(list(range(len(err2x1flow))), err2x1flow)
show(t1)

In [None]:
hrsghp_flow=hrsghp_clean#.loc[14100:,:].reset_index(drop=True)

In [None]:
X_hp, y_hp_fl=hrsghp_flow[[mw1,exh,dbfuel1]],hrsghp_flow[hpflow1]

In [None]:
#hp_flow_=model_pca(layers=(300,300,300),n_iter=1000,patience=40)
hp_flow_=model_lm_pca()
hp_flow_=hp_flow_.fit(X_hp,y_hp_fl)

In [None]:
pred_hp_fl=hp_flow_.predict(X_hp)
'''
creating a new plot with a title and axis labels. 
'''
p1 = figure(plot_width=350, plot_height=350,title="actual vs predicted hp flow", x_axis_label='actual', 
            y_axis_label='predicted')

p1.scatter(y_hp_fl, pred_hp_fl);
show(p1)

In [None]:
# Residuals (actual - predicted) of the HP flow model refitted on hrsghp_flow.
# A leading bare `hrsghp_flow[[...]]` expression was dropped: it was not the last
# statement of the cell, so it displayed nothing and only built a throw-away frame.
pred1 = pred_hp_fl
y = y_hp_fl
err1 = y - pred1
output_notebook()
t1 = figure(plot_width=350, plot_height=350, title="Residual Plot", x_axis_label='Index',
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)

In [None]:
# Screen outliers in the max superheater-outlet temperature (shmax) with RANSAC,
# fitting it as a linear function of (mw1, dbfuel1).
sh_features = hrsghp_clean[[mw1, dbfuel1]].values
# `loss` is deliberately left at its default, which is the absolute-error loss.
# The explicit spelling 'absolute_loss' was removed in scikit-learn 1.2 (renamed to
# 'absolute_error'), while older releases do not know the new name; omitting the
# argument selects the same loss on every version.
ransac_sh = RANSACRegressor(LinearRegression(),
                            max_trials=100,
                            min_samples=50,
                            residual_threshold=25,
                            random_state=2301)
ransac_sh = ransac_sh.fit(sh_features, hrsghp_clean[shmax].values)
sh_pred = ransac_sh.predict(sh_features)
inlier_mask = ransac_sh.inlier_mask_
# Store the inlier flag and the RANSAC prediction next to the measurements.
hrsghp_clean = hrsghp_clean.assign(normal_sh=inlier_mask, shpred=sh_pred)
# Actual vs. predicted max superheat, coloured by inlier flag.
sns.lmplot(x=shmax, y='shpred', data=hrsghp_clean, hue="normal_sh", height=6, fit_reg=False,
           scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None)

In [None]:
hrsghp_shclean=hrsghp_clean[hrsghp_clean['normal_sh']==True].reset_index(drop=True)

In [None]:
X_sh,y_sh =hrsghp_shclean[[mw1,exh,dbfuel1]],hrsghp_shclean[shmax]

In [None]:
#hp_sh_=model_pca(layers=(200,200,200),n_iter=1000,patience=80)
hp_sh_=model_poly_pca()
hp_sh_=hp_sh_.fit(X_sh,y_sh)

In [None]:
# In-sample predictions of the max-superheat model, plotted against the measured
# values. The title previously said "hp flow", copied from the HP flow cell.
pred_hp_sh = hp_sh_.predict(X_sh)
p1 = figure(plot_width=350, plot_height=350, title="actual vs predicted HP max superheat",
            x_axis_label='actual', y_axis_label='predicted')

p1.scatter(y_sh, pred_hp_sh);
show(p1)

In [None]:
# Screen cold-reheat (CRH) steam-flow outliers with RANSAC: fit crhflow1 as a linear
# function of (mw1, dbfuel1) and flag rows within the residual threshold as inliers.
crh_clean = hrsgdf.copy()
crh_features = crh_clean[[mw1, dbfuel1]].values
# `loss` is deliberately left at its default, which is the absolute-error loss.
# The explicit spelling 'absolute_loss' was removed in scikit-learn 1.2 (renamed to
# 'absolute_error'), while older releases do not know the new name; omitting the
# argument selects the same loss on every version.
ransac_fl = RANSACRegressor(LinearRegression(),
                            max_trials=100,
                            min_samples=50,
                            residual_threshold=25,
                            random_state=2301)
ransac_fl = ransac_fl.fit(crh_features, crh_clean[crhflow1].values)
crh_flow_pred = ransac_fl.predict(crh_features)
inlier_mask = ransac_fl.inlier_mask_
# Store the inlier flag and the RANSAC prediction next to the measurements.
crh_clean = crh_clean.assign(normal_crhflow=inlier_mask, crhflow=crh_flow_pred)
# Actual vs. predicted CRH flow, coloured by inlier flag.
sns.lmplot(x=crhflow1, y='crhflow', data=crh_clean, hue="normal_crhflow", height=6, fit_reg=False,
           scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None)

In [None]:
crh_clean=crh_clean[crh_clean['normal_crhflow']==True]
'''
creating a new plot with a title and axis labels. 
'''
p1 = figure(plot_width=350, plot_height=350,title="CRH press vs flow", x_axis_label='flow', 
            y_axis_label='pressure')

p1.scatter(crh_clean[crhflow1], crh_clean[crhpress])
show(p1);

In [None]:
crh2x1df=crh_clean[(crh_clean[mw1]>50)&(crh_clean[mw2]>50)].reset_index(drop=True)
x1=crh2x1df[crhflow1].values.reshape(-1,1)
y1=crh2x1df[crhpress]
crhp_=LinearRegression()
crhp_=crhp_.fit(x1,y1)

In [None]:
# residual plot

pred1=crhp_.predict(x1)
y=y1
err1 = y - pred1


output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)


print(np.sqrt(np.var(err1)))
print(np.mean(np.abs((y - pred1) / y)))


In [None]:
x1 = crh_clean[[mw1,dbfuel1]]
y1 = crh_clean[[crhflow1,crhpress,crhtemp1]]
crh_= LinearRegression()
crh_= crh_.fit(x1, y1)

In [None]:
pred1=crh_.predict(x1)[:,0]
y=y1.values[:,0]
err1 = y - pred1


output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)


print(np.sqrt(np.var(err1)))
print(np.mean(np.abs((y - pred1) / y)))


In [None]:
# Column 0 of the multi-output CRH prediction, plotted against the measured CRH
# flow. The title previously said "hp flow", copied from the HP flow cell.
crh_pred_fl = crh_.predict(x1)[:, 0]
p1 = figure(plot_width=350, plot_height=350, title="actual vs predicted CRH flow",
            x_axis_label='actual', y_axis_label='predicted')

p1.scatter(crh_clean[crhflow1], crh_pred_fl);
show(p1)

In [None]:
x1 = crh_clean[[mw1,exh,dbfuel1]]
y1 = crh_clean[crhflow1]
crh_nn= model_pca(layers=(100,100,100))
crh_nn= crh_nn.fit(x1, y1)

In [None]:
pred1=crh_nn.predict(x1)
y=y1
err1 = y - pred1
output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)


In [None]:
# Negative LP steam-flow readings are set to zero. Series.clip is the vectorized
# equivalent of the per-element lambda (values >= 0 and NaN pass through unchanged).
hrsgdf[lpflow1] = hrsgdf[lpflow1].clip(lower=0)

In [None]:
x1 = hrsgdf[[mw1,exh,dbfuel1]]
y1 = hrsgdf[[lppress1,lptemp1]]
lp_lm = LinearRegression()
lp_lm = lp_lm.fit(x1, y1)

In [None]:
# lp_=model(layers=(40,40),n_iter=1000,patience=40)
lp_=model_poly_pca()
lp_=lp_.fit(hrsgdf[[mw1,exh,dbfuel1]],hrsgdf[lpflow1])

In [None]:
lp_pred_fl=lp_.predict(hrsgdf[[mw1,exh,dbfuel1]])
'''
creating a new plot with a title and axis labels. 
'''
p1 = figure(plot_width=350, plot_height=350,title="actual vs predicted lp flow", x_axis_label='actual', 
            y_axis_label='predicted')

p1.scatter(hrsgdf[lpflow1], lp_pred_fl);
show(p1)

In [None]:
# Screen hot-reheat (HRH) steam-flow outliers with RANSAC: fit hrhflow1 as a linear
# function of (mw1, dbfuel1) and flag rows within the residual threshold as inliers.
hrh_clean = hrsgdf.copy()
hrh_features = hrh_clean[[mw1, dbfuel1]].values
# `loss` is deliberately left at its default, which is the absolute-error loss.
# The explicit spelling 'absolute_loss' was removed in scikit-learn 1.2 (renamed to
# 'absolute_error'), while older releases do not know the new name; omitting the
# argument selects the same loss on every version.
ransac_fl = RANSACRegressor(LinearRegression(),
                            max_trials=100,
                            min_samples=50,
                            residual_threshold=25,
                            random_state=2301)
ransac_fl = ransac_fl.fit(hrh_features, hrh_clean[hrhflow1].values)
hrh_flow_pred = ransac_fl.predict(hrh_features)
inlier_mask = ransac_fl.inlier_mask_
# Store the inlier flag and the RANSAC prediction next to the measurements.
hrh_clean = hrh_clean.assign(normal_hrhflow=inlier_mask, hrhflow=hrh_flow_pred)
# Actual vs. predicted HRH flow, coloured by inlier flag.
sns.lmplot(x=hrhflow1, y='hrhflow', data=hrh_clean, hue="normal_hrhflow", height=6, fit_reg=False,
           scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None)

In [None]:
hrh_clean=hrh_clean[hrh_clean['normal_hrhflow']==True]
'''
creating a new plot with a title and axis labels. 
'''
p1 = figure(plot_width=350, plot_height=350,title="HRH press vs flow", x_axis_label='flow', 
            y_axis_label='pressure')

p1.scatter(hrh_clean[hrhflow1], hrh_clean[hrhpress])
show(p1);

In [None]:
# Residuals of the HRH-flow RANSAC fit on the inlier rows only.
# The inlier filter is idempotent, so it is harmless if the frame is already filtered.
hrh_clean = hrh_clean[hrh_clean['normal_hrhflow'] == True]
# ransac_fl was fitted on a plain ndarray (.values); predict on an ndarray as well so
# recent scikit-learn does not warn about feature names that were absent at fit time.
pred1 = ransac_fl.predict(hrh_clean[[mw1, dbfuel1]].values)
y = hrh_clean[hrhflow1].values
err1 = y - pred1


output_notebook()
t1 = figure(plot_width=350, plot_height=350, title="Residual Plot", x_axis_label='Index',
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)


# Standard deviation of the residuals, then mean absolute percentage error (as a fraction).
print(np.sqrt(np.var(err1)))
print(np.mean(np.abs((y - pred1) / y)))


In [None]:
hrh2x1df=hrh_clean[(hrh_clean[mw1]>50)&(hrh_clean[mw2]>50)].reset_index(drop=True)
x1=hrh2x1df[hrhflow1].values.reshape(-1,1)
y1=hrh2x1df[hrhpress]
hrhp_=LinearRegression()
hrhp_=hrhp_.fit(x1,y1)

In [None]:
# residual plot

pred1=hrhp_.predict(x1)
y=y1
err1 = y - pred1


output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
t1.scatter(list(range(len(err1))), err1)
show(t1)


print(np.sqrt(np.var(err1)))
print(np.mean(np.abs((y - pred1) / y)))


In [None]:
x1 = hrh_clean[[mw1,exh,dbfuel1]]
y1 = hrh_clean[[hrhpress,hrhtemp1]]
hrh_lm = model_lm_pca()
hrh_lm = hrh_lm.fit(x1, y1)

In [None]:
#hrh_=model_pca(layers=(200,200,200),n_iter=500,patience=40)
hrh_=model_lm_pca()
hrh_=hrh_.fit(x1,hrh_clean[hrhflow1])

In [None]:
# In-sample predictions of the HRH flow model, plotted against the measured HRH
# flow. The title previously said "lp flow", copied from the LP flow cell.
hrh_pred_fl = hrh_.predict(x1)
p1 = figure(plot_width=350, plot_height=350, title="actual vs predicted HRH flow",
            x_axis_label='actual', y_axis_label='predicted')

p1.scatter(hrh_clean[hrhflow1], hrh_pred_fl);
show(p1)

In [None]:
# Persist the fitted HP / HRH / LP pipelines in a single joblib file.
# The path is relative to the notebook's working directory.
pkl = "../../../pickles/hrsg3.pkl"

# Keys appear to follow '<system><outputs><inputs>' — presumably parsed by the
# consumer of this file; confirm before renaming any key.
# NOTE(review): the CRH models fitted above (crh_, crhp_, crh_nn) and the
# pressure-vs-flow fits (hp2x1press, hrhp_) are not included — confirm intentional.
models = {'hp<press|temp><mw|exh_temp|db_fuel>'           :hp_lm,
          'hp<flow><mw|exh_temp|db_fuel>'                 :hp_flow_,
          'hp<super_heat_temp><mw|exh_temp|db_fuel>'      :hp_sh_,
          'hrh<press|temp><mw|exh_temp|db_fuel>'          :hrh_lm,
          'hrh<flow><mw|exh_temp|db_fuel>'                :hrh_,
          'lp<press|temp><mw|exh_temp|db_fuel>'           :lp_lm,
          'lp<flow><mw|exh_temp|db_fuel>'                 :lp_,
          }

# Write the dict through an explicit binary file handle, then echo the path written.
with open(pkl, "wb") as f:
    joblib.dump(models, f)
    print(f'{f.name}')

In [None]:
from scipy import optimize

In [None]:
def dbmin(x):
    """Residual between the predicted max superheat and the target, for root finding.

    Evaluates the fitted ``hp_sh_`` pipeline at the notebook-level operating point
    (``mw_``, ``exh_``) with duct-burner fuel ``x`` and subtracts the target
    ``sh_temp_``. A root of this function is the duct-burner fuel at which the
    model predicts exactly ``sh_temp_``.

    Parameters
    ----------
    x : float or array-like of length 1
        Candidate duct-burner fuel value. ``scipy.optimize.fsolve`` passes a
        length-1 ndarray; a plain scalar is accepted as well.

    Returns
    -------
    numpy.ndarray
        ``hp_sh_.predict(...) - sh_temp_`` for the single input row.
    """
    # Unwrap to a plain float first: np.array([mw_, exh_, x]) with x a length-1
    # array is a ragged sequence, which NumPy >= 1.24 rejects with a ValueError.
    db_fuel = float(np.ravel(x)[0])
    # hp_sh_ was fitted on a DataFrame with these three columns; reusing the same
    # column names avoids scikit-learn's feature-name mismatch warning at predict time.
    xin = pd.DataFrame([[mw_, exh_, db_fuel]], columns=[mw1, exh, dbfuel1])
    yout = hp_sh_.predict(xin)
    return yout - sh_temp_

In [None]:
mw_=171
exh_=1108
sh_temp_=1060

In [None]:
optimize.fsolve(func=dbmin, x0=500)