In [None]:
'''
Data munging libraries

'''
import random

import numpy as np
import pandas as pd
import statsmodels.api as sm

import joblib
'''
Visualization Libraries

'''
import seaborn as sns
%matplotlib inline
#%matplotlib notebook
pd.set_option('display.max_columns', 100)
pd.set_option('display.precision', 2) 
from bokeh.plotting import figure,  show, gridplot
from bokeh.io import output_notebook
from bokeh.layouts import row, column

'''
ML libraries

'''

from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score;
from sklearn.model_selection import  train_test_split ;
from sklearn.pipeline import Pipeline;

# Define file name of model_data

In [None]:
# Path to the gzip-compressed model data file, relative to the notebook's working directory.
file_name_model_data = "../model_data/ctg4_model_data.gzip"

In [None]:
# Load the model data: a gzip-compressed CSV encoded as ISO-8859-2.
model_df = pd.read_csv(
    file_name_model_data,
    compression='gzip',
    encoding="ISO-8859-2",
)

In [None]:
# Summary statistics of the loaded frame.
model_df.describe()

In [None]:
# Column dtypes and non-null counts of the loaded frame.
model_df.info()

In [None]:
# Short aliases for the model_df column names used throughout the notebook.
# Each alias holds the exact column label; the label text itself describes the signal.
mw='GT4 Generator Watts Max Selected {Avg}'
igv='GT4 IGV angle in deg {Avg}'
fsr='GT4 Fuel Stroke Reference {Avg}'
fsrt='GT4 Temperature Control Fuel Stroke Reference {Avg}'
cit='GT4 Compressor Inlet Temperature {Avg}'
fuel='GT4 Fuel Energy Flow MMBTU'
cpd='GT4_CPD_Median'
ctd='GT4_CTD_Median'
baro='SITE AMBIENT CONDITIONS BARO PRESS XMTR {Avg}'
exh='GT4 Exhaust Temp Median Corrected By Average {Avg}'
# Disabled alias; NOTE(review): it names a GT3 column, not GT4 -- confirm before re-enabling.
#std='GT3 Generator Watts Max Selected {StdDev}'

In [None]:
# Parse TimeStamp into pandas datetimes so the .dt accessor can be used on it.
model_df['TimeStamp']=pd.to_datetime(model_df['TimeStamp'])

In [None]:
# Calendar year of each sample, derived from TimeStamp.
model_df['year']=model_df['TimeStamp'].dt.year

In [None]:
# Select the rows used for the base-load models: MW > 100, IGV > 84 and
# |FSR - FSRT| <= 0.1 (presumably base load on temperature control -- confirm).
# abs() has to wrap the difference, not the comparison: the previous
# abs(fsr - fsrt <= 0.1) took the absolute value of a boolean and therefore
# only tested the one-sided condition fsr - fsrt <= 0.1.
base_df = model_df[
    (model_df[mw] > 100)
    & (model_df[igv] > 84)
    & ((model_df[fsr] - model_df[fsrt]).abs() <= 0.1)
]

In [None]:
# Flag outliers of the MW ~ CIT relationship with a RANSAC-wrapped linear fit.
# `loss` is left at its default (absolute error): the explicit 'absolute_loss'
# spelling was removed in scikit-learn 1.2, whereas the default is the same
# metric under every release.
ransac_mw = RANSACRegressor(
    LinearRegression(),
    max_trials=100,
    min_samples=50,
    residual_threshold=5,
    random_state=2301,
).fit(base_df[cit].values.reshape(-1, 1), base_df[mw].values)

# RANSAC-line prediction for every base-load row.
mw_base_pred = ransac_mw.predict(base_df[cit].values.reshape(-1, 1))

# True where a row lies within residual_threshold of the consensus fit.
inlier_mask = ransac_mw.inlier_mask_
base_df = base_df.assign(normal_mw=inlier_mask)

# Scatter of MW vs CIT coloured by inlier/outlier status.
sns.lmplot(
    x=cit, y=mw, data=base_df, hue="normal_mw", height=8, fit_reg=True,
    scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None,
);

In [None]:
# Keep only the rows flagged True in the normal_mw inlier column.
base_df_clean = base_df[base_df["normal_mw"]]

In [None]:
## Flag outliers of CTD ~ (CIT, baro) with a RANSAC-wrapped linear fit.
# `loss` is left at its default (absolute error): the explicit 'absolute_loss'
# spelling was removed in scikit-learn 1.2, whereas the default is the same
# metric under every release.
ransac_ctd = RANSACRegressor(
    LinearRegression(),
    max_trials=100,
    min_samples=50,
    residual_threshold=5,
    random_state=2301,
).fit(base_df_clean[[cit, baro]], base_df_clean[ctd])

# True where a row lies within residual_threshold of the consensus fit.
inlier_mask = ransac_ctd.inlier_mask_
base_df_clean = base_df_clean.assign(normal_ctd=inlier_mask)

sns.lmplot(
    x=cit, y=ctd, data=base_df_clean, hue="normal_ctd", height=6, fit_reg=True,
    scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None,
);

In [None]:
## Flag outliers of CPD ~ (CIT, baro) with a RANSAC-wrapped linear fit.
# `loss` is left at its default (absolute error): the explicit 'absolute_loss'
# spelling was removed in scikit-learn 1.2, whereas the default is the same
# metric under every release.
ransac_cpd = RANSACRegressor(
    LinearRegression(),
    max_trials=100,
    min_samples=50,
    residual_threshold=5,
    random_state=2301,
).fit(base_df_clean[[cit, baro]], base_df_clean[cpd])

# True where a row lies within residual_threshold of the consensus fit.
inlier_mask = ransac_cpd.inlier_mask_
base_df_clean = base_df_clean.assign(normal_cpd=inlier_mask)

sns.lmplot(
    x=cit, y=cpd, data=base_df_clean, hue="normal_cpd", height=6, fit_reg=True,
    scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None,
);

In [None]:
# Flag outliers of fuel ~ MW with a RANSAC-wrapped linear fit.
# (The previous header comment said "cpd vs temp", which did not match the code.)
# `loss` is left at its default (absolute error): the explicit 'absolute_loss'
# spelling was removed in scikit-learn 1.2, whereas the default is the same
# metric under every release.
ransac_fuel = RANSACRegressor(
    LinearRegression(),
    max_trials=100,
    min_samples=50,
    residual_threshold=25,
    random_state=2301,
).fit(base_df_clean[mw].values.reshape(-1, 1), base_df_clean[fuel].values)

# RANSAC-line fuel prediction for every row of base_df_clean.
fuel_base_pred = ransac_fuel.predict(base_df_clean[mw].values.reshape(-1, 1))

# True where a row lies within residual_threshold of the consensus fit.
inlier_mask = ransac_fuel.inlier_mask_
base_df_clean = base_df_clean.assign(normal_fuel=inlier_mask)

sns.lmplot(
    x=mw, y=fuel, data=base_df_clean, hue="normal_fuel", height=6, fit_reg=True,
    scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None,
);

In [None]:
# Rows flagged True in the normal_fuel inlier column (taken before the ctd/cpd filter is applied).
base_df_clean_fuel=base_df_clean[base_df_clean['normal_fuel']]

In [None]:
# Keep rows that are inliers of both the CTD and the CPD fits.
base_df_clean = base_df_clean[
    base_df_clean['normal_ctd'] & base_df_clean['normal_cpd']
]

In [None]:
# base_df_all is simply another name for base_df_clean (same object, no data is added here).
base_df_all = base_df_clean

In [None]:
# Render Bokeh output inline, then draw three scatter plots of the cleaned base-load data.
output_notebook()

t1 = figure(
    plot_width=350, plot_height=350,
    title="MW vs Temp", x_axis_label='Temp', y_axis_label='MW',
)
t2 = figure(
    plot_width=350, plot_height=350,
    title="MW vs Temp", x_axis_label='Temp', y_axis_label='MW',
)
t3 = figure(
    plot_width=350, plot_height=350,
    title="cpd vs Temp", x_axis_label='Temp', y_axis_label='cpd',
)

t1.scatter(base_df_all[cit], base_df_all[mw])
t2.scatter(base_df_clean[cit], base_df_clean[mw])
t3.scatter(base_df_all[cit], base_df_all[cpd])

show(t1)
show(t2)
show(t3)

In [None]:
# Scatter of exhaust temperature against CPD for the base-load rows (no regression line).
sns.lmplot(
    x=cpd,
    y=exh,
    data=base_df_all,
    height=6,
    fit_reg=False,
    scatter_kws={"s": 25},
    line_kws={"color": "black", "linewidth": 4},
    ci=None,
);

In [None]:
# Scatter of fuel flow against MW for the fuel-inlier rows (no regression line).
sns.lmplot(
    x=mw,
    y=fuel,
    data=base_df_clean_fuel,
    height=6,
    fit_reg=False,
    scatter_kws={"s": 25},
    line_kws={"color": "black", "linewidth": 4},
    ci=None,
);

# Model


In [None]:
def model(layers=(40,40),es=True,n_iter=200,tol=0.0001,patience=10,random=2301):
    """Build an unfitted StandardScaler -> MLPRegressor pipeline.

    Parameters
    ----------
    layers : tuple of int
        Passed to MLPRegressor as ``hidden_layer_sizes``.
    es : bool
        Passed as ``early_stopping``.
    n_iter : int
        Passed as ``max_iter``.
    tol : float
        Passed as ``tol``.
    patience : int
        Passed as ``n_iter_no_change``.
    random : int
        Passed as ``random_state``. Previously hard-coded to 2301; exposed here
        (with the same default) to match ``model_pca``'s signature.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps named ``'scaler'`` and ``'estimator'``.
    """
    regressor = MLPRegressor(
        hidden_layer_sizes=layers,
        early_stopping=es,
        tol=tol,
        max_iter=n_iter,
        random_state=random,
        n_iter_no_change=patience,
    )
    return Pipeline([('scaler', StandardScaler()), ('estimator', regressor)])

In [None]:
def model_pca(pca_frac=None,layers=(40,40),es=True,n_iter=200,tol=0.0001,patience=10,random=2301):
    """Build an unfitted StandardScaler -> PCA -> MLPRegressor pipeline.

    ``pca_frac`` is forwarded to PCA as ``n_components``; ``random`` seeds both
    the PCA and the MLP. The remaining arguments map onto MLPRegressor's
    ``hidden_layer_sizes``, ``early_stopping``, ``max_iter``, ``tol`` and
    ``n_iter_no_change``. Steps are named 'scaler', 'pca' and 'estimator'.
    """
    reducer = PCA(n_components=pca_frac, random_state=random)
    network = MLPRegressor(
        hidden_layer_sizes=layers,
        early_stopping=es,
        tol=tol,
        max_iter=n_iter,
        random_state=random,
        n_iter_no_change=patience,
    )
    steps = [
        ('scaler', StandardScaler()),
        ('pca', reducer),
        ('estimator', network),
    ]
    return Pipeline(steps)

In [None]:
def model_lm_pca(pca_frac=None,random=2301):
    """Build an unfitted StandardScaler -> PCA -> LinearRegression pipeline.

    ``pca_frac`` is forwarded to PCA as ``n_components`` and ``random`` as its
    ``random_state``. Steps are named 'scaler', 'pca' and 'estimator'.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=pca_frac, random_state=random)),
        ('estimator', LinearRegression()),
    ]
    return Pipeline(steps)

In [None]:
def model_pol_pca(pca_frac=None,random=2301):
    """Build an unfitted StandardScaler -> PCA -> PolynomialFeatures -> LinearRegression pipeline.

    ``pca_frac`` is forwarded to PCA as ``n_components`` and ``random`` as its
    ``random_state``. PolynomialFeatures is created with its default settings.
    Steps are named 'scaler', 'pca', 'quadratic' and 'estimator'.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=pca_frac, random_state=random)),
        ('quadratic', PolynomialFeatures()),
        ('estimator', LinearRegression()),
    ]
    return Pipeline(steps)

In [None]:
#base_df_all[mw]=base_df_all[mw].apply(lambda x: 205 if x>205 else x )

In [None]:
# Train and evaluate on the same base-load rows (no hold-out split is made).
# test_set is an independent copy: prediction columns are written into it in
# later cells, and writing them into the shared slice would both mutate
# train_set / base_df_all and can raise SettingWithCopyWarning.
train_set = base_df_all
test_set = base_df_all.copy()

In [None]:
# Feature matrices for the base-load models.
X_comp = train_set[[baro, cit]]             # inputs of the CPD and CTD models
X_mw = train_set[[cit, cpd, ctd, exh]]      # inputs of the MW model
X_exh = train_set[cpd].values.reshape(-1, 1)  # single-column input of the exhaust model

# Matching targets.
y_cpd = train_set[cpd]
y_ctd = train_set[ctd]
y_mw = train_set[mw]
y_exh = train_set[exh]

In [None]:
# Base-load CPD model: scaler -> PCA -> linear regression, fitted on X_comp.
cpd_ = model_lm_pca().fit(X_comp, y_cpd)

In [None]:
# Base-load CTD model: scaler -> PCA -> linear regression, fitted on X_comp.
ctd_ = model_lm_pca().fit(X_comp, y_ctd)

In [None]:
# ## Multi output random forest regression of cpd and ctd
# x_train, y_train = train_set[[baro,cit]], train_set[[cpd,ctd]]
# #cpd_ctd_ranf = RandomForestRegressor(max_depth=10,n_estimators=100,random_state = 6567)
# cpd_ctd_ranf = LinearRegression()
# #cpd_ctd_reg = MultiOutputRegressor(cpd_ctd_ranf).fit(x_train, y_train)
# cpd_ctd_reg = cpd_ctd_ranf.fit(x_train, y_train)

In [None]:
# Store the CPD and CTD model predictions as new columns on test_set.
test_set['cpd_pred']=cpd_.predict(X_comp)
test_set['ctd_pred']=ctd_.predict(X_comp)

In [None]:
# Actual-vs-predicted scatter plots for the base-load CPD and CTD models, side by side.
p1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted CPD", x_axis_label='actual', y_axis_label='predicted',
)
p2 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted CtD", x_axis_label='actual', y_axis_label='predicted',
)

p1.scatter(test_set[cpd], test_set['cpd_pred'])
p2.scatter(test_set[ctd], test_set['ctd_pred'])
show(row(p1, p2))

In [None]:
# Base-load exhaust-temperature model: scaler -> PCA -> polynomial features -> linear regression.
exh_ = model_pol_pca().fit(X_exh, y_exh)

In [None]:
# Store the exhaust model predictions as a new column on test_set.
test_set['exh_pred']=exh_.predict(X_exh)

In [None]:
# Actual-vs-predicted scatter plot for the base-load exhaust model.
p1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted exh", x_axis_label='actual', y_axis_label='predicted',
)
p1.scatter(test_set[exh], test_set['exh_pred'])
show(p1)

In [None]:
# Base-load MW model: scaler -> PCA -> linear regression, fitted on X_mw.
mw_ = model_lm_pca().fit(X_mw, y_mw)

In [None]:
# Spot check of the CPD model; the row is ordered like X_comp: [baro, cit].
cpd_.predict(np.array([[14.67, 38]]))

In [None]:
# Spot check of the CPD model; the row is ordered like X_comp: [baro, cit].
cpd_.predict(np.array([[14.9, 55]]))

In [None]:
# Spot check of the CTD model; the row is ordered like X_comp: [baro, cit].
ctd_.predict(np.array([[14.67, 38]]))

In [None]:
# Spot check of the CTD model; the row is ordered like X_comp: [baro, cit].
ctd_.predict(np.array([[14.9, 35]]))

In [None]:
# Spot check of the exhaust model; its single input feature is CPD.
exh_.predict(np.array([[226.63927288]]))

In [None]:
# Spot check of the exhaust model; its single input feature is CPD.
exh_.predict(np.array([[222.19734717]]))

In [None]:
# Spot check of the base-load MW model. mw_ is fitted on four features,
# [cit, cpd, ctd, exh]; the previous five-value row also carried a leading baro
# value (14.67), which makes predict() fail on the feature-count mismatch.
mw_.predict(np.array([[38, 226.63927288, 702.30829449, 1103.43150439]]))

In [None]:
# Spot check of the base-load MW model. mw_ is fitted on four features,
# [cit, cpd, ctd, exh]; the previous five-value row also carried a leading baro
# value (14.9), which makes predict() fail on the feature-count mismatch.
mw_.predict(np.array([[55, 222.19734717, 727.88967357, 1112.26269742]]))

In [None]:
# Store the MW model predictions as a new column on test_set.
test_set['mw_pred']=mw_.predict(X_mw)

In [None]:
# Actual-vs-predicted scatter plot for the base-load MW model.
p1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted mw", x_axis_label='actual', y_axis_label='predicted',
)
p1.scatter(test_set[mw], test_set['mw_pred'])
show(p1)

In [None]:
# Rows used for the base-load fuel model: MW > 100, IGV > 80 and fuel > 1400.
basefueldf = model_df[
    (model_df[mw] > 100)
    & (model_df[igv] > 80)
    & (model_df[fuel] > 1400)
]

In [None]:
# Flag outliers of fuel ~ MW on basefueldf with a RANSAC-wrapped linear fit.
# `loss` is left at its default (absolute error): the explicit 'absolute_loss'
# spelling was removed in scikit-learn 1.2, whereas the default is the same
# metric under every release.
ransac_fuel = RANSACRegressor(
    LinearRegression(),
    max_trials=100,
    min_samples=50,
    residual_threshold=20,
    random_state=2301,
).fit(basefueldf[mw].values.reshape(-1, 1), basefueldf[fuel].values)

# RANSAC-line fuel prediction for every row of basefueldf.
fuel_base_pred = ransac_fuel.predict(basefueldf[mw].values.reshape(-1, 1))

# True where a row lies within residual_threshold of the consensus fit.
inlier_mask = ransac_fuel.inlier_mask_
basefueldf = basefueldf.assign(normal_fuel=inlier_mask)

sns.lmplot(
    x=mw, y=fuel, data=basefueldf, hue="normal_fuel", height=6, fit_reg=True,
    scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None,
);


In [None]:
# Keep only the rows flagged True in the normal_fuel inlier column.
basefueldf=basefueldf[basefueldf['normal_fuel']]

In [None]:
# Base-load fuel model data: MW as a single-column 2-D array, fuel as the target.
X_fuel=basefueldf[mw].values.reshape(-1,1)
y_fuel=basefueldf[fuel]

In [None]:
# Base-load fuel model: plain linear regression of fuel on MW.
fuel_ = LinearRegression().fit(X_fuel, y_fuel)

In [None]:
# In-sample predictions of the base-load fuel model.
fuel_pred=fuel_.predict(X_fuel)

In [None]:
# Actual-vs-predicted scatter plot for the base-load fuel model.
p1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted fuel", x_axis_label='actual', y_axis_label='predicted',
)
p1.scatter(y_fuel, fuel_pred)
show(p1)

# Part Load Data Augmentation

In [None]:
## Part Load Modeling
# Rebuild the base-load selection (MW > 100, IGV > 84, |FSR - FSRT| <= 0.1) and
# take every other row of model_df as the part-load candidate set.
# abs() has to wrap the difference, not the comparison: the previous
# abs(fsr - fsrt <= 0.1) took the absolute value of a boolean and therefore
# only tested the one-sided condition fsr - fsrt <= 0.1.
baseall = model_df[
    (model_df[mw] > 100)
    & (model_df[igv] > 84)
    & ((model_df[fsr] - model_df[fsrt]).abs() <= 0.1)
]
baseall_index = baseall.index.tolist()
part_df = model_df[~model_df.index.isin(baseall_index)]

In [None]:
# Restrict the part-load set to rows above the minimum MW, CPD, CTD, IGV and exhaust thresholds.
part_load_ok = (
    (part_df[mw] > 60)
    & (part_df[cpd] > 50)
    & (part_df[ctd] > 50)
    & (part_df[igv] > 30)
    & (part_df[exh] > 50)
)
part_df = part_df[part_load_ok]

In [None]:

# ## Augment data
# a = list(range(1,31))
# maxigv = max(part_df[igv])
# minigv = min(part_df[igv])
# igv_ad = list(range(80, int(maxigv)))*len(a)
# cit_ad = sorted(a*len(list(range(80, int(maxigv)))))

# #cit_ad = a+np.random.randn(4,1) #add 20 points per degree
# #cit_ad = [x for sublist in cit_ad for x in sublist]
# baro_ad = []
# #rh_ad =[]
# for i in range(len(cit_ad)):
#     baro_ad.append(random.uniform(min(part_df[baro]), max(part_df[baro])))
# #    rh_ad.append(random.uniform(min(base_df_clean[rh]), max(base_df_clean[rh])))



In [None]:

# x_ad = pd.DataFrame([baro_ad,cit_ad,igv_ad,])
# x_ad = x_ad.T
# x_ad.columns = [baro,cit,igv]

In [None]:
# x_ad

In [None]:
# Plain linear regressions of MW, CTD and CPD on [baro, cit, igv] over the part-load rows.
# x and y are left holding the inputs/target of the last (CPD) fit.
x = part_df[[baro, cit, igv]].values

y = part_df[mw].values.reshape(-1, 1)
plm_mw = LinearRegression().fit(x, y)

y = part_df[ctd].values.reshape(-1, 1)
plm_ctd = LinearRegression().fit(x, y)

y = part_df[cpd].values.reshape(-1, 1)
plm_cpd = LinearRegression().fit(x, y)

In [None]:
# In-sample predictions of the part-load MW regression, plotted against the actual MW.
pmw_pred = plm_mw.predict(x)

output_notebook()
t1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs model", x_axis_label='actual', y_axis_label='model',
)
t1.scatter(part_df[mw].values, pmw_pred.reshape(-1))
show(t1)

In [None]:

# Augment cpd & ctd
# p_cpd_ad = plm_cpd.predict(x_ad).reshape(-1,1)+np.random.normal(loc=0.0, scale=1.5, size=(x_ad.shape[0],1))
# p_ctd_ad = plm_ctd.predict(x_ad).reshape(-1,1)+np.random.normal(loc=0.0, scale=1.5, size=(x_ad.shape[0],1))
# p_mw_ad = plm_mw.predict(x_ad).reshape(-1,1)+np.random.normal(loc=0.0, scale=1.5, size=(x_ad.shape[0],1))

In [None]:
# pd.DataFrame(p_mw_ad)

In [None]:
# #reg exh~igv,cpd
# x,y = part_df[[baro,cpd]].values, part_df[exh].values.reshape(-1,1)
# plm_exh = LinearRegression().fit(x, y)
# # Augment exh
# p_exh_ad = plm_exh.predict(np.hstack((
#     np.asarray(baro_ad).reshape(-1,1),
#     np.asarray(p_cpd_ad).reshape(-1,1)))) + 2*np.random.normal(loc=0.0, scale=1.5, size=(p_cpd_ad.shape[0],1))

In [None]:

# partdf_ad = pd.concat([pd.Series(igv_ad,name=igv),
#                        pd.Series(cit_ad,name=cit),
#                        pd.Series(p_mw_ad.reshape(-1,),name=mw),
#                        pd.Series(p_cpd_ad.reshape(-1,),name=cpd),
#                        pd.Series(p_ctd_ad.reshape(-1,),name=ctd),
#                        pd.Series(p_exh_ad.reshape(-1,),name=exh),
#                        pd.Series(baro_ad,name=baro)],axis=1).astype(float)

In [None]:
# The merge with augmented data is disabled, so partdf_all is just another name for part_df.
#partdf_all = pd.merge(part_df,partdf_ad,on = [igv,cit,mw,cpd,ctd,exh,baro],how = 'outer')
partdf_all=part_df

In [None]:
#np.where(np.isnan(partdf_all[mw].values))

In [None]:
# Three scatter plots of the part-load data: MW vs CIT, CPD vs CIT and exhaust vs CPD.
output_notebook()

t1 = figure(
    plot_width=350, plot_height=350,
    title="MW vs Temp", x_axis_label='Temp', y_axis_label='MW',
)
t2 = figure(
    plot_width=350, plot_height=350,
    title="CPD vs Temp", x_axis_label='Temp', y_axis_label='CPD',
)
t3 = figure(
    plot_width=350, plot_height=350,
    title="Exh vs CPD", x_axis_label='CPD', y_axis_label='Exh',
)

t1.scatter(partdf_all[cit], partdf_all[mw])
t2.scatter(partdf_all[cit], partdf_all[cpd])
t3.scatter(partdf_all[cpd], partdf_all[exh])

show(t1)
show(t2)
show(t3)

# Min Load Data Augmentation

In [None]:
# Load the two minimum-load data sets (file names say 2x1 and 1x1), both ISO-8859-2 encoded.
min21 = pd.read_csv(
    "../model_data/ctg4_2x1_min.csv",
    encoding="ISO-8859-2",
)
min11 = pd.read_csv(
    "../model_data/ctg4_1x1_min.csv",
    encoding="ISO-8859-2",
)

In [None]:
# Linear regression of IGV on CIT for the 2x1 minimum-load data.
x = min21[cit].values.reshape(-1, 1)
y = min21[igv].values.reshape(-1, 1)
min21_reg = LinearRegression().fit(x, y)

In [None]:
# Linear regression of IGV on CIT for the 1x1 minimum-load data.
x = min11[cit].values.reshape(-1, 1)
y = min11[igv].values.reshape(-1, 1)
min11_reg = LinearRegression().fit(x, y)

In [None]:
# Spot check: IGV predicted by the 1x1 minimum-load regression at cit = 40.
min11_reg.predict(np.array([[40]]))

In [None]:
# #Augment igv
# a = list(range(0,80))
# cit_ad = a+np.random.randn(2,1) #add 20 points per degree
# cit_ad = [x for sublist in cit_ad for x in sublist]
# igv_ad1 = np.array([max(42,igv_pred[0]+1) 
#                     for igv_pred in min11_reg.predict(np.asanyarray(cit_ad).reshape(-1,1))]).reshape(-1,1)
# igv_ad2 = np.array([max(40,igv_pred[0]+2) 
#                     for igv_pred in min21_reg.predict(np.asanyarray(cit_ad).reshape(-1,1))]).reshape(-1,1)
# igv_ad = np.vstack((igv_ad1,igv_ad2))
# cit_ad = cit_ad*2
# baro_ad = []
# #rh_ad =[]
# for i in range(len(igv_ad)):
#     baro_ad.append(random.uniform(min(part_df[baro]), max(part_df[baro])))

In [None]:
# Min-load candidates: rows outside the base-load selection with 60 < MW < 130 and CTD > 300.
# The "> -2000" comparisons additionally drop rows where CPD, IGV or exhaust is NaN,
# because a comparison against NaN evaluates to False.
min_df = model_df[~model_df.index.isin(baseall_index)]
min_df = min_df[
    (min_df[mw] < 130)
    & (min_df[mw] > 60)
    & (min_df[cpd] > -2000)
    & (min_df[ctd] > 300)
    & (min_df[igv] > -2000)
    & (min_df[exh] > -2000)
]

# Linear regressions of MW, CTD and CPD on [cit, igv, baro].
# x and y are left holding the inputs/target of the last (CPD) fit.
x = min_df[[cit, igv, baro]].values

y = min_df[mw].values.reshape(-1, 1)
min21_mw = LinearRegression().fit(x, y)

y = min_df[ctd].values.reshape(-1, 1)
min21_ctd = LinearRegression().fit(x, y)

y = min_df[cpd].values.reshape(-1, 1)
min21_cpd = LinearRegression().fit(x, y)

In [None]:
# cit_ad = np.asanyarray(cit_ad).reshape(-1,1)
# baro_ad = np.asanyarray(baro_ad).reshape(-1,1)
# igv_ad = np.asanyarray(igv_ad).reshape(-1,1)

In [None]:
# # Augment cpd & ctd
# cpd_ad = min21_cpd.predict(
#     np.hstack((
#         cit_ad,
#         igv_ad,
#         baro_ad)
#     )).reshape(-1,1)+np.random.normal(loc=0.0, scale=1.5, size=(cit_ad.shape[0],1))
# ctd_ad = min21_ctd.predict(
#     np.hstack((
#         cit_ad,
#         igv_ad,
#         baro_ad))).reshape(-1,1)+np.random.normal(loc=0.0, scale=1.5, size=(cit_ad.shape[0],1))
# mw_ad = min21_mw.predict(
#     np.hstack((
#         cit_ad,
#         igv_ad,
#         baro_ad))).reshape(-1,1)+np.random.normal(loc=0.0, scale=1.5, size=(cit_ad.shape[0],1))

In [None]:
# #reg exh~baro,cit,igv,cpd,ctd
# x,y = min_df[[baro,cpd]].values, min_df[exh].values.reshape(-1,1)
# min21_exh = LinearRegression().fit(x, y)

In [None]:
# # Augment exh
# exh_ad = min21_exh.predict(
#     np.hstack((
#         baro_ad,
#         cpd_ad))).reshape(-1,1)+2*np.random.normal(loc=0.0, scale=1.5, size=(cit_ad.shape[0],1))
# exh_ad = np.where(exh_ad>1200,1200,exh_ad)

In [None]:
# partdf_add = pd.concat([pd.Series(igv_ad.reshape(-1,),name=igv),
#                        pd.Series(cit_ad.reshape(-1,),name=cit),
#                        pd.Series(mw_ad.reshape(-1,),name=mw),
#                        pd.Series(cpd_ad.reshape(-1,),name=cpd),
#                        pd.Series(ctd_ad.reshape(-1,),name=ctd),
#                        pd.Series(exh_ad.reshape(-1,),name=exh),                       
#                        pd.Series(baro_ad.reshape(-1,),name=baro)],axis=1)

In [None]:
# partdf_all = pd.merge(partdf_all,partdf_add,on = [igv,cit,mw,cpd,ctd,exh,baro],how = 'outer')

In [None]:
# Four scatter plots comparing the part-load frame with the min-load subset.
output_notebook()

t1 = figure(
    plot_width=350, plot_height=350,
    title="MW vs Temp", x_axis_label='Temp', y_axis_label='MW',
)
t2 = figure(
    plot_width=350, plot_height=350,
    title="MW vs Temp", x_axis_label='Temp', y_axis_label='MW',
)
t3 = figure(
    plot_width=350, plot_height=350,
    title="exh vs cpd", x_axis_label='cpd', y_axis_label='exh',
)
t4 = figure(
    plot_width=350, plot_height=350,
    title="fuel vs mw", x_axis_label='mw', y_axis_label='fuel',
)

t1.scatter(partdf_all[cit], partdf_all[mw])    # part-load rows
t2.scatter(min_df[cit], min_df[mw])            # min-load rows
t3.scatter(partdf_all[cpd], partdf_all[exh])
t4.scatter(partdf_all[mw], partdf_all[fuel])

show(t1)
show(t2)
show(t3)
show(t4)

# Part and Min Load models

In [None]:
# Feature matrices for the part-load models (these rebind the base-load names).
X_comp = partdf_all[[baro, cit, igv]]   # inputs of the CPD and CTD models
X_mw = partdf_all[[cit, igv]]           # inputs of the MW model
X_exh = partdf_all[[igv, cpd]]          # inputs of the exhaust model

# Matching targets.
y_cpd = partdf_all[cpd]
y_ctd = partdf_all[ctd]
y_mw = partdf_all[mw]
y_exh = partdf_all[exh]

In [None]:
# Part-load CPD model: scaler -> PCA -> polynomial features -> linear regression.
p_cpd_ = model_pol_pca().fit(X_comp, y_cpd)

In [None]:
# Part-load CTD model: scaler -> PCA -> linear regression.
p_ctd_ = model_lm_pca().fit(X_comp, y_ctd)

In [None]:
# In-sample predictions of the part-load CPD and CTD models.
cpd_pred_p=p_cpd_.predict(X_comp)
ctd_pred_p=p_ctd_.predict(X_comp)

In [None]:
# Part-load MW model: scaler -> PCA -> polynomial features -> linear regression.
p_mw_ = model_pol_pca().fit(X_mw, y_mw)

In [None]:
# Part-load exhaust model: scaler -> PCA -> polynomial features -> linear regression.
p_exh_ = model_pol_pca().fit(X_exh, y_exh)

In [None]:
# Inspect the part-load rows with MW >= 130.
# NOTE(review): this displays the whole selection; consider .head() or .describe() if it is large.
part_df[part_df[mw]>=130]

In [None]:
# Flag outliers of fuel ~ (IGV, MW) on the part-load rows with a RANSAC-wrapped linear fit.
# `loss` is left at its default (absolute error): the explicit 'absolute_loss'
# spelling was removed in scikit-learn 1.2, whereas the default is the same
# metric under every release.
ransac_fuel = RANSACRegressor(
    LinearRegression(),
    max_trials=100,
    min_samples=50,
    residual_threshold=25,
    random_state=2301,
).fit(part_df[[igv, mw]], part_df[fuel])

# True where a row lies within residual_threshold of the consensus fit.
inlier_mask = ransac_fuel.inlier_mask_
part_df = part_df.assign(normal_fuel=inlier_mask)

sns.lmplot(
    x=mw, y=fuel, data=part_df, hue="normal_fuel", height=6, fit_reg=True,
    scatter_kws={"s": 25}, line_kws={"color": "black", "linewidth": 4}, ci=None,
);

In [None]:
# Part-load fuel model data, restricted to the RANSAC inliers.
X_fuel = part_df.loc[part_df['normal_fuel'], [igv, mw]]
y_fuel = part_df.loc[part_df['normal_fuel'], fuel]

In [None]:
# Part-load fuel model: plain linear regression of fuel on [igv, mw].
p_fuel_ = LinearRegression().fit(X_fuel, y_fuel)

In [None]:
# In-sample predictions of the part-load CPD and CTD models (same statements as an earlier cell).
cpd_pred_p=p_cpd_.predict(X_comp)
ctd_pred_p=p_ctd_.predict(X_comp)

In [None]:
# Actual-vs-predicted scatter plots for the part-load CPD and CTD models, side by side.
p1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted CPD", x_axis_label='actual', y_axis_label='predicted',
)
p2 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted CtD", x_axis_label='actual', y_axis_label='predicted',
)

p1.scatter(y_cpd, cpd_pred_p)
p2.scatter(y_ctd, ctd_pred_p)
show(row(p1, p2))

In [None]:
# Synthetic sweep: 200 cit values from 0 to 101 at constant baro = 14.65 and igv = 70,
# stacked as [baro, cit, igv] columns.
# NOTE(review): the next cell rebinds citar, baroar, igvar and testin, so on a
# top-to-bottom run these values are never used.
citar=np.linspace(0,101,200).reshape(-1,1)
baroar=14.65*np.ones((200,1))
igvar=70*np.ones((200,1))
testin=np.hstack((baroar,citar,igvar))

In [None]:
# Synthetic sweep: 60 igv values from 45 to 86 at constant baro = 14.65 and cit = 70,
# stacked as [baro, cit, igv] columns.
igvar=np.linspace(45,86,60).reshape(-1,1)
baroar=14.65*np.ones((60,1))
citar=70*np.ones((60,1))
testin=np.hstack((baroar,citar,igvar))

In [None]:
# Part-load CPD predicted over the synthetic inputs in testin, plotted against igvar.
cpd_test = p_cpd_.predict(testin)

p1 = figure(
    plot_width=350, plot_height=350,
    title="CPD vs IGV", x_axis_label='IGV', y_axis_label='predicted CPD',
)
p1.scatter(igvar.reshape(-1), cpd_test)
show(p1)

In [None]:
# In-sample predictions of the part-load MW model.
mw_pred_p=p_mw_.predict(X_mw)

In [None]:
# Actual-vs-predicted scatter plot for the part-load MW model.
p1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted mw", x_axis_label='actual', y_axis_label='predicted',
)
p1.scatter(y_mw, mw_pred_p)
show(p1)

In [None]:
# In-sample predictions of the part-load exhaust model.
exh_pred_p=p_exh_.predict(X_exh)

In [None]:
# Actual-vs-predicted scatter plot for the part-load exhaust model.
p1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted exhaust", x_axis_label='actual', y_axis_label='predicted',
)
p1.scatter(y_exh, exh_pred_p)
show(p1)

In [None]:
# In-sample predictions of the part-load fuel model.
fuel_pred_p=p_fuel_.predict(X_fuel)

In [None]:
# Actual-vs-predicted scatter plot for the part-load fuel model.
p1 = figure(
    plot_width=350, plot_height=350,
    title="actual vs predicted fuel", x_axis_label='actual', y_axis_label='predicted',
)
p1.scatter(y_fuel, fuel_pred_p)
show(p1)

In [None]:
# solver
'''
Part Load Models
'''
## Multi output random forest regression model of part cpd, ctd and mw
# x_train, y_train = partdf_all[[baro,cit,igv]], partdf_all[mw]
# p_mw_lm = LinearRegression()
# p_mw_lm = p_mw_lm.fit(x_train, y_train)

In [None]:
'''
X_comp,X_mw=partdf_all[[baro,cit,igv]], partdf_all[[baro,cit,igv,cpd,ctd,exh]]
X_exh, X_fuel=partdf_all[[baro,igv,cpd]], partdf_all[[igv,cpd,ctd,mw,exh]]
y_cpd,y_ctd =partdf_all[cpd],partdf_all[ctd]
y_mw,y_exh=partdf_all[mw],partdf_all[exh]
y_fuel=partdf_all[fuel]
'''

In [None]:
'''
Base 
X_comp,X_mw=train_set[[baro,cit]], train_set[[baro,cit,cpd,ctd,exh]]
X_exh, X_fuel=train_set[[baro,cpd]], train_set[[cpd,ctd,mw,exh]]
y_cpd,y_ctd =train_set[cpd],train_set[ctd]
y_mw,y_exh=train_set[mw],train_set[exh]
y_fuel=train_set[fuel]
'''

In [None]:
## Persist every fitted model (base, part and min load) in a single joblib file.
# Keys follow the pattern '<regime><target><feature|feature|...>'.
pkl = "../../../pickles/ctg4.pkl"

models = {
    # base-load models
    'base<cpd><baro|cit>': cpd_,
    'base<ctd><baro|cit>': ctd_,
    'base<exh_temp><cpd>': exh_,
    'base<mw><cit|cpd|ctd|exh_temp>': mw_,
    'base<fuel><mw>': fuel_,
    # part-load models
    'part<cpd><baro|cit|igv>': p_cpd_,
    'part<ctd><baro|cit|igv>': p_ctd_,   # also used by the solver
    'part<exh_temp><igv|cpd>': p_exh_,
    'part<mw><cit|igv>': p_mw_,
    'part<fuel><igv|mw>': p_fuel_,
    # minimum-load IGV regressions
    'min2x1<igv><cit>': min21_reg,
    'min1x1<igv><cit>': min11_reg,
}

# Write the bundle and echo the path that was written.
with open(pkl, "wb") as f:
    joblib.dump(models, f)
    print(f'{f.name}')