In [None]:
'''
Data munging libraries

'''
import random

import numpy as np
import pandas as pd
import statsmodels.api as sm

import joblib
'''
Visualization Libraries

'''
import seaborn as sns
%matplotlib inline
#%matplotlib notebook
pd.set_option('display.max_columns', 100)
pd.set_option('display.precision', 2) 
from bokeh.plotting import figure,  show, gridplot
from bokeh.io import output_notebook

'''
ML libraries

'''

from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score;
from sklearn.model_selection import  train_test_split ;

# Define file name of model_data

In [None]:
file_name_model_data = "../model_data/ctg8_model_data.gzip"

In [None]:
model_df=pd.read_csv(file_name_model_data, compression='gzip' ,encoding="ISO-8859-2")

In [None]:
model_df.describe()

In [None]:
# Short aliases for the model_df column names used throughout the notebook.
# The "{Avg}" / "{StdDev}" suffixes are part of the column names themselves.
mw='GT8 Generator Watts Max Selected {Avg}'
igv='GT8 IGV angle in deg {Avg}'
fsr='GT8 Fuel Stroke Reference {Avg}'
fsrt='GT8 Temperature Control Fuel Stroke Reference {Avg}'
cit='GT8 Compressor Inlet Temperature {Avg}'
fuel='GT8 Fuel Energy Flow MMBTU'
cpd='GT8 Compressor Discharge Press Max Select {Avg}'
ctd='GT8 Compressor Discharge Temperature {Avg}'
baro='SITE AMBIENT CONDITIONS BARO PRESS XMTR {Avg}'
exh='GT8 Exhaust Temp Median Corrected By Average {Avg}'
# Same signal as `mw`, but the {StdDev} column instead of the {Avg} one.
# NOTE(review): presumably the spread within each averaging window - confirm against the data export.
std='GT8 Generator Watts Max Selected {StdDev}'

In [None]:
# Base-load subset of the model data. A row is kept only when ALL of these hold:
#   * generator output (mw) above 100
#   * IGV angle above 84 degrees
#   * standard deviation of generator output below 0.25
#   * FSR and temperature-control FSR agree to within 0.1 in absolute value
# Fix: the closing parenthesis of abs() used to wrap the whole comparison, so the
# test was the signed `fsr - fsrt <= 0.1` and arbitrarily negative differences passed.
base_df=model_df[(model_df[mw]>100) &
                (model_df[igv]>84) &
                (model_df[std]<0.25)&
                ((model_df[fsr]-model_df[fsrt]).abs()<=0.1)]

In [None]:
# Remove outliers from the mw ~ cit relationship with a RANSAC straight-line fit.
# `loss` is left at its default (absolute error): the explicit spelling
# 'absolute_loss' was renamed to 'absolute_error' in scikit-learn 1.0 and removed
# in 1.2, so omitting the argument selects the same loss on old and new releases.
ransac_mw = RANSACRegressor(LinearRegression(),
                         max_trials=100,
                         min_samples=50,
                         residual_threshold=5,
                         random_state=2301)

# cit is the single predictor; scikit-learn wants it as an (n, 1) column.
cit_base = base_df[cit].values.reshape(-1,1)
ransac_mw=ransac_mw.fit(cit_base, base_df[mw].values)

mw_base_pred=ransac_mw.predict(cit_base)

# Boolean array, True for the rows RANSAC accepted as inliers.
inlier_mask = ransac_mw.inlier_mask_

# Store the flag as a column so it can drive both the plot hue and the filter below.
base_df=base_df.assign(normal_mw=inlier_mask)

sns.lmplot(x=cit, y=mw, data=base_df,hue="normal_mw", height=8,fit_reg=True,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None)
# Only the inlier rows go forward into the augmentation and modelling cells.
base_df_clean = base_df[base_df["normal_mw"]]

In [None]:
## Data augmentation
# Predictors (baro, cit) taken from the cleaned base-load rows, with a constant
# column added by statsmodels so the OLS fit that uses `x` has an intercept term.
x=base_df_clean[[baro,cit]]
x=sm.add_constant(x)

In [None]:
# Linear regression of generator output (mw) on the design matrix `x`
# (constant, baro, cit). The summary table is the cell's displayed output.
lm_mw = sm.OLS(base_df_clean[mw], x).fit()
lm_mw.summary()

In [None]:
# Data augmentation - expand cit & baro
# Seed both generators used below so that Restart & Run All reproduces the same
# synthetic points (the value matches the random_state used for RANSAC).
random.seed(2301)
np.random.seed(2301)

# Integer inlet temperatures to synthesise: 92..110 followed by 1..30.
a = list(range(92,111))
b = list(range(1,31))
a = np.hstack((a,b))
# randn(4,1) broadcasts against the grid: four copies of it, each shifted by one
# standard-normal offset, i.e. 4 synthetic points per degree.
cit_ad = a+np.random.randn(4,1)
# Flatten the (4, len(a)) array into one plain list of temperatures.
cit_ad = cit_ad.ravel().tolist()

# One uniformly drawn barometric pressure per synthetic temperature, within the
# range observed in the cleaned base-load data. The bounds are computed once
# instead of on every draw; Series.min()/max() also skip NaN.
baro_lo, baro_hi = base_df_clean[baro].min(), base_df_clean[baro].max()
baro_ad = [random.uniform(baro_lo, baro_hi) for _ in range(len(cit_ad))]



In [None]:
# Wrap the synthetic inputs as Series and lay them out column-wise, in the same
# (baro, cit) order the OLS model was fitted with.
baro_ad = pd.Series(baro_ad)
cit_ad = pd.Series(cit_ad)
new_x = pd.DataFrame({baro: baro_ad, cit: cit_ad})

# Predict mw for the extended baro/cit grid; the constant column is added first
# so the columns line up with the fitted design matrix. No noise is added.
new_x = sm.add_constant(new_x)
exp_mw = lm_mw.predict(new_x)

In [None]:
# Put the extended baro/cit inputs and the predicted mw side by side.
# The prediction column arrives labelled 0, so it is renamed to the mw column name.
new_df = pd.concat([new_x, exp_mw], axis=1).rename(columns={0: mw})
new_df.describe()

In [None]:
## Remove outliers for ctd vs cit
# The names ransac_mw / mw_base_pred are reused from the mw cell; here they hold the
# ctd ~ cit model and its predictions.
# `loss` is left at its default (absolute error): the explicit spelling
# 'absolute_loss' was renamed to 'absolute_error' in scikit-learn 1.0 and removed
# in 1.2, so omitting the argument selects the same loss on old and new releases.
ransac_mw = RANSACRegressor(LinearRegression(),
                         max_trials=100,
                         min_samples=50,
                         residual_threshold=7,
                         random_state=2301)
cit_clean = base_df_clean[cit].values.reshape(-1,1)
ransac_mw=ransac_mw.fit(cit_clean, base_df_clean[ctd].values)
# Fix: predict from the feature the model was fitted on (cit). The target column
# (ctd) used to be passed in as if it were the predictor.
mw_base_pred=ransac_mw.predict(cit_clean)
# Boolean array, True for the rows RANSAC accepted as inliers.
inlier_mask = ransac_mw.inlier_mask_
base_df_clean=base_df_clean.assign(normal_ctd=inlier_mask)
sns.lmplot(x=cit, y=ctd, data=base_df_clean,hue="normal_ctd", height=6,fit_reg=True,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None);

In [None]:
## Augment ctd
# Fit ctd ~ cit + baro on the rows flagged as ctd inliers, then evaluate the fit
# on the synthetic cit/baro points.
base_df_ctd = base_df_clean.loc[base_df_clean["normal_ctd"]]
x = base_df_ctd[[cit,baro]].values.reshape(-1,2)
y = base_df_ctd[ctd].values.reshape(-1,1)
lm_ctd = LinearRegression().fit(x,y)

# Columns are in the same (cit, baro) order as the training matrix.
new_x = pd.DataFrame({cit: cit_ad, baro: baro_ad})

ctd_aug = lm_ctd.predict(new_x)

In [None]:
## Remove outliers for cpd vs cit
# The names ransac_mw / mw_base_pred are reused from the mw cell; here they hold the
# cpd ~ cit model and its predictions.
# `loss` is left at its default (absolute error): the explicit spelling
# 'absolute_loss' was renamed to 'absolute_error' in scikit-learn 1.0 and removed
# in 1.2, so omitting the argument selects the same loss on old and new releases.
ransac_mw = RANSACRegressor(LinearRegression(),
                         max_trials=100,
                         min_samples=50,
                         residual_threshold=5,
                         random_state=2301)
cit_clean = base_df_clean[cit].values.reshape(-1,1)
ransac_mw=ransac_mw.fit(cit_clean, base_df_clean[cpd].values)
# Fix: predict from the feature the model was fitted on (cit). The target column
# (cpd) used to be passed in as if it were the predictor.
mw_base_pred=ransac_mw.predict(cit_clean)
# Boolean array, True for the rows RANSAC accepted as inliers.
inlier_mask = ransac_mw.inlier_mask_
base_df_clean=base_df_clean.assign(normal_cpd=inlier_mask)
sns.lmplot(x=cit, y=cpd, data=base_df_clean,hue="normal_cpd", height=6,fit_reg=True,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None)

In [None]:
## Augment cpd
# Fit cpd ~ cit + baro on the rows flagged as cpd inliers, then evaluate the fit
# on the synthetic cit/baro points.
base_df_cpd = base_df_clean[base_df_clean["normal_cpd"]]
# Fix: train on base_df_cpd. The fit used to read base_df_ctd (the ctd-inlier
# subset), so the cpd outlier filter computed just above had no effect on it.
x = np.array(base_df_cpd[[cit,baro]]).reshape(-1,2)
y = np.array(base_df_cpd[cpd]).reshape(-1,1)
lm_cpd = LinearRegression()
lm_cpd.fit(x,y)

# Columns are in the same (cit, baro) order as the training matrix.
new_x = pd.DataFrame([cit_ad,baro_ad])
new_x = new_x.T
new_x.columns =  [cit,baro]

cpd_aug = lm_cpd.predict(new_x)

In [None]:
sns.lmplot(x=cpd, y=exh, data=base_df_cpd,height=6,fit_reg=True,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None);

In [None]:
## Add augmented cpd and ctd to data
new_df[cpd],new_df[ctd] = cpd_aug,ctd_aug


In [None]:
# Augment exh: straight-line fit of exh on cpd over the cleaned base-load rows,
# evaluated at the augmented cpd values.
# Fix: this cell used to fit the model a second time on sm.add_constant(x), which
# puts the constant in the FIRST column, and then predicted from
# hstack((cpd, ones)), which puts it in the LAST column. With the columns swapped
# the cpd coefficient was applied to the column of ones, and that result overwrote
# the correct prediction. LinearRegression fits its own intercept, so a single
# fit on the bare cpd column is all that is needed.
x = np.array(base_df_clean[cpd]).reshape(-1,1)
y = np.array(base_df_clean[exh]).reshape(-1,1)
lm_exh = LinearRegression()
lm_exh.fit(x,y)

exh_aug = lm_exh.predict(np.array(cpd_aug).reshape(-1,1))

## Add augmented exh to data, capped at 1200 exactly as the original cell did
new_df[exh] = np.minimum(exh_aug, 1200).ravel()
sns.lmplot(x=cpd, y=exh, data=new_df,height=6,fit_reg=False,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None);

In [None]:
# Augment fuel: straight-line fit of fuel on mw over the cleaned base-load rows.
x = base_df_clean[mw].values.reshape(-1,1)
y = base_df_clean[fuel].values.reshape(-1,1)
lm_f = LinearRegression().fit(x,y)

# Evaluate the fit at the augmented mw values (one column, one row per point).
mw_aug = new_df[mw].values.reshape(-1,1)
fuel_aug = lm_f.predict(mw_aug)

## Add augmented fuel to data
new_df[fuel] = fuel_aug
sns.lmplot(x=mw, y=fuel, data=new_df,height=6,fit_reg=False,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None);

In [None]:
base_df_clean[[baro,cit,ctd,cpd,mw,exh,fuel]]=base_df_clean[[baro,cit,ctd,cpd,mw,exh,fuel]].astype(float)

In [None]:
# Add all augmented data to original df
# Outer merge keyed on the seven modelling columns: rows pair up only when every
# key value is equal, and unmatched rows from either frame are kept. Columns that
# exist in only one of the frames are NaN for rows that came from the other.
base_df_all = pd.merge(base_df_clean,new_df,on = [baro,cit,ctd,cpd,mw,exh,fuel],how = 'outer')

In [None]:
# Route bokeh output to the notebook, then draw two mw-vs-cit scatter plots that
# share the same title and axis labels.
output_notebook()
# t1: measured rows plus the augmented points (base_df_all)
t1 = figure(plot_width=350, plot_height=350,title="MW vs Temp", x_axis_label='Temp', 
            y_axis_label='MW')
t1.scatter(base_df_all[cit], base_df_all[mw])

# t2: cleaned measured rows only (base_df_clean), for comparison with t1
t2 = figure(plot_width=350, plot_height=350,title="MW vs Temp", x_axis_label='Temp', 
            y_axis_label='MW')
t2.scatter(base_df_clean[cit], base_df_clean[mw])


show(t1)
show(t2)

In [None]:
sns.lmplot(x=cpd, y=exh, data=base_df_all,height=6,fit_reg=False,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None);

In [None]:
sns.lmplot(x=mw, y=fuel, data=base_df_all,height=6,fit_reg=False,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None);

# Model


In [None]:

# No hold-out split: both names refer to the same frame, so every residual
# computed from test_set below is an in-sample (training) residual.
train_set, test_set = base_df_all, base_df_all

In [None]:
## Multi-output regression of cpd and ctd on baro and cit.
## The variable is still called *_ranf, but the estimator in use is LinearRegression;
## the random-forest / MultiOutputRegressor variants are kept below, commented out.
x_train, y_train = train_set[[baro,cit]], train_set[[cpd,ctd]]
#cpd_ctd_ranf = RandomForestRegressor(max_depth=10,n_estimators=100,random_state = 6567)
cpd_ctd_ranf = LinearRegression()
#cpd_ctd_reg = MultiOutputRegressor(cpd_ctd_ranf).fit(x_train, y_train)
cpd_ctd_reg = cpd_ctd_ranf.fit(x_train, y_train)

In [None]:
# residual graph of cpd, ctd
x_test, y_test = test_set[[baro,cit]], test_set[[cpd,ctd]]
# `df` is the array of predictions (not a DataFrame). Column 0 is paired with cpd
# and column 1 with ctd below, matching the order of the targets in y_test.
df = cpd_ctd_reg.predict(x_test)
cpd_err = y_test.iloc[:,0] - df[:,0]
ctd_err = y_test.iloc[:,1] - df[:,1]

# t1 shows the cpd residuals and t2 the ctd residuals; both carry the same title.
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
t2 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')
# x axis is simply the row position 0..n-1
t1.scatter(list(range(len(cpd_err))), cpd_err)
t2.scatter(list(range(len(ctd_err))), ctd_err)
output_notebook()
show(gridplot([[t1,t2]]))
# Standard deviation of each residual series (square root of np.var).
print(np.sqrt(np.var(cpd_err)), np.sqrt(np.var(ctd_err)))

In [None]:
## Base-load mw model.
## The commented-out block below feeds *predicted* cpd/ctd in as extra inputs; it is
## kept for use with random-forest / neural-network models. For now only a plain
## linear regression of mw on baro and cit is fitted.
#x_train = train_set[[baro,cit]]
#df1 = cpd_ctd_reg.predict(x_train)
#cpd_pred = df1[:,0]
#ctd_pred = df1[:,1]
#x_train, y_train = pd.concat([train_set[[baro,cit]],pd.Series(cpd_pred),
#                              pd.Series(ctd_pred)],axis=1,ignore_index = True), train_set[mw]

#ctg_ranf = RandomForestRegressor(max_depth=10,n_estimators=50,random_state=6567)
x_train, y_train = train_set[[baro,cit]], train_set[mw]
# Named *_ranf for historical reasons; the estimator is LinearRegression.
ctg_ranf = LinearRegression()
ctg_ranf.fit(x_train,y_train)
# In-sample predictions: the model is evaluated on the rows it was fitted on.
base_mw_pred = ctg_ranf.predict(x_train)

In [None]:
err = base_mw_pred - y_train
output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')

t1.scatter(list(range(len(err))), err)


show(t1)
print(np.sqrt(np.var(err)))

In [None]:
sns.lmplot(x=baro, y=cpd, data=base_df_all,height=6,fit_reg=False,scatter_kws={"s": 25},
           line_kws={"color":"black","linewidth":4},ci=None);

In [None]:
## Predict exhaust temperature from baro and the measured cpd
## (ctd is not one of the predictors in this fit).
#trainexh = train_set[~train_set[exh].isna()]
#trainexh = trainexh.reset_index(drop = True)
x_train, y_train = train_set[[baro,cpd]], train_set[exh]
# Named *_ranf for historical reasons; the estimator is LinearRegression.
exh_ranf = LinearRegression()
#exh_ranf = RandomForestRegressor(max_depth=20,n_estimators=200,random_state=6567)
exh_ranf.fit(x_train,y_train)
#rf_scores = cross_val_score(exh_ranf,x_train,y_train,scoring="neg_mean_squared_error",cv=10)
#rf_rmse = np.sqrt(-rf_scores)
#rf_rmse

In [None]:
#exh_ranf.feature_importances_

In [None]:
base_exh_pred = exh_ranf.predict(x_train)
err = base_exh_pred - y_train
output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')

t1.scatter(list(range(len(err))), err)


show(t1)
print(np.sqrt(np.var(err)))

In [None]:
## Predict fuel from mw alone (base load)
#trainf = train_set[~train_set[gtfuel].isna()]
#trainf = trainf.reset_index(drop = True)
# mw is reshaped to an (n, 1) column because it is the only predictor; the target
# stays one-dimensional.
x_train, y_train = np.asanyarray( train_set[mw]).reshape(-1,1), np.asarray(train_set[fuel])
#fuel_ranf = RandomForestRegressor(max_depth=10,n_estimators=150,random_state=6567)
# Named *_ranf for historical reasons; the estimator is LinearRegression.
fuel_ranf = LinearRegression()
fuel_ranf.fit(x_train, y_train)

In [None]:
base_fuel_pred = fuel_ranf.predict(x_train)
err = base_fuel_pred - y_train

In [None]:

output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')

t1.scatter(list(range(len(err))), err)


show(t1)
print(np.sqrt(np.var(err)))

# Part Load Model

In [None]:
## Part Load Modeling
# Part-load candidates are the rows of model_df whose index label is not in base_df.
# base_df is the base-load frame *before* the inlier filter that produced
# base_df_clean, so rows rejected there as mw outliers are excluded here as well.
base_df_index = base_df.index.tolist()
part_df = model_df[~model_df.index.isin(base_df_index)]

In [None]:
# Keep only the part-load rows that pass every threshold below.
# NOTE(review): the cpd / ctd / exh > 50 cut-offs look like guards against
# bad or missing sensor readings - confirm with the data owner.
part_df = part_df[(part_df[mw]>60) & 
                  (part_df[std]<0.5)&
                  (part_df[cpd]>50) & 
                  (part_df[ctd]>50) & 
                  (part_df[igv]>30) & 
                  (part_df[exh]>50)]

In [None]:
'''
## Augment data
a = list(range(1,31))
maxigv = max(part_df[igv])
minigv = min(part_df[igv])
igv_ad = list(range(int(minigv), int(maxigv)))*len(a)
cit_ad = sorted(a*len(list(range(int(minigv), int(maxigv)))))

#cit_ad = a+np.random.randn(4,1) #add 20 points per degree
#cit_ad = [x for sublist in cit_ad for x in sublist]
baro_ad = []
#rh_ad =[]
for i in range(len(cit_ad)):
    baro_ad.append(random.uniform(min(part_df[baro]), max(part_df[baro])))
#    rh_ad.append(random.uniform(min(base_df_clean[rh]), max(base_df_clean[rh])))

'''

In [None]:
'''
x_ad = pd.DataFrame([cit_ad,igv_ad,baro_ad])
x_ad = x_ad.T
x_ad.columns = [cit,igv,baro]
'''

In [None]:
# Part-load regressions. All three use the predictors (baro, cit, igv) in that
# column order; only the target changes. `x` and `y` are left bound to the last
# (cpd) fit, and `x` is read again by the plotting cell that follows.

# reg mw ~ baro, cit, igv
x = part_df[[baro,cit,igv]].values
y = part_df[mw].values.reshape(-1,1)
plm_mw = LinearRegression()
plm_mw.fit(x, y)

# reg ctd ~ baro, cit, igv
x = part_df[[baro,cit,igv]].values
y = part_df[ctd].values.reshape(-1,1)
plm_ctd = LinearRegression()
plm_ctd.fit(x, y)

# reg cpd ~ baro, cit, igv
x = part_df[[baro,cit,igv]].values
y = part_df[cpd].values.reshape(-1,1)
plm_cpd = LinearRegression()
plm_cpd.fit(x, y)

In [None]:
pmw_pred=plm_mw.predict(x)
output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="actual vs model", x_axis_label='actual', 
            y_axis_label='model')

t1.scatter(part_df[mw].values, pmw_pred.reshape(-1))


show(t1)

In [None]:
'''
# Augment cpd & ctd
p_cpd_ad = plm_cpd.predict(x_ad).reshape(-1,1)
p_ctd_ad = plm_ctd.predict(x_ad).reshape(-1,1)
p_mw_ad = plm_mw.predict(x_ad).reshape(-1,1)
'''

In [None]:
# reg exh ~ baro, igv, cpd  (baro is a predictor too, in the first column)
x,y = part_df[[baro,igv,cpd]].values, part_df[exh].values.reshape(-1,1)
plm_exh = LinearRegression().fit(x, y)
# Augment exh (disabled). NOTE: the commented call passes only igv and cpd,
# which does not match the three predictors the model above is fitted with.
#p_exh_ad = plm_exh.predict(np.hstack((np.asarray(igv_ad).reshape(-1,1), 
#                                        np.asarray(p_cpd_ad).reshape(-1,1))).reshape(-1,2)).reshape(-1,1)

In [None]:
# reg fuel ~ igv, mw (part load)
x,y = part_df[[igv,mw]].values.reshape(-1,2), part_df[fuel].values.reshape(-1,1)
plm_fuel = LinearRegression().fit(x, y)

# Augment fuel (disabled)
#p_fuel_ad = plm_fuel.predict(np.hstack((np.asarray(igv_ad).reshape(-1,1), 
#                                        np.asarray(p_mw_ad).reshape(-1,1))).reshape(-1,2)).reshape(-1,1)

In [None]:
'''
partdf_ad = pd.concat([pd.Series(igv_ad,name=igv),
                       pd.Series(cit_ad,name=cit),
                       pd.Series(p_mw_ad.reshape(-1,),name=mw),
                       pd.Series(p_cpd_ad.reshape(-1,),name=cpd),
                       pd.Series(p_ctd_ad.reshape(-1,),name=ctd),
                       pd.Series(p_exh_ad.reshape(-1,),name=exh),
                       pd.Series(p_fuel_ad.reshape(-1,),name=fuel),
                       pd.Series(baro_ad,name=baro)],axis=1).astype(float)
'''

In [None]:
#partdf_all = pd.merge(part_df,partdf_ad,on = [igv,cit,mw,cpd,ctd,exh,fuel,baro],how = 'outer')
partdf_all=part_df

In [None]:
output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="MW vs Temp", x_axis_label='Temp', 
            y_axis_label='MW')
t1.scatter(partdf_all[cit], partdf_all[mw])

t2 = figure(plot_width=350, plot_height=350,title="MW vs Temp", x_axis_label='Temp', 
            y_axis_label='MW')
t3 = figure(plot_width=350, plot_height=350,title="Exh vs CPD", x_axis_label='CPD', 
            y_axis_label='Exh')

t2.scatter(partdf_all[cit], partdf_all[mw])
t3.scatter(partdf_all[cpd],partdf_all[exh])


show(t1)
show(t2)
show(t3)

In [None]:
## Multi-output linear regression of part-load cpd, ctd and mw on baro, cit and igv.
## A single LinearRegression handles the three targets; the MultiOutputRegressor
## wrapper is kept below, commented out.
x_train, y_train = partdf_all[[baro,cit,igv]], partdf_all[[cpd, ctd, mw]]
p_cpd_ctd_mw_lm = LinearRegression()
#p_cpd_ctd_mw_reg = MultiOutputRegressor(p_cpd_ctd_mw_lm).fit(x_train, y_train)
# fit() returns the estimator itself, so both names refer to the same fitted object.
p_cpd_ctd_mw_reg = p_cpd_ctd_mw_lm.fit(x_train, y_train)

In [None]:
# Residual graphs of cpd, ctd and mw for the part-load multi-output model.
# Evaluated on the same rows the model was fitted on, so these are in-sample residuals.
x_test, y_test = partdf_all[[baro,cit,igv]], partdf_all[[cpd, ctd, mw]]
# `df` is the prediction array; its columns follow the target order cpd, ctd, mw.
df = p_cpd_ctd_mw_reg.predict(x_test)
cpd_err = y_test.iloc[:,0] - df[:,0]
ctd_err = y_test.iloc[:,1] - df[:,1]
mw_err = y_test.iloc[:,2] - df[:,2]
# One panel per target, left to right: cpd, ctd, mw.
t1 = figure(plot_width=300, plot_height=300,title="Residual Plot", x_axis_label='Index',
            y_axis_label='Error')
t2 = figure(plot_width=300, plot_height=300,title="Residual Plot", x_axis_label='Index',
            y_axis_label='Error')
t3 = figure(plot_width=300, plot_height=300,title="Residual Plot", x_axis_label='Index',
            y_axis_label='Error')

t1.scatter(list(range(len(cpd_err))), cpd_err)
t2.scatter(list(range(len(ctd_err))), ctd_err)
# Fix: the third panel used to repeat the ctd residuals; it now shows mw.
t3.scatter(list(range(len(mw_err))), mw_err)

output_notebook()
show(gridplot([[t1,t2,t3]]))
# Standard deviation of each residual series (square root of np.var) ...
print(np.sqrt(np.var(cpd_err)), np.sqrt(np.var(ctd_err)),np.sqrt(np.var(mw_err)))
# ... and the mean absolute percentage error of each target.
print(np.mean(np.abs(cpd_err / y_test.iloc[:,0]))*100,
      np.mean(np.abs(ctd_err / y_test.iloc[:,1]))*100,
      np.mean(np.abs(mw_err / y_test.iloc[:,2]))*100)

In [None]:
## Predict part exhaust temp with all original data
x_train, y_train = partdf_all[[baro,igv,cpd]], partdf_all[exh]
p_exh_lm = LinearRegression()
p_exh_lm.fit(x_train,y_train)

In [None]:
# residual graph of exh
x_test, y_test = partdf_all[[baro,igv,cpd]], partdf_all[exh]
df = p_exh_lm.predict(x_test)
exh_err = y_test - df
t1 = figure(plot_width=300, plot_height=300,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')

t1.scatter(list(range(len(exh_err))), exh_err)
output_notebook()
show(t1)
print(np.sqrt(np.var(exh_err)))

In [None]:
'''
x_train, y_train = partdf_all[[igv,cpd]], partdf_all[exh]
p_exh_reg = LinearRegression()
p_exh_reg.fit(x_train,y_train)
'''

In [None]:
'''
# residual graph of exh
x_test, y_test = partdf_all[[igv,cpd]], partdf_all[exh]
df = p_exh_reg.predict(x_test)
exh_err = y_test - df
t1 = figure(plot_width=300, plot_height=300,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')

t1.scatter(list(range(len(exh_err))), exh_err)

t2 = figure(plot_width=300, plot_height=300,title="Pred vs Actua", x_axis_label='actual', 
            y_axis_label='pred')

t1.scatter(list(range(len(exh_err))), exh_err)
t2.scatter(y_test, df)
output_notebook()
show(t1)
show(t2)
print(np.sqrt(np.var(exh_err)))
'''

In [None]:
## Predict Fuel
x_train, y_train = partdf_all[[igv,mw]], partdf_all[fuel]
p_fuel_lm = LinearRegression()
p_fuel_lm.fit(x_train, y_train)

In [None]:
# residual graph of Fuel
x_test, y_test = partdf_all[[igv,mw]], partdf_all[fuel]
df = p_fuel_lm.predict(x_test)
fuel_err = y_test - df
t1 = figure(plot_width=300, plot_height=300,title="Residual Plot", x_axis_label='Index', 
            y_axis_label='Error')

t1.scatter(list(range(len(fuel_err))), fuel_err)
output_notebook()
show(t1)
print(np.sqrt(np.var(fuel_err)))

# Min Load

In [None]:
#min_df = pd.read_csv("../model_data/ctg8_min_load_data.csv", encoding="utf-16",sep='\t')

In [None]:
'''
## Predict min mw
x_train, y_train = min_df[cit].values.reshape(-1,1), min_df[mw].values.reshape(-1,1)
min_ranf = RandomForestRegressor(max_depth=20,n_estimators=300)
min_ranf.fit(x_train, y_train)
y_pred = min_ranf.predict(x_train)
'''

In [None]:
min21 = pd.read_csv("../model_data/ctg8_2x1_min.csv",encoding="utf-16",sep='\t')
min11 = pd.read_csv("../model_data/ctg8_1x1_min.csv",encoding="utf-16",sep='\t')

In [None]:
# min 2x1 reg igv~cit
x,y = min21[cit].values.reshape(-1,1), min21[igv].values.reshape(-1,1)
min21_reg = LinearRegression().fit(x, y)

In [None]:
# min 1x1 reg igv~cit
x,y = min11[cit].values.reshape(-1,1), min11[igv].values.reshape(-1,1)
min11_reg = LinearRegression().fit(x, y)

In [None]:
# Augment igv at minimum load.
a = list(range(0,61))
# randn(2,1) broadcasts against the 0..60 grid: two copies of it, each shifted by
# one standard-normal offset, i.e. 2 synthetic points per degree.
cit_ad = a+np.random.randn(2,1)
# Flatten to a plain Python list: the `* 2` further down relies on list
# repetition, which an ndarray would turn into element-wise doubling.
cit_ad = cit_ad.ravel().tolist()
cit_col = np.asanyarray(cit_ad).reshape(-1,1)
igv_ad1 = min11_reg.predict(cit_col)
igv_ad2 = min21_reg.predict(cit_col)
# 1x1 predictions stacked on top of the 2x1 predictions
igv_ad = np.vstack((igv_ad1,igv_ad2))
# Repeat the temperatures so they line up row for row with igv_ad.
cit_ad = cit_ad*2
# One uniformly drawn barometric pressure per row, within the range observed in
# part_df. The bounds are computed once instead of on every draw;
# Series.min()/max() also skip NaN.
baro_lo, baro_hi = part_df[baro].min(), part_df[baro].max()
baro_ad = [random.uniform(baro_lo, baro_hi) for _ in range(len(igv_ad))]

In [None]:
# Min-load regressions on (cit, igv, baro).
# Candidate rows: everything outside the base-load index, narrowed to the
# 60 < mw < 120 band plus the other thresholds below.
outside_base = ~model_df.index.isin(base_df_index)
min_df = model_df[outside_base]
keep = ((min_df[mw]<120) &
        (min_df[mw]>60) &
        (min_df[std]<2) &
        (min_df[cpd]>-2000) &
        (min_df[ctd]>300) &
        (min_df[igv]>-2000) &
        (min_df[exh]>-2000))
min_df = min_df[keep]

# reg mw ~ cit, igv, baro
x = min_df[[cit,igv,baro]].values
y = min_df[mw].values.reshape(-1,1)
min21_mw = LinearRegression()
min21_mw.fit(x, y)

# reg ctd ~ cit, igv, baro
x = min_df[[cit,igv,baro]].values
y = min_df[ctd].values.reshape(-1,1)
min21_ctd = LinearRegression()
min21_ctd.fit(x, y)

# reg cpd ~ cit, igv, baro
x = min_df[[cit,igv,baro]].values
y = min_df[cpd].values.reshape(-1,1)
min21_cpd = LinearRegression()
min21_cpd.fit(x, y)

In [None]:
'''
cit_ad = np.asanyarray(cit_ad).reshape(-1,1)
baro_ad = np.asanyarray(baro_ad).reshape(-1,1)
igv_ad = np.asanyarray(igv_ad).reshape(-1,1)
'''

In [None]:
'''
# Augment cpd & ctd
cpd_ad = min21_cpd.predict(np.hstack((cit_ad, igv_ad, baro_ad))).reshape(-1,1)
ctd_ad = min21_ctd.predict(np.hstack((cit_ad, igv_ad, baro_ad))).reshape(-1,1)
mw_ad = min21_mw.predict(np.hstack((cit_ad, igv_ad, baro_ad))).reshape(-1,1)
'''

In [None]:
# reg exh ~ baro, igv, cpd (min load); cit and ctd are not predictors in this fit
x,y = min_df[[baro,igv,cpd]].values, min_df[exh].values.reshape(-1,1)
min21_exh = LinearRegression().fit(x, y)

In [None]:
'''
# Augment exh
exh_ad = min21_exh.predict(np.hstack((baro_ad,cit_ad, igv_ad,cpd_ad,ctd_ad)).reshape(-1,5)).reshape(-1,1)
exh_ad = np.where(exh_ad>1200,1200,exh_ad)
'''

In [None]:
# reg fuel ~ igv, mw (min load)
x,y = min_df[[igv,mw]].values, min_df[fuel].values.reshape(-1,1)
min21_fuel = LinearRegression().fit(x, y)

# Augment fuel (disabled)
#fuel_ad = min21_fuel.predict(np.hstack((igv_ad, mw_ad)).reshape(-1,2)).reshape(-1,1)

In [None]:
'''
partdf_add = pd.concat([pd.Series(igv_ad.reshape(-1,),name=igv),
                       pd.Series(cit_ad.reshape(-1,),name=cit),
                       pd.Series(mw_ad.reshape(-1,),name=mw),
                       pd.Series(cpd_ad.reshape(-1,),name=cpd),
                       pd.Series(ctd_ad.reshape(-1,),name=ctd),
                       pd.Series(exh_ad.reshape(-1,),name=exh),
                       pd.Series(fuel_ad.reshape(-1,),name=fuel),
                       pd.Series(baro_ad.reshape(-1,),name=baro)],axis=1)
'''

In [None]:
#partdf_all = pd.merge(partdf_all,partdf_add,on = [igv,cit,mw,cpd,ctd,exh,fuel,baro],how = 'outer')

In [None]:
output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="MW vs Temp", x_axis_label='Temp', 
            y_axis_label='MW')
t1.scatter(partdf_all[cit], partdf_all[mw])

t2 = figure(plot_width=350, plot_height=350,title="MW vs Temp", x_axis_label='Temp', 
            y_axis_label='MW')
t2.scatter(min_df[cit], min_df[mw])
t3 = figure(plot_width=350, plot_height=350,title="exh vs cpd", x_axis_label='cpd', 
            y_axis_label='exh')

t3.scatter(partdf_all[cpd], partdf_all[exh])

t4 = figure(plot_width=350, plot_height=350,title="fuel vs mw", x_axis_label='mw', 
            y_axis_label='fuel')

t4.scatter(partdf_all[mw], partdf_all[fuel])

show(t1)
show(t2)
show(t3)
show(t4)

In [None]:
output_notebook()
t1 = figure(plot_width=350, plot_height=350,title="MW vs Temp", x_axis_label='Temp', 
            y_axis_label='MW')
t1.scatter(min_df[cit], min_df[mw])

t3 = figure(plot_width=350, plot_height=350,title="exh vs cpd", x_axis_label='cpd', 
            y_axis_label='exh')

t3.scatter(min_df[cpd], min_df[exh])

t4 = figure(plot_width=350, plot_height=350,title="fuel vs mw", x_axis_label='mw', 
            y_axis_label='fuel')

t4.scatter(min_df[mw], min_df[fuel])

show(t1)
#show(t2)
show(t3)
show(t4)

In [None]:
'''
Part Load Models
'''
## Multi-output linear regression of part-load cpd, ctd and mw on baro, cit and igv
## (LinearRegression, not a random forest).
x_train, y_train = partdf_all[[baro,cit,igv]], partdf_all[[cpd, ctd, mw]]
p_cpd_ctd_mw_lm = LinearRegression()
#p_cpd_ctd_mw_reg = MultiOutputRegressor(p_cpd_ctd_mw_lm).fit(x_train, y_train)
# fit() returns the estimator itself, so both names refer to the same fitted object.
p_cpd_ctd_mw_reg = p_cpd_ctd_mw_lm.fit(x_train, y_train)
# Separate single-target model: mw alone on the same three predictors.
p_mw_lm=LinearRegression()
p_mw_lm=p_mw_lm.fit(x_train,partdf_all[mw])

In [None]:
'''
Part Load Exhaust Models
'''
## Linear regression of part-load exhaust temperature on baro, igv and cpd
x_train, y_train = partdf_all[[baro,igv,cpd]], partdf_all[exh]
p_exh_lm = LinearRegression()
p_exh_lm = p_exh_lm.fit(x_train, y_train)

In [None]:
'''
Part Load Fuel Models
'''
## Linear regression of part-load fuel on igv and mw
x_train, y_train = partdf_all[[igv,mw]], partdf_all[fuel]
p_fuel_lm = LinearRegression()
p_fuel_lm= p_fuel_lm.fit(x_train, y_train)

In [None]:
'''
Min Load Models
'''
## Multi-output linear regression of min-load cpd, ctd and mw on baro, cit and igv
x_train, y_train = min_df[[baro,cit,igv]], min_df[[cpd, ctd, mw]]
m_cpd_ctd_mw_lm = LinearRegression()
#m_cpd_ctd_mw_reg = MultiOutputRegressor(m_cpd_ctd_mw_lm).fit(x_train, y_train)
# Fix: fit the min-load estimator created above. This line used to call
# p_cpd_ctd_mw_lm.fit(...), which re-trained the part-load model on min-load data
# (fit returns the same object, so p_cpd_ctd_mw_reg changed with it) and left
# m_cpd_ctd_mw_lm unfitted.
m_cpd_ctd_mw_reg = m_cpd_ctd_mw_lm.fit(x_train, y_train)

In [None]:
'''
min Load Exhaust Models
'''
## Linear regression of min-load exhaust temperature on baro, igv and cpd
x_train, y_train = min_df[[baro,igv,cpd]], min_df[exh]
m_exh_lm = LinearRegression()
m_exh_lm = m_exh_lm.fit(x_train, y_train)

In [None]:
'''
Min Fuel Models
'''
## Linear regression of min-load fuel on igv and mw
x_train, y_train = min_df[[igv,mw]], min_df[fuel]
m_fuel_lm = LinearRegression()
m_fuel_lm = m_fuel_lm.fit(x_train, y_train)

In [None]:
# solver
'''
Part Load Models
'''
## Single-target linear regression of part-load mw on baro, cit and igv.
## This is the model stored under the 'part<mw><baro|cit|igv>' key for the solver.
x_train, y_train = partdf_all[[baro,cit,igv]], partdf_all[mw]
p_mw_lm = LinearRegression()
p_mw_lm = p_mw_lm.fit(x_train, y_train)

In [None]:
## Write every fitted model to a single pickle file.

pkl = "../../../pickles/ctg8.pkl"

# Key format: '<regime><targets><predictors>', with '|' separating column tags.
base_models = {
    'base<cpd|ctd><baro|cit>': cpd_ctd_reg,
    'base<mw><baro|cit>': ctg_ranf,
    'base<exh_temp><baro|cpd>': exh_ranf,
    'base<fuel><mw>': fuel_ranf,
}
part_models = {
    'part<cpd|ctd|mw><baro|cit|igv>': p_cpd_ctd_mw_reg,
    'part<exh_temp><baro|igv|cpd>': p_exh_lm,
    'part<fuel><igv|mw>': p_fuel_lm,
    # single-target part-load model used by the solver
    'part<mw><baro|cit|igv>': p_mw_lm,
}
min_models = {
    'min2x1<igv><cit>': min21_reg,
    'min1x1<igv><cit>': min11_reg,
    'min<cpd|ctd|mw><baro|cit|igv>': m_cpd_ctd_mw_reg,
    'min<exh_temp><baro|igv|cpd>': m_exh_lm,
    'min<fuel><igv|mw>': m_fuel_lm,
}
# Merged in base, part, min order, so the key order matches the original literal.
models = {**base_models, **part_models, **min_models}

with open(pkl, "wb") as f:
    joblib.dump(models, f)
    # echo the path that was written
    print(f.name)