# Gas-Turbine-Flask Predictions


In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
from mlxtend.plotting import plot_decision_regions

In [2]:
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LinearRegression,SGDRegressor
from sklearn.metrics import r2_score,mean_absolute_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,VotingRegressor,BaggingRegressor

In [3]:
Ordata = pd.read_csv('datasets/gt_full.csv')
Ordata.sample(4)

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
16779,11.797,1008.5,84.482,2.6308,19.011,1051.7,548.96,109.13,10.391,6.6984,97.071
5134,23.054,1014.8,80.945,3.1697,20.136,1060.7,550.03,112.17,10.641,2.3776,52.171
18685,25.446,1004.1,68.23,4.1564,25.99,1092.9,549.98,135.28,12.139,5.6531,62.823
9495,24.141,1014.6,53.273,5.1039,25.611,1091.6,549.61,133.7,12.077,1.2115,74.174


In [3]:
Ordata.isnull().sum()

AT      0
AP      0
AH      0
AFDP    0
GTEP    0
TIT     0
TAT     0
TEY     0
CDP     0
CO      0
NOX     0
dtype: int64

In [5]:
Ordata.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
AT,36733.0,17.712726,7.447451,-6.2348,11.781,17.801,23.665,37.103
AP,36733.0,1013.070165,6.463346,985.85,1008.8,1012.6,1017.0,1036.6
AH,36733.0,77.867015,14.461355,24.085,68.188,80.47,89.376,100.2
AFDP,36733.0,3.925518,0.773936,2.0874,3.3556,3.9377,4.3769,7.6106
GTEP,36733.0,25.563801,4.195957,17.698,23.129,25.104,29.061,40.716
TIT,36733.0,1081.428084,17.536373,1000.8,1071.8,1085.9,1097.0,1100.9
TAT,36733.0,546.158517,6.84236,511.04,544.72,549.88,550.04,550.61
TEY,36733.0,133.506404,15.618634,100.02,124.45,133.73,144.08,179.5
CDP,36733.0,12.060525,1.088795,9.8518,11.435,11.965,12.855,15.159
CO,36733.0,2.372468,2.262672,0.000388,1.1824,1.7135,2.8429,44.103


In [13]:
print(Ordata.min())
print("\n")
print(Ordata.max())

AT        -6.234800
AP       985.850000
AH        24.085000
AFDP       2.087400
GTEP      17.698000
TIT     1000.800000
TAT      511.040000
TEY      100.020000
CDP        9.851800
CO         0.000388
NOX       25.905000
dtype: float64


AT        37.1030
AP      1036.6000
AH       100.2000
AFDP       7.6106
GTEP      40.7160
TIT     1100.9000
TAT      550.6100
TEY      179.5000
CDP       15.1590
CO        44.1030
NOX      119.9100
dtype: float64


In [14]:
for i in range(0,11):
    print(Ordata.iloc[:,i:i+1].skew())
    print("-------------------------")

AT   -0.043547
dtype: float64
-------------------------
AP    0.194121
dtype: float64
-------------------------
AH   -0.628034
dtype: float64
-------------------------
AFDP    0.381097
dtype: float64
-------------------------
GTEP    0.329021
dtype: float64
-------------------------
TIT   -0.888278
dtype: float64
-------------------------
TAT   -1.755907
dtype: float64
-------------------------
TEY    0.116555
dtype: float64
-------------------------
CDP    0.236792
dtype: float64
-------------------------
CO    4.838174
dtype: float64
-------------------------
NOX    1.026779
dtype: float64
-------------------------


In [15]:
Ordata.shape

(36733, 11)

In [4]:
x = Ordata.iloc[:,0:9]
y = Ordata.iloc[:,9:10] #CO
z = Ordata.iloc[:,10:11] #NOX


In [5]:
xtrainc,xtestc,ytrainc,ytestc = train_test_split(x,y,test_size=0.25)
print(xtrainc.shape)
print(xtestc.shape)
print(ytrainc.shape)
print(ytestc.shape)

print(xtrainc.columns)
print(xtestc.columns)
print(ytrainc.columns)
print(ytestc.columns)

(27549, 9)
(9184, 9)
(27549, 1)
(9184, 1)
Index(['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP'], dtype='object')
Index(['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP'], dtype='object')
Index(['CO'], dtype='object')
Index(['CO'], dtype='object')


# CO Model prediction

In [33]:
lr = LinearRegression()
lr.fit(xtrainc,ytrainc)
pred1 = lr.predict(xtestc)
print("R2 Accuracy = ",r2_score(ytestc,pred1))
print("Mean Absolute Error = ",mean_absolute_error(ytestc,pred1))
print("Weight = ",lr.coef_)
print("y = ",lr.intercept_)

R2 Accuracy =  0.530913468438007
Mean Absolute Error =  0.880912212372612
Weight =  [[-0.05079696 -0.00481327 -0.00790062 -0.14268051  0.09851643 -0.07014661
  -0.07521466 -0.18904053  1.92692307]]
y =  [125.72634136]


In [45]:
sgdr = SGDRegressor(loss='squared_error',penalty='l1')
sgdr.fit(xtrainc,ytrainc)
pred2 = sgdr.predict(xtestc)
print("R2 Accuracy = ",r2_score(ytestc,pred2))
print("Mean Absolute Error = ",mean_absolute_error(ytestc,pred2))
print("Weight = ",sgdr.coef_)
print("y = ",sgdr.intercept_)

R2 Accuracy =  -2.205782116944323e+28
Mean Absolute Error =  367700994879485.7
Weight =  [-1.55223303e+12 -1.18099785e+12 -9.47038474e+11 -5.63076062e+09
  1.15237880e+11  1.04561503e+12  7.86254144e+11  7.68349578e+11
  1.19641516e+10]
y =  [-2.62496841e+09]


In [52]:
dtr = DecisionTreeRegressor(criterion='absolute_error',splitter='best')
dtr.fit(xtrainc,ytrainc)
pred3 = dtr.predict(xtestc)
print("R2 Accuracy = ",r2_score(ytestc,pred3))
print("Mean Absolute Error = ",mean_absolute_error(ytestc,pred3))
#print("Weight = ",dtr.coef_)
#print("y = ",dtr.intercept_)

R2 Accuracy =  0.577531372007444
Mean Absolute Error =  0.6958757862467335


In [28]:
rfr = RandomForestRegressor(n_estimators=1000)
rfr.fit(xtrainc,ytrainc)
pred4 = rfr.predict(xtestc)
print("R2 Accuracy = ",r2_score(ytestc,pred4))
print("Mean Absolute Error = ",mean_absolute_error(ytestc,pred4))

R2 Accuracy =  0.7315679458465287
Mean Absolute Error =  0.49174542161197726


In [11]:
lr = LinearRegression()
dtr = DecisionTreeRegressor(criterion='absolute_error',splitter='best')
rfr = RandomForestRegressor()
models = [('linear regression',lr),('decisiontreereg',dtr),('randomforestreg',rfr)]

In [None]:
for model in models:
    score = cross_val_score(model[1],xtrainc,ytrainc,scoring='r2',cv=15)
    print(model[0],np.round(np.mean(score),3))

linear regression 0.578
decisiontreereg 0.58


In [12]:
lr = LinearRegression()
dtr = DecisionTreeRegressor(criterion='absolute_error',splitter='best')
rfr = RandomForestRegressor()
models = [('linear regression',lr),('decisiontreereg',dtr)]
vr = VotingRegressor(models)
scores = cross_val_score(vr,xtrainc,ytrainc,scoring='r2',cv=12)
print("R2 score = ",np.round(np.mean(scores),3))

R2 score =  0.659


In [54]:
bgr = BaggingRegressor(estimator=DecisionTreeRegressor(splitter='random',max_depth=62),n_estimators=50,max_samples=0.7)
bgr.fit(xtrainc,ytrainc)
pred5 = bgr.predict(xtestc)
print("R2 Score = ",r2_score(ytestc,pred5))

R2 Score =  0.7543209499548583


In [25]:
bgr1 = BaggingRegressor(estimator=RandomForestRegressor(),n_estimators=20,max_samples=0.5)
bgr1.fit(xtrainc,ytrainc)
pred6 = bgr1.predict(xtestc)
print("R2 score = ",r2_score(ytestc,pred6))

R2 score =  0.7354080713094011


# NOX Prediction Model

In [40]:
x = Ordata.iloc[:,0:9]
z = Ordata.iloc[:,10:11] #NOX
print(x.columns)
print(z.columns)

Index(['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP'], dtype='object')
Index(['NOX'], dtype='object')


In [41]:
xtrainn,xtestn,ytrainn,ytestn = train_test_split(x,z,test_size=0.25)
print(xtrainn.shape)
print(xtestn.shape)
print(ytrainn.shape)
print(ytestn.shape)

print(xtrainn.columns)
print(xtestn.columns)
print(ytrainn.columns)
print(ytestn.columns)

(27549, 9)
(9184, 9)
(27549, 1)
(9184, 1)
Index(['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP'], dtype='object')
Index(['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP'], dtype='object')
Index(['NOX'], dtype='object')
Index(['NOX'], dtype='object')


In [42]:
print(xtrainn.columns)
print(ytrainn.columns)

Index(['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP'], dtype='object')
Index(['NOX'], dtype='object')


In [43]:
lr = LinearRegression()
lr.fit(xtrainn,ytrainn)
pred7 = lr.predict(xtestn)
print("R2 Accuracy = ",r2_score(ytestn,pred7))
print("Mean Absolute Error = ",mean_absolute_error(ytestn,pred7))
print("Weight = ",lr.coef_)
print("y = ",lr.intercept_)

R2 Accuracy =  0.529600450434093
Mean Absolute Error =  5.782888368180365
Weight =  [[-1.7614702  -0.22106553 -0.21960249  0.82717057 -0.05828028  1.40233976
  -1.50533963 -1.95600427 -1.70469754]]
y =  [-76.87056946]


In [45]:
dtr = DecisionTreeRegressor(criterion='absolute_error',splitter='random')
dtr.fit(xtrainn,ytrainn)
pred8 = dtr.predict(xtestn)
print("R2 Accuracy = ",r2_score(ytestn,pred8))
print("Mean Absolute Error = ",mean_absolute_error(ytestn,pred8))

R2 Accuracy =  0.7018181148537573
Mean Absolute Error =  3.97576099738676


In [59]:
bgr1 = BaggingRegressor(estimator=DecisionTreeRegressor(splitter='random'),n_estimators=50)
bgr1.fit(xtrainn,ytrainn)
pred9 = bgr1.predict(xtestn)
print("R2 Score = ",r2_score(ytestn,pred9))

R2 Score =  0.8792075292349458


In [60]:
print(bgr)
print("\n")
print(bgr1)

BaggingRegressor(estimator=DecisionTreeRegressor(max_depth=62,
                                                 splitter='random'),
                 max_samples=0.7, n_estimators=50)


BaggingRegressor(estimator=DecisionTreeRegressor(splitter='random'),
                 n_estimators=50)


# Testing models

In [79]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd 
import pickle
data = pd.read_csv('datasets/gt_full.csv')
data = data.sample(1)
x = data.iloc[:,0:9]
y = data.iloc[:,9:10] #CO
z = data.iloc[:,10:11] #NOX

print(x.columns)


head = x
zhead = z
yhead = y
print(head)
print(yhead)
print(zhead)

print("==================================================================================\n")
print("Predicted value = \n")
#loading models 

with open('models\pred_model_co.pkl','rb') as co_model,open('models\pred_model_nox.pkl','rb') as nox_model:
    co_model = pickle.load(co_model)
    nox_model = pickle.load(nox_model)

pred = co_model.predict(head)
pred1 = nox_model.predict(head)

print(np.round(pred[0],3))
print(np.round(pred1[0]),3)


Index(['AT', 'AP', 'AH', 'AFDP', 'GTEP', 'TIT', 'TAT', 'TEY', 'CDP'], dtype='object')
          AT      AP      AH    AFDP    GTEP     TIT    TAT     TEY     CDP
7852  11.145  999.83  100.17  5.3024  24.506  1085.0  549.8  134.48  11.787
          CO
7852  1.6401
         NOX
7852  75.007

Predicted value = 

1.512
75.0 3
