predicting turbine energy yield (TEY) using ambient variables as features.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
df = pd.read_csv('gas_turbines.csv')

In [3]:
df

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,TEY,CDP,CO,NOX
0,6.8594,1007.9,96.799,3.5000,19.663,1059.2,550.00,114.70,10.605,3.1547,82.722
1,6.7850,1008.4,97.118,3.4998,19.728,1059.3,550.00,114.72,10.598,3.2363,82.776
2,6.8977,1008.8,95.939,3.4824,19.779,1059.4,549.87,114.71,10.601,3.2012,82.468
3,7.0569,1009.2,95.249,3.4805,19.792,1059.6,549.99,114.72,10.606,3.1923,82.670
4,7.3978,1009.7,95.150,3.4976,19.765,1059.7,549.98,114.72,10.612,3.2484,82.311
...,...,...,...,...,...,...,...,...,...,...,...
15034,9.0301,1005.6,98.460,3.5421,19.164,1049.7,546.21,111.61,10.400,4.5186,79.559
15035,7.8879,1005.9,99.093,3.5059,19.414,1046.3,543.22,111.78,10.433,4.8470,79.917
15036,7.2647,1006.3,99.496,3.4770,19.530,1037.7,537.32,110.19,10.483,7.9632,90.912
15037,7.0060,1006.8,99.008,3.4486,19.377,1043.2,541.24,110.74,10.533,6.2494,93.227


In [4]:
df.isna().sum()

AT      0
AP      0
AH      0
AFDP    0
GTEP    0
TIT     0
TAT     0
TEY     0
CDP     0
CO      0
NOX     0
dtype: int64

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15039 entries, 0 to 15038
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      15039 non-null  float64
 1   AP      15039 non-null  float64
 2   AH      15039 non-null  float64
 3   AFDP    15039 non-null  float64
 4   GTEP    15039 non-null  float64
 5   TIT     15039 non-null  float64
 6   TAT     15039 non-null  float64
 7   TEY     15039 non-null  float64
 8   CDP     15039 non-null  float64
 9   CO      15039 non-null  float64
 10  NOX     15039 non-null  float64
dtypes: float64(11)
memory usage: 1.3 MB


In [6]:
X = df.drop('TEY', axis=1)
y = df['TEY']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [8]:
X_train

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,CDP,CO,NOX
10187,17.4200,1015.4,56.526,3.5327,24.659,1087.8,550.05,11.979,1.52160,73.865
581,10.3210,1029.1,84.519,5.7759,33.104,1100.2,528.33,14.082,1.93590,71.891
1090,8.9166,1017.7,94.937,6.2532,33.701,1099.9,527.26,14.081,0.58865,78.189
6753,14.3080,1020.8,86.602,2.2080,18.882,1053.2,548.95,10.472,4.96240,67.287
1467,17.6370,1004.7,61.288,4.9691,25.059,1087.9,549.88,11.957,1.05220,76.902
...,...,...,...,...,...,...,...,...,...,...
13123,23.1770,1017.4,73.491,3.2509,20.246,1061.2,549.96,10.831,2.67630,57.271
3264,29.7670,1012.5,66.533,4.2209,26.290,1093.9,549.98,12.222,1.77170,64.133
9845,12.4800,1016.7,72.020,4.1439,29.251,1099.8,537.65,13.503,0.88512,65.532
10799,26.2940,1011.1,65.623,3.5515,23.846,1080.9,550.27,11.598,1.10500,60.692


In [9]:
X_test

Unnamed: 0,AT,AP,AH,AFDP,GTEP,TIT,TAT,CDP,CO,NOX
7052,15.3040,1000.3,65.933,3.0021,24.361,1085.2,549.86,11.896,1.27910,77.036
13849,16.3320,1020.6,57.919,4.9235,31.802,1100.0,531.67,13.820,1.15540,70.765
13112,27.6380,1017.0,57.860,4.1522,25.954,1094.5,550.37,12.241,1.11050,65.651
5268,23.7750,1009.3,86.774,4.3658,25.787,1092.0,550.04,12.181,1.79170,63.681
8720,7.6426,1013.2,82.689,4.0112,23.502,1083.7,550.12,11.731,1.56420,82.617
...,...,...,...,...,...,...,...,...,...,...
9973,13.4160,1010.1,100.130,3.2325,19.625,1054.1,547.37,10.601,4.76850,77.677
3539,27.5860,1009.3,77.724,4.1525,26.033,1092.3,550.45,12.048,1.59880,57.541
2180,17.3370,1003.6,82.099,5.0158,24.913,1086.1,550.14,11.825,0.67895,65.615
9596,13.6830,1014.7,86.376,4.4516,24.383,1087.2,549.41,12.011,1.48860,72.297


In [10]:
y_train

10187    133.66
581      160.99
1090     162.69
6753     110.01
1467     133.70
          ...  
13123    112.04
3264     133.80
9845     155.46
10799    126.28
2732     109.74
Name: TEY, Length: 12031, dtype: float64

In [11]:
y_test

7052     134.06
13849    155.98
13112    133.77
5268     134.02
8720     134.14
          ...  
9973     112.56
3539     133.74
2180     133.23
9596     134.71
5218     133.76
Name: TEY, Length: 3008, dtype: float64

In [12]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))

In [14]:
model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mape'])

In [15]:
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [16]:
test_loss, test_mae, test_mape = model.evaluate(X_test, y_test)



In [17]:
print("MSE: {:.2f}".format(test_loss))
print("MAE: {:.2f}".format(test_mae))
print("MAPE: {:.2%}".format(test_mape))

MSE: 0.45
MAE: 0.50
MAPE: 37.65%
