# **Model Training: Gas Turbina**

Since the EDA showed some non-linearity in Turbine Energy Yield (TEY) with respect to features, we will use models that capture these non-linearities.

Models:

- Randon Forests
- XGboost
- Neural Networks
- Ensemble Methods

In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


from xgboost import XGBRegressor
import torch


import pandas as pd 


# data's 2011
gt_2011 = pd.read_csv('../data/gas_turbine_emision/gt_2011.csv' )
gt_2011['Year'] = 2011

# data's 2012
gt_2012 = pd.read_csv('../data/gas_turbine_emision/gt_2012.csv' )
gt_2012['Year'] = 2014

# data's 2013
gt_2013 = pd.read_csv('../data/gas_turbine_emision/gt_2013.csv' )
gt_2013['Year'] = 2013

# data's 2014
gt_2014 = pd.read_csv('../data/gas_turbine_emision/gt_2014.csv' )
gt_2014['Year'] = 2014

# data's 2015
gt_2015 = pd.read_csv('../data/gas_turbine_emision/gt_2015.csv' )
gt_2015['Year'] = 2015


gt = pd.concat([gt_2011, gt_2012, gt_2013, gt_2014, gt_2015], ignore_index=True)



In [5]:
# simple feature engineer

gt['TAT_TIT_Ratio'] = gt['TAT'] / gt['TIT']

### **Random Forests**

In [6]:




# Split data
X = gt.drop(columns=['TEY'])
y = gt['TEY']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 0.6270042034451444
R2 Score: 0.9983868712020609


### **XGboost**

In [7]:
from xgboost import XGBRegressor

# Train model
model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 0.6741293039685858
R2 Score: 0.9981352760372852


### **SVR**

In [8]:
from sklearn.svm import SVR

# Train model
model = SVR(kernel='rbf')
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 12.900700270031024
R2 Score: 0.3171037782499224


### **Neural Network**


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd

# Assuming train and test are Pandas DataFrames
# Example: train and test have columns 'feature1', 'feature2', ..., 'target'

# Separate features (X) and target (y) for train and test
X_train_np = X_train.values  # Convert DataFrame to numpy array
y_train_np = y_train.values
X_test_np = X_test.values
y_test_np = y_test.values

# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_np, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_np, dtype=torch.float32)

# Create a DataLoader for training
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define the Neural Network
class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)  # Input layer to hidden layer (64 neurons)
        self.fc2 = nn.Linear(64, 32)         # Hidden layer (64 neurons) to hidden layer (32 neurons)
        self.fc3 = nn.Linear(32, 1)          # Hidden layer (32 neurons) to output layer (1 neuron)
        self.relu = nn.ReLU()                # Activation function

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)  # No activation for regression output
        return x

# Initialize the model
input_dim = X_train.shape[1]  # Number of features
model = SimpleNN(input_dim)

# Define loss function and optimizer
criterion = nn.MSELoss()  # Root Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.0001)  # Adam optimizer

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    for batch_X, batch_y in train_loader:
        # Forward pass
        outputs = model(batch_X)
        loss = torch.sqrt( criterion(outputs, batch_y.unsqueeze(1)) )  # Add extra dimension to batch_y

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print loss for each epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluation
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    y_pred_tensor = model(X_test_tensor)
    y_pred = y_pred_tensor.numpy()  # Convert predictions to numpy array

# Calculate RMSE and R2 Score
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2 = r2_score(y_test, y_pred)
print('RMSE:', rmse)
print('R2 Score:', r2)

Epoch [1/50], Loss: 15.8230
Epoch [2/50], Loss: 12.0738
Epoch [3/50], Loss: 11.9437
Epoch [4/50], Loss: 13.7326
Epoch [5/50], Loss: 9.9565
Epoch [6/50], Loss: 4.8949
Epoch [7/50], Loss: 5.2905
Epoch [8/50], Loss: 2.3011
Epoch [9/50], Loss: 1.6876
Epoch [10/50], Loss: 1.3890
Epoch [11/50], Loss: 1.8725
Epoch [12/50], Loss: 1.3269
Epoch [13/50], Loss: 1.4471
Epoch [14/50], Loss: 1.9581
Epoch [15/50], Loss: 2.6491
Epoch [16/50], Loss: 1.1991
Epoch [17/50], Loss: 2.4160
Epoch [18/50], Loss: 2.5304
Epoch [19/50], Loss: 2.6530
Epoch [20/50], Loss: 1.7775
Epoch [21/50], Loss: 1.7783
Epoch [22/50], Loss: 1.8708
Epoch [23/50], Loss: 1.4341
Epoch [24/50], Loss: 1.2975
Epoch [25/50], Loss: 0.9303
Epoch [26/50], Loss: 1.6207
Epoch [27/50], Loss: 1.7251
Epoch [28/50], Loss: 1.0305
Epoch [29/50], Loss: 2.0330
Epoch [30/50], Loss: 1.5242
Epoch [31/50], Loss: 0.9900
Epoch [32/50], Loss: 1.5950
Epoch [33/50], Loss: 1.5506
Epoch [34/50], Loss: 1.2276
Epoch [35/50], Loss: 1.9028
Epoch [36/50], Loss: 2.15

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, r2_score

# Separate features (X) and target (y) for train and test
X_train_np = X_train.values  # Convert DataFrame to numpy array
y_train_np = y_train.values
X_test_np = X_test.values
y_test_np = y_test.values

# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test_np, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_np, dtype=torch.float32).view(-1, 1)



# Define model
class SimpleModel(nn.Module):
    def __init__(self, input_dim):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.bn1 = nn.BatchNorm1d(64)  # Normalización Batch
        self.dropout1 = nn.Dropout(0.05)  # Dropout de 20%

        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)  # Normalización Batch
        self.dropout2 = nn.Dropout(0.1)  # Dropout de 20%

        self.fc3 = nn.Linear(32, 1)
        
    def forward(self, x):
        x = torch.relu(  self.fc1(x)  )
        # x = self.dropout1(x)
        x = torch.relu( self.fc2(x)   ) 
        # x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Initialize model
model = SimpleModel(X_train.shape[1])


class RMSELoss(nn.Module):
    def forward(self, y_pred, y_true):
        return torch.sqrt(nn.MSELoss()(y_pred, y_true) + 1e-8)

criterion = RMSELoss()

optimizer = optim.Adam(model.parameters(), lr=0.0001)

# DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Train model
num_epochs = 50
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        y_pred = model(batch_X)
        loss = criterion(y_pred, batch_y)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    
# Evaluate
with torch.no_grad():
    y_pred = model(X_test_tensor).numpy()

y_test_np = y_test_tensor.numpy()
print('RMSE:', mean_squared_error(y_test_np, y_pred, squared=False))
print('R2 Score:', r2_score(y_test_np, y_pred))


Epoch [1/50], Loss: 16.6000
Epoch [2/50], Loss: 17.4160
Epoch [3/50], Loss: 17.1187
Epoch [4/50], Loss: 11.2346
Epoch [5/50], Loss: 5.5927
Epoch [6/50], Loss: 8.6207
Epoch [7/50], Loss: 4.0801
Epoch [8/50], Loss: 3.6201
Epoch [9/50], Loss: 3.4901
Epoch [10/50], Loss: 2.4927
Epoch [11/50], Loss: 3.3552
Epoch [12/50], Loss: 2.1293
Epoch [13/50], Loss: 3.1068
Epoch [14/50], Loss: 1.6472
Epoch [15/50], Loss: 1.9279
Epoch [16/50], Loss: 1.4969
Epoch [17/50], Loss: 1.2768
Epoch [18/50], Loss: 1.8642
Epoch [19/50], Loss: 2.2314
Epoch [20/50], Loss: 2.7275
Epoch [21/50], Loss: 1.5079
Epoch [22/50], Loss: 1.9655
Epoch [23/50], Loss: 1.7608
Epoch [24/50], Loss: 1.5440
Epoch [25/50], Loss: 1.2751
Epoch [26/50], Loss: 2.3007
Epoch [27/50], Loss: 2.2722
Epoch [28/50], Loss: 2.2310
Epoch [29/50], Loss: 1.2280
Epoch [30/50], Loss: 1.8400
Epoch [31/50], Loss: 0.8457
Epoch [32/50], Loss: 1.6057
Epoch [33/50], Loss: 2.7278
Epoch [34/50], Loss: 1.8290
Epoch [35/50], Loss: 2.3259
Epoch [36/50], Loss: 2.25

### **Linear Regression**

In [11]:
from sklearn.linear_model import LinearRegression

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 0.9507531359552979
R2 Score: 0.9962909415885439


**Implementation Neural Network Sequential**

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, r2_score

# Separate features (X) and target (y) for train and test
X_train_np = X_train.values  # Convert DataFrame to numpy array
y_train_np = y_train.values
X_test_np = X_test.values
y_test_np = y_test.values

# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test_np, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_np, dtype=torch.float32).view(-1, 1)


# Define model using Sequential
model = nn.Sequential(
    nn.Linear(X_train.shape[1], 64),
   # nn.BatchNorm1d(64),
    nn.ReLU(), 
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Linear(32, 1)
)



# Loss and optimizer


class RMSELoss(nn.Module):
    def forward(self, y_pred, y_true):
        return torch.sqrt(nn.MSELoss()(y_pred, y_true))

criterion = RMSELoss()

optimizer = optim.Adam(model.parameters(), lr=0.0001)

# DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Train model
for epoch in range(50):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        y_pred = model(batch_X)
        loss = criterion(y_pred, batch_y)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate
with torch.no_grad():
    y_pred = model(X_test_tensor).numpy()

y_test_np = y_test_tensor.numpy()
print('RMSE:', mean_squared_error(y_test_np, y_pred, squared=False))
print('R2 Score:', r2_score(y_test_np, y_pred))


Epoch [1/50], Loss: 20.3533
Epoch [2/50], Loss: 7.3787
Epoch [3/50], Loss: 14.7957
Epoch [4/50], Loss: 16.0361
Epoch [5/50], Loss: 9.5628
Epoch [6/50], Loss: 10.6319
Epoch [7/50], Loss: 9.1049
Epoch [8/50], Loss: 11.3360
Epoch [9/50], Loss: 6.3958
Epoch [10/50], Loss: 3.5614
Epoch [11/50], Loss: 2.6150
Epoch [12/50], Loss: 1.9737
Epoch [13/50], Loss: 1.3646
Epoch [14/50], Loss: 1.9268
Epoch [15/50], Loss: 1.1691
Epoch [16/50], Loss: 1.2018
Epoch [17/50], Loss: 0.9832
Epoch [18/50], Loss: 2.0750
Epoch [19/50], Loss: 1.1936
Epoch [20/50], Loss: 1.3323
Epoch [21/50], Loss: 0.9930
Epoch [22/50], Loss: 1.0422
Epoch [23/50], Loss: 1.3470
Epoch [24/50], Loss: 1.6193
Epoch [25/50], Loss: 1.4352
Epoch [26/50], Loss: 1.1866
Epoch [27/50], Loss: 2.0838
Epoch [28/50], Loss: 2.9976
Epoch [29/50], Loss: 0.6158
Epoch [30/50], Loss: 1.7780
Epoch [31/50], Loss: 1.2854
Epoch [32/50], Loss: 1.3691
Epoch [33/50], Loss: 1.4150
Epoch [34/50], Loss: 1.5902
Epoch [35/50], Loss: 1.3733
Epoch [36/50], Loss: 2.1

### **Ensemble Method**

In [13]:
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Define models
model1 = LinearRegression()
model2 = RandomForestRegressor(n_estimators=100, random_state=42)
model3 = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Create ensemble
ensemble = VotingRegressor(estimators=[('lr', model1), ('rf', model2), ('xgb', model3)])
ensemble.fit(X_train, y_train)

# Evaluate
y_pred = ensemble.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 0.650799902164649
R2 Score: 0.9982621067451478


The best implementation are Random Forest and XGboost