# **Model Training: Gas Turbina**

Since the EDA showed some non-linearity in Turbine Energy Yield (TEY) with respect to features, we will use models that capture these non-linearities.

Models:

- Randon Forests
- XGboost
- Neural Networks
- Ensemble Methods

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


from xgboost import XGBRegressor
import torch


import pandas as pd 


# data's 2011
gt_2011 = pd.read_csv('../data/gas_turbine_emision/gt_2011.csv' )
gt_2011['Year'] = 2011

# data's 2012
gt_2012 = pd.read_csv('../data/gas_turbine_emision/gt_2012.csv' )
gt_2012['Year'] = 2014

# data's 2013
gt_2013 = pd.read_csv('../data/gas_turbine_emision/gt_2013.csv' )
gt_2013['Year'] = 2013

# data's 2014
gt_2014 = pd.read_csv('../data/gas_turbine_emision/gt_2014.csv' )
gt_2014['Year'] = 2014

# data's 2015
gt_2015 = pd.read_csv('../data/gas_turbine_emision/gt_2015.csv' )
gt_2015['Year'] = 2015


gt = pd.concat([gt_2011, gt_2012, gt_2013, gt_2014, gt_2015], ignore_index=True)



### **Random Forests**

In [4]:




# Split data
X = gt.drop(columns=['TEY'])
y = gt['TEY']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 0.6309406027072189
R2 Score: 0.9983665528319152


### **XGboost**

In [5]:
from xgboost import XGBRegressor

# Train model
model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 0.6778543264263197
R2 Score: 0.9981146113675685


### **SVR**

In [6]:
from sklearn.svm import SVR

# Train model
model = SVR(kernel='rbf')
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 12.798293075699801
R2 Score: 0.32790255889491626


### **Neural Network**


In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd

# Assuming train and test are Pandas DataFrames
# Example: train and test have columns 'feature1', 'feature2', ..., 'target'

# Separate features (X) and target (y) for train and test
X_train_np = X_train.values  # Convert DataFrame to numpy array
y_train_np = y_train.values
X_test_np = X_test.values
y_test_np = y_test.values

# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_np, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_np, dtype=torch.float32)

# Create a DataLoader for training
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define the Neural Network
class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)  # Input layer to hidden layer (64 neurons)
        self.fc2 = nn.Linear(64, 32)         # Hidden layer (64 neurons) to hidden layer (32 neurons)
        self.fc3 = nn.Linear(32, 1)          # Hidden layer (32 neurons) to output layer (1 neuron)
        self.relu = nn.ReLU()                # Activation function

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)  # No activation for regression output
        return x

# Initialize the model
input_dim = X_train.shape[1]  # Number of features
model = SimpleNN(input_dim)

# Define loss function and optimizer
criterion = nn.MSELoss()  # Root Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.0001)  # Adam optimizer

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    for batch_X, batch_y in train_loader:
        # Forward pass
        outputs = model(batch_X)
        loss = torch.sqrt( criterion(outputs, batch_y.unsqueeze(1)) )  # Add extra dimension to batch_y

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print loss for each epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluation
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    y_pred_tensor = model(X_test_tensor)
    y_pred = y_pred_tensor.numpy()  # Convert predictions to numpy array

# Calculate RMSE and R2 Score
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2 = r2_score(y_test, y_pred)
print('RMSE:', rmse)
print('R2 Score:', r2)

Epoch [1/50], Loss: 15.9185
Epoch [2/50], Loss: 15.7784
Epoch [3/50], Loss: 11.2145
Epoch [4/50], Loss: 7.8605
Epoch [5/50], Loss: 4.5186
Epoch [6/50], Loss: 3.3040
Epoch [7/50], Loss: 2.8145
Epoch [8/50], Loss: 2.1687
Epoch [9/50], Loss: 3.2363
Epoch [10/50], Loss: 2.7823
Epoch [11/50], Loss: 2.1711
Epoch [12/50], Loss: 1.8753
Epoch [13/50], Loss: 1.2740
Epoch [14/50], Loss: 2.4648
Epoch [15/50], Loss: 2.3358
Epoch [16/50], Loss: 0.9906
Epoch [17/50], Loss: 1.3737
Epoch [18/50], Loss: 1.4369
Epoch [19/50], Loss: 1.4444
Epoch [20/50], Loss: 1.8640
Epoch [21/50], Loss: 1.3789
Epoch [22/50], Loss: 1.6881
Epoch [23/50], Loss: 1.4980
Epoch [24/50], Loss: 1.4698
Epoch [25/50], Loss: 2.0576
Epoch [26/50], Loss: 1.5076
Epoch [27/50], Loss: 1.4967
Epoch [28/50], Loss: 1.9779
Epoch [29/50], Loss: 1.5673
Epoch [30/50], Loss: 1.1279
Epoch [31/50], Loss: 1.3035
Epoch [32/50], Loss: 1.6537
Epoch [33/50], Loss: 1.6817
Epoch [34/50], Loss: 1.1341
Epoch [35/50], Loss: 2.4853
Epoch [36/50], Loss: 1.259

In [37]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, r2_score

# Separate features (X) and target (y) for train and test
X_train_np = X_train.values  # Convert DataFrame to numpy array
y_train_np = y_train.values
X_test_np = X_test.values
y_test_np = y_test.values

# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test_np, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_np, dtype=torch.float32).view(-1, 1)



# Define model
class SimpleModel(nn.Module):
    def __init__(self, input_dim):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.bn1 = nn.BatchNorm1d(64)  # Normalización Batch
        self.dropout1 = nn.Dropout(0.05)  # Dropout de 20%

        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)  # Normalización Batch
        self.dropout2 = nn.Dropout(0.1)  # Dropout de 20%

        self.fc3 = nn.Linear(32, 1)
        
    def forward(self, x):
        x = torch.relu(  self.fc1(x)  )
        # x = self.dropout1(x)
        x = torch.relu( self.fc2(x)   ) 
        # x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Initialize model
model = SimpleModel(X_train.shape[1])


class RMSELoss(nn.Module):
    def forward(self, y_pred, y_true):
        return torch.sqrt(nn.MSELoss()(y_pred, y_true) + 1e-8)

criterion = RMSELoss()

optimizer = optim.Adam(model.parameters(), lr=0.0001)

# DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Train model
num_epochs = 50
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        y_pred = model(batch_X)
        loss = criterion(y_pred, batch_y)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    
# Evaluate
with torch.no_grad():
    y_pred = model(X_test_tensor).numpy()

y_test_np = y_test_tensor.numpy()
print('RMSE:', mean_squared_error(y_test_np, y_pred, squared=False))
print('R2 Score:', r2_score(y_test_np, y_pred))


Epoch [1/50], Loss: 12.7055
Epoch [2/50], Loss: 11.1755
Epoch [3/50], Loss: 14.5827
Epoch [4/50], Loss: 14.9308
Epoch [5/50], Loss: 11.1173
Epoch [6/50], Loss: 11.2879
Epoch [7/50], Loss: 8.4283
Epoch [8/50], Loss: 9.0800
Epoch [9/50], Loss: 6.9546
Epoch [10/50], Loss: 3.8544
Epoch [11/50], Loss: 3.8431
Epoch [12/50], Loss: 2.3537
Epoch [13/50], Loss: 2.7514
Epoch [14/50], Loss: 1.8905
Epoch [15/50], Loss: 2.6046
Epoch [16/50], Loss: 1.8857
Epoch [17/50], Loss: 1.7869
Epoch [18/50], Loss: 1.7176
Epoch [19/50], Loss: 1.8138
Epoch [20/50], Loss: 2.1267
Epoch [21/50], Loss: 1.6524
Epoch [22/50], Loss: 1.1641
Epoch [23/50], Loss: 2.5789
Epoch [24/50], Loss: 1.9459
Epoch [25/50], Loss: 0.7264
Epoch [26/50], Loss: 1.3648
Epoch [27/50], Loss: 3.0549
Epoch [28/50], Loss: 1.9294
Epoch [29/50], Loss: 1.4346
Epoch [30/50], Loss: 1.9572
Epoch [31/50], Loss: 1.5490
Epoch [32/50], Loss: 2.4520
Epoch [33/50], Loss: 1.5640
Epoch [34/50], Loss: 1.2576
Epoch [35/50], Loss: 1.0150
Epoch [36/50], Loss: 1.

### **Linear Regression**

In [9]:
from sklearn.linear_model import LinearRegression

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 0.9526760174890128
R2 Score: 0.9962759234057789


**Implementation Neural Network Sequetial**

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, r2_score

# Separate features (X) and target (y) for train and test
X_train_np = X_train.values  # Convert DataFrame to numpy array
y_train_np = y_train.values
X_test_np = X_test.values
y_test_np = y_test.values

# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test_np, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_np, dtype=torch.float32).view(-1, 1)


# Define model using Sequential
model = nn.Sequential(
    nn.Linear(X_train.shape[1], 64),
   # nn.BatchNorm1d(64),
    nn.ReLU(), 
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Linear(32, 1)
)



# Loss and optimizer


class RMSELoss(nn.Module):
    def forward(self, y_pred, y_true):
        return torch.sqrt(nn.MSELoss()(y_pred, y_true))

criterion = RMSELoss()

optimizer = optim.Adam(model.parameters(), lr=0.0001)

# DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Train model
for epoch in range(50):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        y_pred = model(batch_X)
        loss = criterion(y_pred, batch_y)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate
with torch.no_grad():
    y_pred = model(X_test_tensor).numpy()

y_test_np = y_test_tensor.numpy()
print('RMSE:', mean_squared_error(y_test_np, y_pred, squared=False))
print('R2 Score:', r2_score(y_test_np, y_pred))


Epoch [1/30], Loss: 9.3605
Epoch [2/30], Loss: 16.4405
Epoch [3/30], Loss: 14.2831
Epoch [4/30], Loss: 6.5444
Epoch [5/30], Loss: 6.0547
Epoch [6/30], Loss: 7.1938
Epoch [7/30], Loss: 4.1058
Epoch [8/30], Loss: 3.8899
Epoch [9/30], Loss: 2.7259
Epoch [10/30], Loss: 3.1827
Epoch [11/30], Loss: 2.1833
Epoch [12/30], Loss: 1.6510
Epoch [13/30], Loss: 1.5384
Epoch [14/30], Loss: 2.7545
Epoch [15/30], Loss: 1.8946
Epoch [16/30], Loss: 2.9216
Epoch [17/30], Loss: 1.9433
Epoch [18/30], Loss: 4.5035
Epoch [19/30], Loss: 2.0331
Epoch [20/30], Loss: 3.1704
Epoch [21/30], Loss: 1.5661
Epoch [22/30], Loss: 2.5183
Epoch [23/30], Loss: 1.4569
Epoch [24/30], Loss: 1.4632
Epoch [25/30], Loss: 1.3138
Epoch [26/30], Loss: 1.3630
Epoch [27/30], Loss: 2.2453
Epoch [28/30], Loss: 1.5302
Epoch [29/30], Loss: 2.0054
Epoch [30/30], Loss: 2.5792
Epoch [31/30], Loss: 2.0569
Epoch [32/30], Loss: 2.0166
Epoch [33/30], Loss: 0.9535
Epoch [34/30], Loss: 2.1010
Epoch [35/30], Loss: 1.6253
Epoch [36/30], Loss: 1.2705

### **Ensemble Method**

In [11]:
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Define models
model1 = LinearRegression()
model2 = RandomForestRegressor(n_estimators=100, random_state=42)
model3 = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Create ensemble
ensemble = VotingRegressor(estimators=[('lr', model1), ('rf', model2), ('xgb', model3)])
ensemble.fit(X_train, y_train)

# Evaluate
y_pred = ensemble.predict(X_test)
print('RMSE:', mean_squared_error(y_test, y_pred, squared=False))
print('R2 Score:', r2_score(y_test, y_pred))

RMSE: 0.6538504200832819
R2 Score: 0.9982457763818717


The best implementation are Random Forest and XGboost