In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
import torch.optim as optim
import torch.utils.data as data
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from numpy import exp, sum, log, log10


In [2]:
# Define a custom dataset class
class AircraftDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


In [3]:
def get_data(io='Tabulated flight data.xlsx', sheet_name=0):
    return pd.read_excel(io, sheet_name)

path= 'Tabulated flight data.xlsx'
X_data= 'X'
y_data='y'

# Getting Data
allX=get_data(path, sheet_name=X_data)
ally=get_data(path, sheet_name=y_data)

In [4]:
allX

Unnamed: 0,Airspeed (Knots)(TAS),Altitude (FT)(ALT),Temperature (DEG)(TAT),Exhaust Temperature (DEG)(EGT),Mach Number(MACH)(MACH)
0,0.0,-17,14.50,24.750,0.0
1,0.0,-17,14.50,24.750,0.0
2,0.0,-18,14.50,24.750,0.0
3,0.0,-18,14.50,24.625,0.0
4,0.0,-18,14.75,24.625,0.0
...,...,...,...,...,...
22879,0.0,867,26.25,240.500,0.0
22880,0.0,867,26.25,239.875,0.0
22881,0.0,866,26.25,239.125,0.0
22882,0.0,866,26.00,238.500,0.0


In [5]:
ally

Unnamed: 0,Thrust (%RPM)(N1),Ground Speed (Knots)(GS),Fuel Consumption (LBS/HR)
0,1.625000,0.0,0
1,1.625000,0.0,0
2,1.625000,0.0,0
3,1.625000,0.0,0
4,1.625000,0.0,0
...,...,...,...
22879,1.625000,0.0,0
22880,1.625000,0.0,0
22881,1.625000,0.0,0
22882,1.625000,0.0,0


In [6]:
class PerformancePredictionModel(pl.LightningModule):
    def __init__(self, input_size= 5, hidden_size = 64, output_size=3, dropout_rate=0.2):
        super().__init__()
        self.criterion = nn.MSELoss()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.LeakyReLU()

    def forward(self, x):
        x = torch.relu(x)
        x = self.flatten(x)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        x = self.fc3(x)
        return x
        
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)
        self.log('test_loss', loss)
        return loss
        
    def configure_optimizers(self):
        # Use Adam optimizer for LM and BR
        return optim.Adam(self.parameters(), lr=0.01)
    
    def lm_train(self, train_loader, max_epochs=100):
        optimizer = torch.optim.LBFGS(self.parameters(), lr=0.001)  # Levenberg-Marquardt optimizer
        
        def closure():
            optimizer.zero_grad()
            predictions = self.forward(x)
            loss = self.criterion(predictions, y)
            loss.backward()
            return loss
        
        for epoch in range(max_epochs):
            self.train()
            for x, y in train_loader:
                optimizer.step(closure)
    
    def br_train(self, train_loader, max_epochs=100):
        optimizer = torch.optim.RMSprop(self.parameters(), lr=0.001, alpha=0.99)  # Bayesian regularization optimizer
        
        for epoch in range(max_epochs):
            self.train()
            for x, y in train_loader:
                predictions = self.forward(x)
                loss = self.criterion(predictions, y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

In [7]:
class DatasetModule(pl.LightningDataModule):
    def __init__(self, allX, ally):
        super().__init__()
        self.allX = allX
        self.ally = ally

        self.allX = self.allX.to_numpy().astype(np.float32)
        self.ally = self.ally.to_numpy().astype(np.float32)

        # Normalize data
        #self.scaler_X = StandardScaler().fit(self.allX)
        #self.allX = self.scaler_X.transform(self.allX)
        #self.scaler_y = StandardScaler().fit(self.ally)
        #self.ally = self.scaler_y.transform(self.ally)

        # Split data into training and validation sets
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.allX, self.ally, test_size=0.2, random_state=42)
        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(self.X_train, self.y_train, test_size=0.25, random_state=42)

        self.scaler_X_train = MinMaxScaler().fit(self.X_train)
        self.X_train = self.scaler_X_train.transform(self.X_train)
        self.scaler_y_train = MinMaxScaler().fit(self.y_train)
        self.y_train = self.scaler_y_train.transform(self.y_train)
        
        self.scaler_X_test = MinMaxScaler().fit(self.X_test)
        self.X_test = self.scaler_X_test.transform(self.X_test)
        self.scaler_y_test = MinMaxScaler().fit(self.y_test)
        self.y_test = self.scaler_y_test.transform(self.y_test)
        
        self.scaler_X_val = MinMaxScaler().fit(self.X_val)
        self.X_val = self.scaler_X_val.transform(self.X_val)
        self.scaler_y_val = MinMaxScaler().fit(self.y_val)
        self.y_val = self.scaler_y_val.transform(self.y_val)

        self.train_set = AircraftDataset(self.X_train, self.y_train)
        self.valid_set = AircraftDataset(self.X_val, self.y_val)
        self.test_set = AircraftDataset(self.X_test, self.y_test)
        self.batch_size = 200  # batch size roughly 1/100th of large dataset

    def train_dataloader(self):
        train_dataloader = DataLoader(self.train_set, batch_size=self.batch_size, shuffle=True, num_workers=0) 
        return train_dataloader

    def val_dataloader(self):
        val_dataloader = DataLoader(self.valid_set, batch_size=self.batch_size, shuffle=False, num_workers=0)
        return val_dataloader

    def test_dataloader(self):
        test_dataloader = DataLoader(self.test_set, batch_size=self.batch_size, shuffle=False, num_workers=0)
        return test_dataloader

In [8]:
import warnings
warnings.filterwarnings("ignore", ".*does not have many workers.*")

### Testing with only adam optimizer

In [10]:
input_size = 5  # Airspeed, _Altitude, Air Temp, Exhaust Temp, Cruise Mach Number
hidden_size = 128
output_size = 3 # Thrust, Ground Speed, Fuel Consumption
dropout_rate = 0.2


In [11]:
# Create DatasetModule instance
DatasetM = DatasetModule(allX, ally)

In [12]:
# Initialize the model
model = PerformancePredictionModel(input_size=input_size, hidden_size = hidden_size, output_size=output_size, dropout_rate=dropout_rate)

# Initialize a trainer
trainer = pl.Trainer(devices="auto", accelerator="auto", log_every_n_steps= 1, max_epochs=100, enable_progress_bar=False)

# Train the model
trainer.fit(model, DatasetM)

# Get MSE of Training Sets
train_loss = trainer.callback_metrics['train_loss'].item()
print(f'Training MSE: {train_loss}')

# Get MSE of Validation Set
val_loss = trainer.callback_metrics['val_loss'].item()
print(f'Validation MSE: {val_loss}')

# Test the model
trainer.test(model, dataloaders=DatasetM)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type      | Params
----------------------------------------
0 | criterion | MSELoss   | 0     
1 | flatten   | Flatten   | 0     
2 | fc1       | Linear    | 768   
3 | fc2       | Linear    | 16.5 K
4 | fc3       | Linear    | 387   
5 | dropout   | Dropout   | 0     
6 | relu      | LeakyReLU | 0     
----------------------------------------
17.7 K    Trainable params
0         Non-trainable params
17.7 K    Total params
0.071     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=100` reached.


Training MSE: 0.0017307059606537223
Validation MSE: 0.0010246665915474296


[{'test_loss': 0.000944719125982374}]

In [13]:
# Create DatasetModule instance
DatasetM = DatasetModule(allX, ally)

# Initialize the model
model = PerformancePredictionModel(input_size=input_size, hidden_size = hidden_size, output_size=output_size, dropout_rate=dropout_rate)

# Initialize a trainer
trainer = pl.Trainer(devices="auto", accelerator="auto", log_every_n_steps= 1, max_epochs=100, enable_progress_bar=False)

# Train with Levenberg-Marquardt optimizer
train_loader = DatasetM.train_dataloader()
model.lm_train(train_loader, max_epochs=100)

# Train the model
trainer.fit(model, DatasetM)

# Get MSE of Training Sets
train_loss = trainer.callback_metrics['train_loss'].item()
print(f'Training MSE: {train_loss}')

# Get MSE of Validation Set
val_loss = trainer.callback_metrics['val_loss'].item()
print(f'Validation MSE: {val_loss}')

# Test the model
trainer.test(model, dataloaders=DatasetM)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type      | Params
----------------------------------------
0 | criterion | MSELoss   | 0     
1 | flatten   | Flatten   | 0     
2 | fc1       | Linear    | 768   
3 | fc2       | Linear    | 16.5 K
4 | fc3       | Linear    | 387   
5 | dropout   | Dropout   | 0     
6 | relu      | LeakyReLU | 0     
----------------------------------------
17.7 K    Trainable params
0         Non-trainable params
17.7 K    Total params
0.071     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=100` reached.


Training MSE: 0.001132642268203199
Validation MSE: 0.001031257095746696


[{'test_loss': 0.0009905369952321053}]

In [14]:
# Create DatasetModule instance
DatasetM = DatasetModule(allX, ally)

# Initialize the model
model = PerformancePredictionModel(input_size=input_size, hidden_size = hidden_size, output_size=output_size, dropout_rate=dropout_rate)

# Initialize a trainer
trainer = pl.Trainer(devices="auto", accelerator="auto", log_every_n_steps= 1, max_epochs=100, enable_progress_bar=False)

# Train with Bayesian regularization optimizer
train_loader = DatasetM.train_dataloader()
model.br_train(train_loader, max_epochs=100)

# Train the model
trainer.fit(model, DatasetM)

# Get MSE of Training Sets
train_loss = trainer.callback_metrics['train_loss'].item()
print(f'Training MSE: {train_loss}')

# Get MSE of Validation Set
val_loss = trainer.callback_metrics['val_loss'].item()
print(f'Validation MSE: {val_loss}')

# Test the model
trainer.test(model, dataloaders=DatasetM)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type      | Params
----------------------------------------
0 | criterion | MSELoss   | 0     
1 | flatten   | Flatten   | 0     
2 | fc1       | Linear    | 768   
3 | fc2       | Linear    | 16.5 K
4 | fc3       | Linear    | 387   
5 | dropout   | Dropout   | 0     
6 | relu      | LeakyReLU | 0     
----------------------------------------
17.7 K    Trainable params
0         Non-trainable params
17.7 K    Total params
0.071     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=100` reached.


Training MSE: 0.0014377435436472297
Validation MSE: 0.0007794809644110501


[{'test_loss': 0.0007170778117142618}]

In [15]:
import optuna

In [16]:
def objective(trial):
    hidden_size = trial.suggest_int('hidden_size', 16, 256)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 1)

    # Initialize the model with optimized hyperparameters
    model = PerformancePredictionModel(hidden_size=hidden_size, dropout_rate=dropout_rate)

    # Initialize a trainer
    trainer = pl.Trainer(devices="auto", accelerator="auto", log_every_n_steps= 1, max_epochs=50, enable_progress_bar=False)
    
    # Train with Bayesian regularization optimizer
    train_loader = DatasetM.train_dataloader()
    model.br_train(train_loader, max_epochs=10)
    
    # Train the model
    trainer.fit(model, DatasetM)

    # Return validation loss for optimization
    return trainer.callback_metrics['val_loss'].item()

# Create DatasetModule instance
DatasetM = DatasetModule(allX, ally)

# Perform hyperparameter optimization
study = optuna.create_study()
study.optimize(objective, n_trials=20)
best_params = study.best_params

[I 2024-05-20 18:31:38,661] A new study created in memory with name: no-name-2fae3e51-23b1-42d1-802b-371d9d11fbbf
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type      | Params
----------------------------------------
0 | criterion | MSELoss   | 0     
1 | flatten   | Flatten   | 0     
2 | fc1       | Linear    | 786   
3 | fc2       | Linear    | 17.3 K
4 | fc3       | Linear    | 396   
5 | dropout   | Dropout   | 0     
6 | relu      | LeakyReLU | 0     
----------------------------------------
18.5 K    Trainable params
0         Non-trainable params
18.5 K    Total params
0.074     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=50` reached.
[I 2024-05-20 18:32:12,547] Trial 0 finished with value: 0.05289389565587044 and parameters: {'hidden_size': 131, 'dropout_rate': 0.9681616545244155}. Best is trial 0 with value: 0.05289389565587044

In [17]:
# Initialize the best model with the optimized hyperparameters
best_model = PerformancePredictionModel(hidden_size=best_params['hidden_size'], dropout_rate=best_params['dropout_rate'])

# Initialize a trainer
trainer = pl.Trainer(devices="auto", accelerator="auto", log_every_n_steps= 1, max_epochs=100, enable_progress_bar=False)

# Train with Bayesian regularization optimizer
train_loader = DatasetM.train_dataloader()
best_model.br_train(train_loader, max_epochs=100)

# Train the model
trainer.fit(best_model, DatasetM)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type      | Params
----------------------------------------
0 | criterion | MSELoss   | 0     
1 | flatten   | Flatten   | 0     
2 | fc1       | Linear    | 1.0 K 
3 | fc2       | Linear    | 28.1 K
4 | fc3       | Linear    | 504   
5 | dropout   | Dropout   | 0     
6 | relu      | LeakyReLU | 0     
----------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.118     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=100` reached.


In [18]:
# Get MSE of Training Set
train_loss = trainer.callback_metrics['train_loss'].item()
print(f'Training MSE: {train_loss}')

# Get MSE of Validation Set
val_loss = trainer.callback_metrics['val_loss'].item()
print(f'Validation MSE: {val_loss}')

# Test the model
trainer.test(best_model, dataloaders=DatasetM)

Training MSE: 0.0014634444378316402
Validation MSE: 0.0007065354147925973


[{'test_loss': 0.0006854256498627365}]