<h2> Finding the best Channel Model for an Optical Fiber Channel </h2>

<h3> Setup: imports </h3>

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import optuna
import plotly.graph_objects as go
import json
import os
# Smoke test: confirm that result files can be written relative to the current
# working directory before any long-running study is started.
print(os.getcwd())
path = "model results/basic/QPSK/best_params.json"

# Example data to save
# NOTE(review): this placeholder is written to the same file objective0 uses for
# its real results, so re-running this cell replaces them — confirm that is intended.
best_params = {"param1": 0.1, "param2": 0.2}

# Attempt to write the file
try:
    # Ensure the parent directory exists; without it open() fails on a fresh checkout
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as json_file:
        json.dump(best_params, json_file, indent=4)
except OSError as e:
    print(f"Error while saving JSON file: {e}")
else:
    # Reported only after the file has been closed (and therefore flushed)
    print(f"JSON file successfully saved to: {os.path.abspath(path)}")

<h3> Step 1 - Obtain a fraction of the dataset and split it into train, validation, and test subsets </h3>
We use only a fraction of the data because the original dataset is very large and training on all of it would take too long.

In [None]:
# Function to obtain training, validation, and testing datasets
def obtain_datasets(fraction):
    """Load the leading `fraction` of each modulation dataset and split it 70/15/15.

    For PAM-4, QPSK and 16-QAM the input and output files are read from the
    ``data/`` directory as whitespace-separated (Time, Amplitude) columns.
    Each input/output pair is truncated to a common length before splitting,
    so row ``i`` of an input split always corresponds to row ``i`` of the
    matching output split. Splits are contiguous (no shuffling): the first
    70% of rows are train, the next 15% validation, the remainder test.

    Args:
        fraction: Portion of each file to keep, counted from the first row.
            Must satisfy ``0 < fraction <= 1``.

    Returns:
        dict keyed by "PAM", "QPSK" and "QAM"; each value is a dict with keys
        "train", "validate" and "test" mapping to an ``(input, output)`` tuple
        of NumPy arrays.

    Raises:
        ValueError: If `fraction` is outside ``(0, 1]``.
    """
    if not 0 < fraction <= 1:
        raise ValueError(f"fraction must be in (0, 1], got {fraction!r}")

    # Result key -> (input file, output file)
    data_files = {
        "PAM": ("data/PAM-4 Input Data.txt", "data/PAM-4 Output Data.txt"),
        "QPSK": ("data/QPSK Input Data.txt", "data/QPSK Output Data.txt"),
        "QAM": ("data/16-QAM Input Data.txt", "data/16-QAM Output Data.txt"),
    }

    # Helper function to load a fraction of the dataset
    def load_fraction(file_path, fraction):
        # The whole file has to be read first because the row count is not known up front
        full_data = pd.read_csv(file_path, sep=r'\s+', header=None, names=["Time", "Amplitude"]).to_numpy()
        num_rows = int(len(full_data) * fraction)
        # Keep a contiguous block from the start of the file
        return full_data[:num_rows]

    # Helper function to split into train, validation, and test sets
    def obtain_train_validate_test(data):
        train_size = int(0.7 * len(data))
        val_size = int(0.15 * len(data))
        val_end = train_size + val_size
        # The test split takes whatever is left, so no row is dropped by rounding
        return data[:train_size], data[train_size:val_end], data[val_end:]

    datasets = {}
    for name, (input_path, output_path) in data_files.items():
        input_data = load_fraction(input_path, fraction)
        output_data = load_fraction(output_path, fraction)

        # Files of different length would otherwise be split at different row
        # indices, pairing inputs with the wrong outputs.
        common_length = min(len(input_data), len(output_data))
        input_data = input_data[:common_length]
        output_data = output_data[:common_length]

        train_in, val_in, test_in = obtain_train_validate_test(input_data)
        train_out, val_out, test_out = obtain_train_validate_test(output_data)

        datasets[name] = {
            "train": (train_in, train_out),
            "validate": (val_in, val_out),
            "test": (test_in, test_out),
        }

    return datasets


Testing this function

In [None]:
# Specify the fraction of data to use (1 = the full dataset; e.g., 0.1 for 10%)
fraction = 1

# Get the datasets
datasets = obtain_datasets(fraction)

# Access the PAM training dataset
pam_train_input, pam_train_output = datasets["PAM"]["train"]

# Access the QPSK validation dataset
qpsk_val_input, qpsk_val_output = datasets["QPSK"]["validate"]

# Access the QAM test dataset
qam_test_input, qam_test_output = datasets["QAM"]["test"]

print("PAM Training Input Shape:", pam_train_input.shape)
print("PAM Training Output Shape:", pam_train_output.shape)



# Plot the QPSK validation data (column 0 is time, column 1 is amplitude)
plt.figure(figsize=(10, 6))

# Input data plot
plt.plot(qpsk_val_input[:, 0], qpsk_val_input[:, 1], label="Input Data", color="blue", alpha=0.7)

# Output data plot
plt.plot(qpsk_val_output[:, 0], qpsk_val_output[:, 1], label="Output Data", color="red", alpha=0.7)

# Add labels and legend; the title names the split that is actually plotted
plt.title("QPSK Validation Data: Input vs. Output", fontsize=14)
plt.xlabel("Time", fontsize=12)
plt.ylabel("Amplitude", fontsize=12)
plt.legend(fontsize=12)
plt.grid(True)
plt.tight_layout()

# Show the plot
plt.show()


<h3>Step 2 - Data Processing </h3>

In [None]:
input_paths = ["data/QPSK Input Data.txt"]
output_paths = ["data/QPSK Output Data.txt"]

if len(input_paths) != len(output_paths):
    raise ValueError("input_paths and output_paths must list the same number of files")

# Per-file arrays; stacked into one array once every file has been loaded
input_data = []
output_data = []

for input_path, output_path in zip(input_paths, output_paths):

    input_data_temp = pd.read_csv(input_path, sep = r'\s+', header = None, names = ["Time", "Amplitude"]).to_numpy()
    output_data_temp = pd.read_csv(output_path, sep = r'\s+', header = None, names = ["Time", "Amplitude"]).to_numpy()

    # Align data sizes by truncating to the minimum length
    min_length = min(len(input_data_temp), len(output_data_temp))
    input_data_temp = input_data_temp[:min_length]
    output_data_temp = output_data_temp[:min_length]

    # Replace NaN amplitudes (column 1) with that file's mean amplitude, ignoring NaNs
    for series in (input_data_temp, output_data_temp):
        missing = np.isnan(series[:, 1])
        if missing.any():
            series[missing, 1] = np.nanmean(series[:, 1])

    # append arrays to list
    input_data.append(input_data_temp)
    output_data.append(output_data_temp)

# Validate raw data. This has to happen before stacking: afterwards len() would
# count rows and [0] would be a single row rather than the first dataset.
print(f"Number of datasets: {len(input_data)}")
print(f"First dataset input shape: {input_data[0].shape}")
print(f"First dataset output shape: {output_data[0].shape}")

# Stack all files into one array. The cast to float keeps the in-place
# standardisation below from truncating values if a file parsed as integers.
input_data = np.vstack(input_data).astype(np.float64)
output_data = np.vstack(output_data).astype(np.float64)

print(f"Input data shape: {input_data.shape}")
print(f"Output data shape: {output_data.shape}")

# Standardize input and output data
# NOTE(review): these statistics are computed over the whole series, including
# the rows that later become the validation and test sets.
input_mean, input_std = input_data[:, 1].mean(), input_data[:, 1].std()
output_mean, output_std = output_data[:, 1].mean(), output_data[:, 1].std()

if input_std == 0 or output_std == 0:
    raise ValueError("Cannot standardize a constant amplitude column (standard deviation is 0)")

input_data[:, 1] = (input_data[:, 1] - input_mean) / input_std
output_data[:, 1] = (output_data[:, 1] - output_mean) / output_std

# Validate Standardized data
print("\nStandardized Input Data (First 5 rows):")
print(input_data[:5])
print("Standardized Output Data (First 5 rows):")
print(output_data[:5])

# Columns: time, standardized input amplitude, standardized output amplitude
data = np.column_stack((input_data[:, 0], input_data[:, 1], output_data[:, 1]))

# Validate combined data
print("\nCombined Data (First 5 rows):")
print(data[:5])


# Sliding window function
def create_windows(data, window_size, step_size):
    """Return overlapping windows taken along the first axis of `data`.

    Window ``i`` equals ``data[i * step_size : i * step_size + window_size]``.
    There are ``(len(data) - window_size) // step_size + 1`` windows; rows at
    the end that do not fill a whole window are dropped.

    The result is a read-only strided view of `data`, not a copy. At stride 1
    a materialised copy needs about ``window_size`` times the memory of `data`.
    Call ``.copy()`` on the result if it has to be modified.

    Args:
        data: Array-like of shape ``(n_rows, ...)``.
        window_size: Number of consecutive rows per window (positive int).
        step_size: Row offset between the starts of consecutive windows (positive int).

    Returns:
        np.ndarray of shape ``(num_windows, window_size, ...)``. If `data` has
        fewer than `window_size` rows, an empty array of shape
        ``(0, window_size, ...)`` is returned.

    Raises:
        ValueError: If `window_size` or `step_size` is not positive.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive integers")

    data = np.asarray(data)
    if len(data) < window_size:
        # Keep the trailing dimensions so downstream indexing still works
        return np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)

    # sliding_window_view puts the window axis last; move it next to the window index
    view = np.lib.stride_tricks.sliding_window_view(data, window_size, axis=0)
    return np.moveaxis(view, -1, 1)[::step_size]


# Windowing configuration: every sample is `window_size` consecutive rows, and
# its label is read from the row in the middle of that window.
window_size = 512
step_size = 1
middle_index = window_size // 2
windows = create_windows(data, window_size, step_size)

# Sanity-check the windows
print("\nSliding Windows (Shape):", windows.shape)
print("First Sliding Window (First 5 rows):")
print(windows[0][:5])

# Column 1 of each window is the model input (one flat row per window),
# column 2 at the middle row is the target, column 0 is kept for time lookup.
X = windows[:, :, 1].reshape(len(windows), -1)
y = windows[:, middle_index, 2]
time_index = windows[:, :, 0]

# Sanity-check features, targets and time stamps
print("\nFlattened Input X (Shape):", X.shape)
print("First Flattened Input X (First 5 values):")
print(X[0][:5])
print("\nOutput y (Shape):", y.shape)
print("First 5 Output Values y:")
print(y[:5])
print("\nTime Index (Shape):", time_index.shape)
print("First Time Index (First 5 rows):")
print(time_index[:5])

# Contiguous 70 / 15 / 15 split; the test set takes the remainder
train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))
test_size = len(X) - train_size - val_size
val_end = train_size + val_size

X_train, X_val, X_test = X[:train_size], X[train_size:val_end], X[val_end:]
y_train, y_val, y_test = y[:train_size], y[train_size:val_end], y[val_end:]
time_train, time_val, time_test = (
    time_index[:train_size],
    time_index[train_size:val_end],
    time_index[val_end:],
)

# Sanity-check the splits
print("\nTraining Set Shapes:")
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("\nValidation Set Shapes:")
print("X_val:", X_val.shape, "y_val:", y_val.shape)
print("\nTest Set Shapes:")
print("X_test:", X_test.shape, "y_test:", y_test.shape)

# float32 tensors; targets become column vectors of shape (n, 1)
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.tensor(targets, dtype=torch.float32).view(-1, 1)
    for targets in (y_train, y_val, y_test)
)

# Sanity-check tensor shapes
print("\nTensor Shapes:")
print("X_train_tensor:", X_train_tensor.shape, "y_train_tensor:", y_train_tensor.shape)
print("X_val_tensor:", X_val_tensor.shape, "y_val_tensor:", y_val_tensor.shape)
print("X_test_tensor:", X_test_tensor.shape, "y_test_tensor:", y_test_tensor.shape)

# Pair features with targets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training loader shuffles; validation and test keep their order
batch_size = 1024
loader_options = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Pull a single batch to confirm the loader yields the expected shapes
print("\nDataLoader Validation:")
for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
    print(f"Batch {batch_idx + 1} - X_batch Shape: {X_batch.shape}, y_batch Shape: {y_batch.shape}")
    break  # the first batch is enough

<h3>Step 3 - NN models </h3>

<h4> model 1: Basic Model </h4>

- This is a shallow neural network serving as a baseline for fiber optic data regression tasks.
- Features two hidden layers with progressive dimensionality reduction (hidden_dim to hidden_dim // 2).
- Incorporates dropout layers and batch normalization for regularization and training stability.
- Prioritizes simplicity and computational efficiency.
- Acts as a benchmark for performance comparison with other architectures.



In [None]:
# Define the basic FNN model
class FiberOpticFNN0(nn.Module):
    """Baseline feed-forward regressor with two hidden layers.

    Layer widths: input_dim -> hidden_dim -> hidden_dim // 2 -> output_dim.
    Each hidden layer is followed by batch normalization, ReLU and dropout.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout):
        super().__init__()
        half_dim = hidden_dim // 2
        layers = [
            # first hidden layer
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            # second hidden layer, half as wide
            nn.Linear(hidden_dim, half_dim),
            nn.BatchNorm1d(half_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            # linear read-out
            nn.Linear(half_dim, output_dim),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to `x` and return the result."""
        return self.fc(x)


<h4> model 2: Deeper model </h4>

- This is a deeper neural network designed to enhance modeling capacity for fiber optic data regression.
- Consists of three hidden layers, maintaining hidden_dim for two layers before reducing to hidden_dim // 2.
- Employs dropout layers and batch normalization to mitigate overfitting and improve training stability.
- Leverages additional depth to capture complex patterns and nuances in data.
- Serves as an extended architecture to evaluate the benefits of increased depth over simpler models


In [None]:
# Define the deeper model
class FiberOpticFNN1(nn.Module):
    """Feed-forward regressor with three hidden layers.

    Layer widths: input_dim -> hidden_dim -> hidden_dim -> hidden_dim // 2 -> output_dim.
    Each hidden layer is followed by batch normalization, ReLU and dropout.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout):
        super().__init__()
        widths = [input_dim, hidden_dim, hidden_dim, hidden_dim // 2]

        layers = []
        # One Linear/BatchNorm/ReLU/Dropout group per consecutive pair of widths
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.BatchNorm1d(fan_out))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
        # Linear read-out
        layers.append(nn.Linear(widths[-1], output_dim))

        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to `x` and return the result."""
        return self.fc(x)

<h4> model 3: Wider Model </h4>

- This is a wider neural network architecture
- Begins with a significantly wider first hidden layer (hidden_dim * 2) to enhance feature extraction capacity.
- Progressively narrows through subsequent layers, reducing to hidden_dim and then hidden_dim // 2.
- Incorporates batch normalization and dropout layers to improve training stability and mitigate overfitting.
- Aims to assess the benefits of increased layer width in capturing complex data patterns compared to deeper or simpler models.


In [None]:
# Define the wider model
class FiberOpticFNN2(nn.Module):
    """Feed-forward regressor whose first hidden layer is twice as wide as hidden_dim.

    Layer widths: input_dim -> 2 * hidden_dim -> hidden_dim -> hidden_dim // 2 -> output_dim.
    Each hidden layer is followed by batch normalization, ReLU and dropout.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout):
        super().__init__()
        widths = (input_dim, hidden_dim * 2, hidden_dim, hidden_dim // 2)

        stack = []
        for n_in, n_out in zip(widths, widths[1:]):
            stack.extend((
                nn.Linear(n_in, n_out),
                nn.BatchNorm1d(n_out),
                nn.ReLU(),
                nn.Dropout(dropout),
            ))
        stack.append(nn.Linear(widths[-1], output_dim))  # linear read-out

        self.fc = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to `x` and return the result."""
        return self.fc(x)

<h4> model 4: Dynamic Model </h4>

- This is a dynamic neural network architecture
- Features progressively shrinking hidden layers: the first has hidden_dim units, the second int(0.75 × hidden_dim), and the third int(0.5 × hidden_dim).
- Employs batch normalization after each layer to stabilize training and dropout to reduce overfitting.
- Incorporates ReLU activations for non-linearity and efficient feature learning.
- Evaluates the effectiveness of a dynamically shrinking architecture in balancing complexity and computational efficiency.


In [None]:
# Define the dynamic model
class FiberOpticFNN3(nn.Module):
    """Feed-forward regressor with three hidden layers of shrinking width.

    Layer widths: input_dim -> hidden_dim -> int(0.75 * hidden_dim)
    -> int(0.5 * hidden_dim) -> output_dim. Each hidden layer is followed by
    batch normalization, ReLU and dropout.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout):
        super().__init__()
        three_quarter_dim = int(hidden_dim * 0.75)
        half_dim = int(hidden_dim * 0.5)

        def hidden_block(n_in, n_out):
            # Linear layer plus its normalization, activation and dropout
            return [
                nn.Linear(n_in, n_out),
                nn.BatchNorm1d(n_out),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]

        self.fc = nn.Sequential(
            *hidden_block(input_dim, hidden_dim),
            *hidden_block(hidden_dim, three_quarter_dim),
            *hidden_block(three_quarter_dim, half_dim),
            nn.Linear(half_dim, output_dim),
        )

    def forward(self, x):
        """Apply the layer stack to `x` and return the result."""
        return self.fc(x)

<h4> model 5: Noise Resilient Model </h4>

- This model is explicitly designed to be noise-resilient, making it adept at capturing small but frequent deviations in fiber optic data, which may appear noise-like.
- The model begins with a feature extractor, which maps the input into a higher-dimensional representation. This step ensures that the core characteristics of the data are well-represented for further processing. The use of batch normalization and ReLU activation enhances stability and non-linearity.
- A specialized noise-focused branch is incorporated to target and capture subtle variations in the data. By reducing the feature dimension and employing a Tanh activation function, this branch emphasizes small deviations while minimizing overfitting with dropout.
- A residual pathway is added to preserve the original feature representation from the extractor. This helps the model maintain key input information while refining the features for the final prediction.
- The model combines features from the noise-sensitive branch and the residual pathway through concatenation, creating a rich feature set for prediction. This enables the model to balance sensitivity to noise with robustness in prediction.
- The combined features are passed through a fully connected layer with batch normalization and ReLU activation before producing the final output. This ensures effective learning of complex patterns while maintaining regularization.


In [None]:
# Define the noise-resilient model
class FiberOpticFNN4(nn.Module):
    """Two-path regressor: a Tanh "noise" branch concatenated with a residual path.

    A shared extractor maps the input to hidden_dim features. Those features
    feed (a) a branch that halves the width and applies Tanh and dropout, and
    (b) a linear layer whose output is added back onto the features. The two
    results are concatenated (hidden_dim + hidden_dim // 2 values) and passed
    to the final head.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout):
        super().__init__()
        half_dim = hidden_dim // 2

        # Shared front end
        self.feature_extractor = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
        )

        # Narrower Tanh branch meant to pick up small deviations
        self.noise_branch = nn.Sequential(
            nn.Linear(hidden_dim, half_dim),
            nn.BatchNorm1d(half_dim),
            nn.Tanh(),
            nn.Dropout(dropout),
        )

        # Linear map whose output is summed with the extractor features
        self.residual = nn.Linear(hidden_dim, hidden_dim)

        # Head operating on the concatenation of both paths
        self.combined = nn.Sequential(
            nn.Linear(hidden_dim + half_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )

    def forward(self, x):
        """Run both paths on the extracted features and return the head's output."""
        features = self.feature_extractor(x)
        noise_features = self.noise_branch(features)
        refined_features = features + self.residual(features)
        # Residual-path features first, then the noise-branch features
        merged = torch.cat([refined_features, noise_features], dim=1)
        return self.combined(merged)

<h4> model 6: Residual Connection Model </h4>

- This is a neural network model designed to improve the learning process using residual connections.
- Starts with a hidden layer that applies batch normalization and ReLU activation for non-linearity, allowing the model to capture complex patterns.
- Introduces a residual connection around the second hidden layer: that layer's output is added to its own input (the output of the first hidden layer), enabling the model to retain earlier features while learning refined representations, which helps prevent vanishing gradients and enhances training stability.
- The output layer generates predictions by mapping the hidden features to the target output dimension.
- Evaluates the effectiveness of residual connections in maintaining feature integrity while refining complex data representations, particularly useful for fiber optic signal processing tasks.


In [None]:
# Define the Residual Connections model
class FiberOpticFNN5(nn.Module):
    """Regressor with one skip connection around its second hidden layer.

    The input layer maps input_dim to hidden_dim. The hidden layer keeps that
    width and its output is added to its own input before the linear read-out.
    This model has no dropout.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()

        def dense_block(n_in, n_out):
            # Linear -> BatchNorm -> ReLU
            return nn.Sequential(
                nn.Linear(n_in, n_out),
                nn.BatchNorm1d(n_out),
                nn.ReLU(),
            )

        self.input_layer = dense_block(input_dim, hidden_dim)
        self.hidden_layer = dense_block(hidden_dim, hidden_dim)
        self.output_layer = nn.Sequential(nn.Linear(hidden_dim, output_dim))

    def forward(self, x):
        """Return the read-out of ``h + hidden_layer(h)`` where ``h = input_layer(x)``."""
        hidden = self.input_layer(x)
        skip_sum = hidden + self.hidden_layer(hidden)  # residual connection
        return self.output_layer(skip_sum)

<h3> Step 4 - Training loop </h3>

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=100, patience=10):
    """Train `model` with early stopping and return the loss history and best weights.

    The model is moved to CUDA when available, otherwise it stays on the CPU.
    After every epoch the mean validation loss is compared with the best value
    seen so far; training stops once it has failed to improve for `patience`
    consecutive epochs.

    Args:
        model: Module to train; modified in place.
        train_loader: Iterable of ``(X_batch, y_batch)`` tensors used for optimisation.
        val_loader: Iterable of ``(X_batch, y_batch)`` tensors used for validation.
        criterion: Loss callable taking ``(predictions, targets)``.
        optimizer: Optimizer already bound to the parameters of `model`.
        num_epochs: Maximum number of epochs.
        patience: Number of consecutive non-improving epochs before stopping.

    Returns:
        Tuple ``(train_losses, val_losses, best_weights)``. The two lists hold
        one per-batch-averaged loss per completed epoch. ``best_weights`` is
        ``{"model weights": state_dict}`` holding a snapshot taken at the epoch
        with the lowest validation loss, or ``None`` if no epoch improved on
        the initial ``inf`` (for example when every validation loss is NaN).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    train_losses = []
    val_losses = []
    best_weights = None

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:

            # Move data to the training device
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        train_losses.append(running_loss / len(train_loader))

        # Validation phase (dropout off, batch-norm running statistics, no gradients)
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:

                # Move data to the training device
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)

                predictions = model(X_batch)
                loss = criterion(predictions, y_batch)
                val_loss += loss.item()

        val_losses.append(val_loss / len(val_loader))

        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}")

        # Early stopping
        if val_losses[-1] < best_val_loss:
            best_val_loss = val_losses[-1]
            patience_counter = 0
            # state_dict() returns references to the live tensors, which later
            # optimizer steps overwrite in place. Clone them so the snapshot
            # stays frozen at this epoch.
            best_weights = {
                "model weights": {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }
            }
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break

    return train_losses, val_losses, best_weights

<h3> Step 5 - Define Objective functions </h3>

In [None]:
def _best_completed_value(trial):
    """Return the best objective value among the study's completed trials, or inf if none."""
    study = getattr(trial, "study", None)
    if study is None:
        # Stand-alone trial objects without a study attached have nothing to compare against
        return float("inf")
    try:
        return study.best_value
    except ValueError:
        # Optuna raises ValueError while no trial has completed yet
        return float("inf")


def _run_trial(trial, model_cls, results_dir, use_dropout=True):
    """Train one sampled configuration of `model_cls` and keep it if it is the study's best.

    Hyperparameters are sampled from `trial`, the model is trained with
    `train_model` on the notebook-level `train_loader` and `val_loader`, and
    the trial's lowest validation loss is returned as the objective value.
    Weights and parameters are written to `results_dir` only when that loss
    beats every trial completed so far, so the files always describe the best
    trial rather than the most recent one.

    Args:
        trial: Optuna trial used to sample hyperparameters.
        model_cls: Model class, called as ``model_cls(input_dim, hidden_dim, 1[, dropout])``.
        results_dir: Directory for ``model_weights.pth`` and ``best_params.json``;
            created if missing.
        use_dropout: Whether to sample a dropout rate and pass it to `model_cls`.

    Returns:
        The lowest validation loss of this trial, or ``inf`` if training
        produced no finite validation loss.
    """
    # Ensure directory exists
    os.makedirs(results_dir, exist_ok=True)

    # Hyperparameters to tune
    hidden_dim = trial.suggest_int("hidden dim", 128, 320, step=16)
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight decay", 1e-6, 1e-3, log=True)

    model_args = [X_train.shape[1], hidden_dim, 1]
    best_params = {
        "hidden_dim": hidden_dim,
        "lr": lr,
        "weight_decay": weight_decay,
    }
    if use_dropout:
        dropout_rate = trial.suggest_float("dropout rate", 0.1, 0.5)
        model_args.append(dropout_rate)
        best_params["dropout_rate"] = dropout_rate

    # Instantiate the model
    model = model_cls(*model_args).float()

    criterion = nn.MSELoss()  # Define loss function
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)  # Define optimizer

    # Train the model
    train_losses, val_losses, best_weights = train_model(
        model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=100,
        patience=15
    )

    trial_loss = min(val_losses)
    if best_weights is None or not trial_loss < float("inf"):
        # NaN or inf loss: report the worst possible value and save nothing
        return float("inf")

    # Persist only when this trial beats every trial completed so far
    if trial_loss < _best_completed_value(trial):
        # Save model weights
        weights_path = os.path.join(results_dir, "model_weights.pth")
        torch.save(best_weights, weights_path)

        # Save best params together with the loss curves
        best_params["train_losses"] = train_losses
        best_params["val_losses"] = val_losses
        params_path = os.path.join(results_dir, "best_params.json")
        with open(params_path, "w") as json_file:
            json.dump(best_params, json_file, indent=4)
        print(f"Files saved successfully: {weights_path}, {params_path}")

    return trial_loss


# Basic
def objective0(trial):
    """Optuna objective for the basic model (FiberOpticFNN0)."""
    return _run_trial(trial, FiberOpticFNN0, "model results/basic/QPSK")


#deeper
def objective1(trial):
    """Optuna objective for the deeper model (FiberOpticFNN1)."""
    return _run_trial(trial, FiberOpticFNN1, "model results/deeper/QPSK")


# Wider
def objective2(trial):
    """Optuna objective for the wider model (FiberOpticFNN2)."""
    return _run_trial(trial, FiberOpticFNN2, "model results/wider/QPSK")


# Dynamic
def objective3(trial):
    """Optuna objective for the dynamic model (FiberOpticFNN3)."""
    return _run_trial(trial, FiberOpticFNN3, "model results/dynamic/QPSK")


# noise resilient
def objective4(trial):
    """Optuna objective for the noise-resilient model (FiberOpticFNN4)."""
    return _run_trial(trial, FiberOpticFNN4, "model results/NR/QPSK")


#Residual Connections
def objective5(trial):
    """Optuna objective for the residual model (FiberOpticFNN5), which takes no dropout rate."""
    return _run_trial(trial, FiberOpticFNN5, "model results/residual/QPSK", use_dropout=False)

<h3> Step 6 - Run the studies </h3>

In [None]:
# Hyperparameter search for the basic model: Optuna minimises the value
# returned by objective0 (a validation loss) over 100 trials.
study0 = optuna.create_study(direction="minimize")
study0.optimize(func=objective0, n_trials=100)

In [None]:

# Hyperparameter search for the deeper model (objective1), 100 trials, lower is better
study1 = optuna.create_study(direction="minimize")
study1.optimize(func=objective1, n_trials=100)

In [None]:

# Remaining searches run back to back, 100 trials each, all minimising the
# value returned by the matching objective.

# Wider model
study2 = optuna.create_study(direction="minimize")
study2.optimize(func=objective2, n_trials=100)

# Dynamic model
study3 = optuna.create_study(direction="minimize")
study3.optimize(func=objective3, n_trials=100)

# Noise-resilient model
study4 = optuna.create_study(direction="minimize")
study4.optimize(func=objective4, n_trials=100)

# Residual-connection model
study5 = optuna.create_study(direction="minimize")
study5.optimize(func=objective5, n_trials=100)

Epoch 10/100, Train Loss: 0.8029, Val Loss: 1.0514
Epoch 11/100, Train Loss: 0.7905, Val Loss: 1.0509
Epoch 12/100, Train Loss: 0.7791, Val Loss: 1.0683
Epoch 13/100, Train Loss: 0.7680, Val Loss: 1.0774
Epoch 14/100, Train Loss: 0.7561, Val Loss: 1.0702
Epoch 15/100, Train Loss: 0.7452, Val Loss: 1.0784


[I 2025-01-23 15:05:00,510] Trial 10 finished with value: 0.9951167145332733 and parameters: {'hidden dim': 320, 'lr': 6.392957632566066e-05, 'weight decay': 8.09785188900409e-06}. Best is trial 7 with value: 0.9894521944708639.


Epoch 16/100, Train Loss: 0.7351, Val Loss: 1.0835
Early stopping triggered
Epoch 1/100, Train Loss: 0.9547, Val Loss: 0.9938
Epoch 2/100, Train Loss: 0.9146, Val Loss: 1.0010
Epoch 3/100, Train Loss: 0.8985, Val Loss: 1.0004
Epoch 4/100, Train Loss: 0.8848, Val Loss: 1.0062
Epoch 5/100, Train Loss: 0.8712, Val Loss: 1.0123
Epoch 6/100, Train Loss: 0.8586, Val Loss: 1.0159
Epoch 7/100, Train Loss: 0.8464, Val Loss: 1.0269
Epoch 8/100, Train Loss: 0.8344, Val Loss: 1.0304
Epoch 9/100, Train Loss: 0.8236, Val Loss: 1.0401
Epoch 10/100, Train Loss: 0.8119, Val Loss: 1.0453
Epoch 11/100, Train Loss: 0.8006, Val Loss: 1.0571
Epoch 12/100, Train Loss: 0.7904, Val Loss: 1.0567
Epoch 13/100, Train Loss: 0.7794, Val Loss: 1.0777
Epoch 14/100, Train Loss: 0.7704, Val Loss: 1.0850
Epoch 15/100, Train Loss: 0.7599, Val Loss: 1.0770


[I 2025-01-23 15:06:54,125] Trial 11 finished with value: 0.993777049439294 and parameters: {'hidden dim': 272, 'lr': 6.775026468049846e-05, 'weight decay': 8.073469108958549e-06}. Best is trial 7 with value: 0.9894521944708639.


Epoch 16/100, Train Loss: 0.7509, Val Loss: 1.0851
Early stopping triggered
Epoch 1/100, Train Loss: 0.9650, Val Loss: 0.9960
Epoch 2/100, Train Loss: 0.9140, Val Loss: 1.0002
Epoch 3/100, Train Loss: 0.8975, Val Loss: 1.0026
Epoch 4/100, Train Loss: 0.8835, Val Loss: 1.0131
Epoch 5/100, Train Loss: 0.8705, Val Loss: 1.0179
