### Unsupervised GRU-VAE training  

In [4]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import numpy as np # For standard deviation calculation
from modbus import ModbusDataset,ModbusFlowStream
from sklearn.metrics import f1_score, accuracy_score
import torch.optim as optim
import pickle
import os 
from torch.utils.data import DataLoader
import time
import random
from utils import load_scalers
from random import SystemRandom

def compute_threshold(mse_values):
    """
    Compute the anomaly-detection threshold from reconstruction errors.

    A sample is flagged as an intrusion when its score exceeds this value.
    Formula: thr = mean(MSE) + std(MSE), where std is the sample standard
    deviation (torch.std default, i.e. with Bessel's correction).

    Args:
        mse_values (torch.Tensor or list/np.array): MSE values, e.g. the ones
            obtained on the validation set. Non-tensor inputs are converted
            to a float32 tensor; integer/bool tensors are cast to float32.

    Returns:
        float: The calculated threshold.
            * 0.0 when ``mse_values`` is empty.
            * The single value itself when only one value is given (its
              spread is taken as 0 instead of the NaN torch.std would give).
    """
    if not isinstance(mse_values, torch.Tensor):
        mse_values = torch.tensor(mse_values, dtype=torch.float32)
    elif not (mse_values.is_floating_point() or mse_values.is_complex()):
        # torch.mean / torch.std reject integer and bool tensors.
        mse_values = mse_values.to(torch.float32)

    count = mse_values.numel()
    if count == 0:
        return 0.0

    mean_mse = torch.mean(mse_values)
    if count == 1:
        # The sample standard deviation of one observation is undefined
        # (torch.std yields NaN); treat the spread as zero so the threshold
        # stays a usable number.
        return mean_mse.item()

    std_mse = torch.std(mse_values)
    return (mean_mse + std_mse).item()




In [5]:

# AutoEncoder (AE)
class AE(nn.Module):
    """
    Fully connected autoencoder.

    Encoder: (input_dim-hidden_dim-latent_dim), default (89-64-32)
    Decoder: (latent_dim-hidden_dim-input_dim), default (32-64-89)

    Every Linear layer, including the last decoder layer, is followed by a
    ReLU, so reconstructions are always non-negative.

    Args:
        input_dim (int): Number of features per sample. Default 89.
        hidden_dim (int): Width of the intermediate layer. Default 64.
        latent_dim (int): Size of the bottleneck code. Default 32.
    """
    def __init__(self, input_dim=89, hidden_dim=64, latent_dim=32):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.ReLU()
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction (same last dimension as ``x``)."""
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon

# GRU-VAE
class GRUVAE(nn.Module):
    """
    Sequence VAE built from two GRU stacks (encoder and decoder).

    Defaults: num_layers=2, hidden_size=256, dropout=0.01, latent_dim=32.
    The window size (seq_len) is taken from the input; the original notes
    mention a window of 40.

    forward(x) returns (x_recon, mu, logvar).
    """
    def __init__(self, input_dim=89, hidden_dim=256, latent_dim=32, num_layers=2, dropout=0.01):
        super().__init__()
        # Encoder and decoder GRUs share the exact same configuration.
        gru_config = dict(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        # Modules are created in the same order as before so that seeded
        # parameter initialisation is unchanged.
        self.encoder_gru = nn.GRU(**gru_config)
        self.fc_mu = nn.Linear(hidden_dim, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim, latent_dim)
        self.fc_z_to_hidden = nn.Linear(latent_dim, hidden_dim)
        self.decoder_gru = nn.GRU(**gru_config)
        self.fc_out = nn.Linear(hidden_dim, input_dim)

    def reparameterize(self, mu, logvar):
        """Draw z = mu + eps * sigma with eps ~ N(0, I) and sigma = exp(logvar / 2)."""
        sigma = (0.5 * logvar).exp()
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def forward(self, x):
        """
        Args:
            x: tensor of shape [batch_size, seq_len, input_dim].

        Returns:
            tuple: (x_recon [batch_size, seq_len, input_dim],
                    mu [batch_size, latent_dim],
                    logvar [batch_size, latent_dim]).
        """
        # Only the final hidden state of the top encoder layer is used.
        _, enc_hidden = self.encoder_gru(x)
        summary = enc_hidden[-1]  # [batch_size, hidden_dim]

        mu = self.fc_mu(summary)
        logvar = self.fc_logvar(summary)
        z = self.reparameterize(mu, logvar)  # [batch_size, latent_dim]

        # Project z to the hidden size and copy it to every decoder layer:
        # [num_layers, batch_size, hidden_dim].
        layer_count = self.encoder_gru.num_layers
        h0 = self.fc_z_to_hidden(z).unsqueeze(0).repeat(layer_count, 1, 1)

        # The decoder receives all-zero inputs; the information comes from h0.
        dec_states, _ = self.decoder_gru(torch.zeros_like(x), h0)  # [batch_size, seq_len, hidden_dim]
        return self.fc_out(dec_states), mu, logvar

In [3]:
# Root folder that contains the benign and attack sub-directories;
# change this to match your local copy of the dataset.
dataset_directory = "./dataset"

# NOTE(review): the meaning of the second argument ("ready") is defined by
# ModbusDataset in modbus.py — confirm there.
modbus = ModbusDataset(dataset_directory, "ready")
modbus.summary_print()

 The CIC Modbus Dataset contains network (pcap) captures and attack logs from a simulated substation network.
                The dataset is categorized into two groups: an attack dataset and a benign dataset
                The attack dataset includes network traffic captures that simulate various types of Modbus protocol attacks in a substation environment.
                The attacks are reconnaissance, query flooding, loading payloads, delay response, modify length parameters, false data injection, stacking Modbus frames, brute force write and baseline replay.
                These attacks are based of some techniques in the MITRE ICS ATT&CK framework.
                On the other hand, the benign dataset consists of normal network traffic captures representing legitimate Modbus communication within the substation network.
                The purpose of this dataset is to facilitate research, analysis, and development of intrusion detection systems, anomaly detection algorithms and

In [7]:
# Load the previously fitted scalers from "fitted_scalers".
loaded_scalers = load_scalers("fitted_scalers")

# Stream over the first two benign capture files with a window of one flow.
AE_dataset = ModbusFlowStream(
    shuffle=True,
    chunk_size=1,
    batch_size=64,
    csv_files=modbus.dataset["benign_dataset_dir"][0:2],
    scalers=loaded_scalers['network-wide']['min_max_scalers'],
    window_size=1,
)
# NOTE(review): batch_size=1 here presumably because ModbusFlowStream already
# yields batches of 64 itself — confirm against modbus.py.
AE_dataloader = DataLoader(AE_dataset, batch_size=1, shuffle=False)
csv_files = modbus.dataset["benign_dataset_dir"][0:5]
print(len(AE_dataloader))

# Model, optimiser and loss for the plain autoencoder.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
AE_model = AE().to(device)
lr = 0.01
wd = 1e-4
shuffle_files = True
AE_optimizer = optim.Adam(AE_model.parameters(), lr=lr, weight_decay=wd)
# Summed (not averaged) squared error over every element of the batch.
criterion = nn.MSELoss(reduction='sum').to(device)



Successfully loaded scalers for 'network-wide'
4779


In [None]:

# Train the plain autoencoder for three epochs on the benign stream.
for epoch in range(3):
    epoch_start = time.time()
    AE_model.train()
    train_loss = 0

    # Visit the capture files in a fresh OS-randomised order each epoch.
    if shuffle_files:
        SystemRandom().shuffle(AE_dataset.file_order_indices)

    for sequences, _ in AE_dataloader:
        # Drop every singleton dimension before moving the batch to the device.
        sequences = sequences.squeeze().to(device)
        AE_optimizer.zero_grad()
        recon = AE_model(sequences)
        loss = criterion(recon, sequences)
        loss.backward()
        AE_optimizer.step()
        train_loss += loss.item()

    # Report wall-clock time and the summed loss averaged over loader batches.
    elapsed = time.time() - epoch_start
    mean_loss = train_loss / len(AE_dataloader)
    print("time", elapsed, f"Epoch {epoch}, Train Loss: {mean_loss}")



time 16.045210599899292 Epoch 0, Train Loss: 47.10480882214013
time 15.386170387268066 Epoch 1, Train Loss: 24.302919655720242
time 15.964890480041504 Epoch 2, Train Loss: 1.5458311491427024


KeyboardInterrupt: 

In [10]:

# Variational AutoEncoder (VAE)
class VAE(nn.Module):
    """
    Fully connected variational autoencoder.

    Encoder: (input_dim-hidden_dim-hidden_dim) followed by two linear heads
             of size latent_dim for mu and log_var; default (89-64-64-32).
    Decoder: (latent_dim-hidden_dim-hidden_dim-input_dim); default (32-64-64-89).
             The last decoder layer is followed by a ReLU, so reconstructions
             are non-negative.

    Args:
        input_dim (int): Number of features per sample. Default 89.
        hidden_dim (int): Width of the intermediate layers. Default 64.
        latent_dim (int): Size of the latent code. Default 32.

    forward(x) returns x_recon, mu, logvar.
    """
    def __init__(self, input_dim=89, hidden_dim=64, latent_dim=32):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(hidden_dim, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim, latent_dim)
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.ReLU()
        )

    def reparameterize(self, mu, logvar):
        """Sample z = mu + eps * std with eps ~ N(0, I) and std = exp(logvar / 2)."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        """Return (x_recon, mu, logvar) for the input batch ``x``."""
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        x_recon = self.decoder(z)
        return x_recon, mu, logvar


def vae_loss_function(recon_x, x, mu, logvar,beta =1):
    """
    VAE loss: summed squared reconstruction error plus a weighted KL term.

    Args:
        recon_x: reconstruction produced by the model.
        x: original input, same shape as ``recon_x``.
        mu: latent means.
        logvar: latent log-variances.
        beta: weight applied to the KL divergence (default 1).

    Returns:
        Scalar tensor ``reconstruction + beta * KL``, where KL is the
        divergence between N(mu, exp(logvar)) and the standard normal,
        summed over all elements.
    """
    # Reconstruction term: squared error summed over every element.
    reconstruction = F.mse_loss(recon_x, x, reduction='sum')
    # Closed-form KL divergence to N(0, I).
    kl_divergence = -0.5 * (1 + logvar - mu.pow(2) - logvar.exp()).sum()
    return reconstruction + beta * kl_divergence
# Example (left commented out): building a windowed flow stream and DataLoader for the GRU model
# GRU_dataset=ModbusFlowStream(
#     shuffle=False,chunk_size=1,batch_size=64,csv_files=modbus.dataset["benign_dataset_dir"][0:2],scalers=loaded_scalers['network-wide']['min_max_scalers'],window_size=30
# )

# GRU_dataloader=DataLoader(GRU_dataset,batch_size=1,shuffle=False)


In [13]:
# Hyper-parameters for the VAE run.
lr = 0.0001
wd= 1e-4

VAE_model = VAE().to(device=device)
VAE_optimizer = optim.Adam(VAE_model.parameters(), lr=lr, weight_decay=wd)

# Re-create the benign stream and loader so this cell starts from a fresh
# dataset object (same settings as the plain autoencoder run).
AE_dataset=ModbusFlowStream(
    shuffle=True,
    chunk_size=1,
    batch_size=64,
    csv_files=modbus.dataset["benign_dataset_dir"][0:2],
    scalers=loaded_scalers['network-wide']['min_max_scalers'],
    window_size=1,
)
AE_dataloader=DataLoader(AE_dataset,batch_size=1,shuffle=False)

for epoch in range(3):
    time_1 = time.time()
    train_loss = 0
    # Put the model that is actually being optimised into training mode.
    # (This line used to call AE_model.train(), which left VAE_model's mode
    # untouched.)
    VAE_model.train()
    if shuffle_files:
        # Visit the capture files in a fresh OS-randomised order each epoch.
        sys_rand = SystemRandom()
        sys_rand.shuffle(AE_dataset.file_order_indices)
    for sequences, _ in AE_dataloader:
        # Drop every singleton dimension before moving the batch to the device.
        sequences = sequences.squeeze().to(device)
        VAE_optimizer.zero_grad()
        recon, mu, logvar = VAE_model(sequences)
        loss = vae_loss_function(recon, sequences, mu, logvar)
        loss.backward()
        VAE_optimizer.step()
        train_loss += loss.item()
    # Summed loss averaged over the number of loader batches.
    print("time",time.time()-time_1,f"Epoch {epoch}, Train Loss: {train_loss / len(AE_dataloader)}")


time 20.744476318359375 Epoch 0, Train Loss: 143.65623077564993
time 19.662742614746094 Epoch 1, Train Loss: 110.98179194946673
time 19.732792854309082 Epoch 2, Train Loss: 94.82658667383795


In [None]:

# Adversarial AutoEncoder (AAE)
class AAE(nn.Module):
    """
    Adversarial autoencoder: encoder, decoder and a latent-space discriminator.

    Encoder:       (input_dim-hidden_dim-bottleneck_dim-latent_dim), default (89-16-4-2)
    Decoder:       (latent_dim-bottleneck_dim-hidden_dim-input_dim), default (2-4-16-89)
    Discriminator: (latent_dim-hidden_dim-bottleneck_dim-2),         default (2-16-4-2)
    Activation: LeakyReLU after every encoder/decoder layer; the
    discriminator ends with a Sigmoid over its two outputs.

    Args:
        input_dim (int): Number of features per sample. Default 89.
        hidden_dim (int): Width of the wider hidden layer. Default 16.
        bottleneck_dim (int): Width of the narrower hidden layer. Default 4.
        latent_dim (int): Size of the latent code. Default 2.
    """
    def __init__(self, input_dim=89, hidden_dim=16, bottleneck_dim=4, latent_dim=2):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, bottleneck_dim),
            nn.LeakyReLU(),
            nn.Linear(bottleneck_dim, latent_dim),
            nn.LeakyReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, bottleneck_dim),
            nn.LeakyReLU(),
            nn.Linear(bottleneck_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.LeakyReLU()
        )
        # The discriminator operates on latent codes, so its input size is
        # latent_dim (not hidden_dim).
        self.discriminator = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, bottleneck_dim),
            nn.LeakyReLU(),
            nn.Linear(bottleneck_dim, 2), # Output for binary classification (real/fake)
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return (x_recon, z): the reconstruction and the latent code of ``x``."""
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon, z

    def discriminate(self, z):
        """Score latent codes ``z`` with the discriminator (values in [0, 1])."""
        return self.discriminator(z)