In [1]:
import torch
import torch.nn as nn

class CANnoloAutoencoder(nn.Module):
    def __init__(self, embedding_dim, lstm_units, dense_units, dropout_rate, num_embeddings):
        super(CANnoloAutoencoder, self).__init__()

        # Encoder
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        self.encoder_dense = nn.Linear(embedding_dim+45, dense_units)
        self.encoder_dropout = nn.Dropout(dropout_rate)
        self.encoder_lstm = nn.LSTM(input_size=dense_units, hidden_size=lstm_units, num_layers=2, batch_first=True)

        # Decoder
        self.decoder_lstm = nn.LSTM(input_size=lstm_units, hidden_size=lstm_units, num_layers=2, batch_first=True)
        self.decoder_dense = nn.Linear(lstm_units, 45)
        self.decoder_output = nn.Sigmoid()  # To reconstruct the original packets

    def forward(self, can_ids, features):
        # Encoding
        embedded_ids = self.embedding(can_ids)
        # You might need to concatenate the embedded IDs with other features
        x = torch.cat([embedded_ids, features], dim=1)
        x = torch.tanh(self.encoder_dense(x))
        x = self.encoder_dropout(x)
        x, _ = self.encoder_lstm(x)

        # Decoding
        x, _ = self.decoder_lstm(x)
        x = self.decoder_dense(x)
        reconstructed = self.decoder_output(x)

        return reconstructed

        # return reconstructed


In [2]:
from CanDataLoader import CanDataLoader
from dotenv import load_dotenv
import os

load_dotenv()
data_path = os.getenv('DATA_PATH')
dataset = CanDataLoader(data_path, log_verbosity=1)


Found ambient and attack directories.
Loading CAN metadata...
Parquet files found...
Found processed parquet files...
Loading processed parquet files...
Loading processing data into 'CanData' structure


In [23]:
config = {
    "batch_size": 32,
    "delta_time_last_msg": {
        "specific_to_can_id": False,
        "records_back": 30
    },
    "delta_time_last_same_aid": {
        "specific_to_can_id": True,
        "records_back": 15
    },
}

ambient_loader, validation_loader, attack_loader = dataset.prepare_data(config)

In [24]:
num_can_ids = len(dataset.get_unique_can_ids())
window_size = ambient_loader.features_len

In [25]:
print(num_can_ids)

105


In [26]:
# Hyperparameters
embedding_dim = num_can_ids  # Example value
lstm_units = 128
dense_units = 256
dropout_rate = 0.2
num_embeddings = max(dataset.get_unique_can_ids()) + 1 

# Model
model = CANnoloAutoencoder(embedding_dim, lstm_units, dense_units, dropout_rate, num_embeddings)

# Training parameters
batch_size = 32
optimizer = torch.optim.Adam(model.parameters())
loss_fn = nn.BCELoss()  # Binary Cross-Entropy Loss


In [14]:
for i in range(100):
    ambient_loader.__getitem__(0)

In [19]:
test_batch_can_ids, test_batch_features = next(iter(ambient_loader))



In [20]:
print(test_batch_can_ids[0])
print(test_batch_features[0])

tensor(263)
tensor([9.5367e-07, 1.1921e-06, 1.0140e-03, 9.5367e-07, 9.5367e-07, 9.5367e-07,
        9.9611e-04, 9.5367e-07, 9.5367e-07, 9.5367e-07, 1.0190e-03, 1.0362e-03,
        1.9073e-06, 9.9897e-04, 1.2989e-03, 1.0111e-03, 1.2510e-03, 9.9897e-04,
        9.5367e-07, 9.5367e-07, 1.1921e-06, 1.0159e-03, 2.2759e-03, 1.0021e-03,
        9.5367e-07, 9.5367e-07, 1.1921e-06, 1.0359e-03, 1.5974e-05, 9.5367e-07,
        2.0255e-02, 1.9710e-02, 1.9943e-02, 2.0122e-02, 1.9951e-02, 2.0268e-02,
        1.9752e-02, 1.9815e-02, 2.0156e-02, 2.0015e-02, 2.0244e-02, 1.9769e-02,
        1.9776e-02, 2.0198e-02, 2.0007e-02])


In [8]:
# Running a forward pass with a batch of data
test_batch_can_ids, test_batch_features = next(iter(ambient_loader))
reconstructed_output = model(test_batch_can_ids, test_batch_features)

mse_loss = torch.nn.MSELoss()
error = mse_loss(reconstructed_output, test_batch_features)
print("Reconstruction Error:", error.item())

Reconstruction Error: 0.2311510145664215


In [32]:
def validate_model(model, validation_loader, loss_fn):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    with torch.no_grad():  # No need to track gradients during validation
        # for can_ids, features in validation_loader:
        #     output = model(can_ids, features)
        #     loss = loss_fn(output, features)  # Adjust depending on your exact setup
        #     total_loss += loss.item()

        for i, (can_ids, features) in enumerate(validation_loader):
            if i % 1000 == 0:
                break
            print(f"{i}\r")
            # Forward pass: compute the model output
            reconstructed = model(can_ids, features)
            # Compute the loss
            loss = loss_fn(reconstructed, features)  # Ensure correct target is used
            total_train_loss += loss.item()

    avg_loss = total_loss / len(validation_loader)
    return avg_loss

In [None]:
loss_fn = torch.nn.MSELoss()  # Example loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Example optimizer

In [36]:
num_epochs = 2
train_loader = ambient_loader
validation_interval = 1000 


model.train()  # Set the model to training mode
total_train_loss = 0
pseudo_epoch_size = 5000
pseudo_epoch = 1

for i, (can_ids, features) in enumerate(train_loader):
    print(f"{i}", end="\r")

    # Forward pass: compute the model output
    reconstructed = model(can_ids, features)

    # Compute the loss
    loss = loss_fn(reconstructed, features)  # Ensure correct target is used
    total_train_loss += loss.item()

    # Backward pass and optimization
    optimizer.zero_grad()  # Clear existing gradients
    loss.backward()  # Compute gradients
    optimizer.step()  # Update weights

    if i % pseudo_epoch_size == 0:
        avg_train_loss = total_train_loss / len(train_loader)
        print(f"Epoch {pseudo_epoch}, Average Training Loss: {avg_train_loss}")
        pseudo_epoch += 1
        if pseudo_epoch > num_epochs:
            break

    if i % validation_interval == 0:
        # Validate the model at specified intervals
        validation_loss = validate_model(model, validation_loader, loss_fn)
        print(f"Psuedo Epoch {pseudo_epoch}, Validation Loss: {validation_loss}")
        


Epoch 1, Average Training Loss: 1.1226476892860105e-07
Psuedo Epoch 2, Validation Loss: 0.0
Psuedo Epoch 2, Validation Loss: 0.0
Psuedo Epoch 2, Validation Loss: 0.0
2440