#### Load Data

In [5]:
from CanDataset import CanDataset
from dotenv import load_dotenv
import torch
import torch.nn as nn
import os

# Pull environment variables from a local .env file so DATA_PATH does not have
# to be hard-coded in the notebook.
load_dotenv()
# os.getenv returns None when DATA_PATH is unset; in that case CanDataset
# receives None as its path.
data_path = os.getenv('DATA_PATH')
# NOTE(review): CanDataset is a project-local module; the recorded run of this
# cell failed with ModuleNotFoundError, so it must be importable from the
# kernel's working directory. The meaning of log_verbosity=4 is not visible
# here - confirm against CanDataset.
dataset = CanDataset(data_path, log_verbosity=4)

ModuleNotFoundError: No module named 'CanDataset'

#### ML model

In [None]:
class CANnoloAutoencoder(nn.Module):
    """LSTM autoencoder that reconstructs a feature vector from a CAN ID plus features.

    Encoder: CAN-ID embedding concatenated with the feature vector ->
    dense layer (tanh) -> dropout -> 2-layer LSTM.
    Decoder: 2-layer LSTM -> dense layer -> sigmoid, producing a tensor whose
    last dimension is ``feature_vec_length``.

    Args:
        embedding_dim: Size of each CAN-ID embedding vector.
        lstm_units: Hidden size of both the encoder and decoder LSTMs.
        dense_units: Output size of the encoder dense layer.
        dropout_rate: Dropout probability applied after the encoder dense layer.
        num_embeddings: Number of rows in the embedding table; every value in
            ``can_ids`` must be smaller than this.
        feature_vec_length: Length of the feature vector that is fed in and
            reconstructed.
    """

    def __init__(self, embedding_dim, lstm_units, dense_units, dropout_rate, num_embeddings, feature_vec_length):
        super().__init__()

        # Encoder
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        self.encoder_dense = nn.Linear(embedding_dim + feature_vec_length, dense_units)
        self.encoder_dropout = nn.Dropout(dropout_rate)
        self.encoder_lstm = nn.LSTM(input_size=dense_units, hidden_size=lstm_units, num_layers=2, batch_first=True)

        # Decoder
        self.decoder_lstm = nn.LSTM(input_size=lstm_units, hidden_size=lstm_units, num_layers=2, batch_first=True)
        self.decoder_dense = nn.Linear(lstm_units, feature_vec_length)
        # Sigmoid bounds every reconstructed value to (0, 1).
        self.decoder_output = nn.Sigmoid()

    def forward(self, can_ids, features):
        """Reconstruct ``features`` from ``can_ids`` and ``features``.

        Args:
            can_ids: Integer tensor of embedding indices with shape ``(...)``.
            features: Float tensor with shape ``(..., feature_vec_length)`` whose
                leading dimensions match ``can_ids``.

        Returns:
            Tensor with the same shape as ``features`` and values in (0, 1).
        """
        # Encoding
        embedded_ids = self.embedding(can_ids)
        # Concatenate along the LAST dimension: identical to dim=1 for the 2-D
        # (N, feat) inputs, but also valid for (batch, seq, feat) inputs.
        x = torch.cat([embedded_ids, features], dim=-1)
        x = torch.tanh(self.encoder_dense(x))
        x = self.encoder_dropout(x)
        # NOTE(review): a 2-D input is treated by nn.LSTM as ONE unbatched
        # sequence, so the rows of a 2-D batch are processed as consecutive
        # time steps - confirm this is the intended windowing.
        x, _ = self.encoder_lstm(x)

        # Decoding
        x, _ = self.decoder_lstm(x)
        x = self.decoder_dense(x)
        reconstructed = self.decoder_output(x)

        return reconstructed



In [None]:
# CANID   -
# f1      -        -  reconstructed_f1
# f2      -   -    -  reconstructed_f2
# f3      -        -  reconstructed_f3

In [None]:
# Show the parsed capture for the max_engine_coolant_temp_attack log; as the
# last expression in the cell it is rendered as the cell output.
dataset.attack_data.max_engine_coolant_temp_attack

Unnamed: 0,time,aid,data,filename,delta_time_last_msg,delta_time_last_same_aid,actual_attack
0,0.000000e+00,354,00080003EA11F4CE,max_engine_coolant_temp_attack.log,0.000000e+00,,False
1,9.536743e-07,167,2010FA24D125C0A0,max_engine_coolant_temp_attack.log,9.536743e-07,,False
2,9.961128e-04,813,00000424CC010000,max_engine_coolant_temp_attack.log,9.951591e-04,,False
3,2.017021e-03,1398,7FF800003FFC1FC0,max_engine_coolant_temp_attack.log,1.020908e-03,,False
4,5.083084e-03,403,00080803E6E80000,max_engine_coolant_temp_attack.log,3.066063e-03,,False
...,...,...,...,...,...,...,...
58013,2.587254e+01,167,0010FC64C12820A0,max_engine_coolant_temp_attack.log,1.192093e-06,0.011015,False
58014,2.587353e+01,722,0000480000000000,max_engine_coolant_temp_attack.log,9.939671e-04,0.100365,False
58015,2.587353e+01,996,0134000005000000,max_engine_coolant_temp_attack.log,2.861023e-06,0.100365,False
58016,2.587354e+01,1628,4011D26750000000,max_engine_coolant_temp_attack.log,1.192093e-06,0.099327,False


#### Define config
This is what we feed to the CanDataset object to create a dataloader.

In [None]:
# Data-loader configuration passed to dataset.get_dataloaders(config).
# Each feature key names a column of the capture frames; the sum of the two
# "records_back" values is what CANnoloAttackDetector uses as the model's
# feature vector length.
# NOTE(review): the exact semantics of "specific_to_can_id" and "records_back"
# live in CanDataset / CANDataLoader and are not visible here - presumably
# "records_back" is the number of past records included per sample and
# "specific_to_can_id" restricts that history to messages with the same CAN ID;
# confirm against the loader.
config = {
    # Batch size handed to the data loaders (popped out before building
    # per-file attack loaders later in the notebook).
    "batch_size": 32,
    "delta_time_last_msg": {
        "specific_to_can_id": False,
        "records_back": 30
    },
    "delta_time_last_same_aid": {
        "specific_to_can_id": True,
        "records_back": 15
    },
}


Use `get_dataloaders` on the `CanDataset` object to get the data loaders.

In [None]:
# Build the three loaders (ambient, validation, attack) from the config above.
# Requires `dataset` and `config` from the earlier cells.
ambient_loader, validation_loader, attack_loader = dataset.get_dataloaders(config)

In [None]:
# Derive the model dimensions from the loaded data.
unique_can_ids = dataset.get_unique_can_ids()
num_can_ids = len(unique_can_ids)
# NOTE(review): the reason for the "- 1" is not visible in this notebook -
# presumably features_len counts one entry (e.g. the CAN ID) that is not part
# of the reconstructed feature vector; confirm against the loader.
feature_vec_length = ambient_loader.features_len - 1 
print(f"Number of CAN IDs: {num_can_ids}")
print(f"Feature vector length: {feature_vec_length}") 

Number of CAN IDs: 105
Feature vector length: 25


In [None]:
# Hyperparameters
embedding_dim = num_can_ids  # embedding dimension should be equal to the number of CAN IDs
lstm_units = 128 # defined in the CANnolo paper
dense_units = 256 # defined in the CANnolo paper
dropout_rate = 0.2 # defined in the CANnolo paper
# nn.Embedding accepts indices 0 .. num_embeddings - 1, so the table needs
# max(id) + 1 rows for the largest CAN ID to be a valid index (assuming the
# raw CAN IDs are used directly as embedding indices - confirm in the loader).
num_embeddings = max(unique_can_ids) + 1

# Model
model = CANnoloAutoencoder(embedding_dim, lstm_units, dense_units, dropout_rate, num_embeddings, feature_vec_length)

# Training parameters
batch_size = ambient_loader.batch_size
# NOTE(review): both `optimizer` and `loss_fn` are reassigned in the
# "Defining our loss function and optimizer" cell below (Adam with lr=0.001,
# MSELoss), so the two assignments here are overridden when the notebook is
# run top to bottom.
optimizer = torch.optim.Adam(model.parameters())
loss_fn = nn.BCELoss()  # Binary Cross-Entropy Loss


#### Defining our loss function and optimizer

In [None]:
# Mean-squared-error reconstruction loss and Adam optimizer over all model
# parameters. NOTE(review): this replaces the BCELoss / default-lr Adam
# assigned in the hyperparameter cell above.
loss_fn = torch.nn.MSELoss()  # Example loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Example optimizer

In [None]:
from TorchLoader import CANDataLoader
from copy import deepcopy

# Work on a copy so popping "batch_size" does not mutate the shared `config`.
config_copy = deepcopy(config)
# CANDataLoader takes the batch size as a separate argument, so remove it from
# the feature configuration.
batch_size = config_copy.pop("batch_size", None)
if not batch_size:
    # A missing (or zero) batch size is a bad configuration value; ValueError
    # is still an Exception subclass, so existing handlers keep working.
    raise ValueError("Config needs `batch_size`")

# One loader per attack capture, in the same order as attack_loader.can_data,
# so attack_loaders[i] corresponds to attack_loader.can_data[i].
attack_loaders = [
    CANDataLoader([df], config_copy, batch_size)
    for df in attack_loader.can_data
]



In [None]:
# for i, loader in enumerate(attack_loaders):
#     print(f"Attack Loader {i}")
#     print(f"\tNumber of batches: {len(loader)}")
#     try:

#         attacks = loader.can_data[0].actual_attack.sum()
#         print(f"\tNumber of attacks: {attacks}")
#     except:
#         attack_loaders.pop(i)
#         print("\tNo attack data")

In [3]:
from tqdm import tqdm
import numpy as np

class CANnoloAttackDetector:
    """Flag messages as attacks when the autoencoder's reconstruction error is high.

    A trained ``CANnoloAutoencoder`` is loaded from disk; each sample's anomaly
    score is the mean squared error between its features and their
    reconstruction, and a sample is predicted as an attack when that score
    exceeds ``threshold``.
    """

    def __init__(self, model_path, threshold, config, embedding_dim=None, num_embeddings=1789):
        """Build the model and load its trained weights.

        Args:
            model_path: Path to a ``state_dict`` saved with ``torch.save``.
            threshold: Anomaly score above which a sample is predicted as an attack.
            config: Loader configuration; the ``records_back`` values of
                ``delta_time_last_msg`` and ``delta_time_last_same_aid`` are
                summed to obtain the feature vector length.
            embedding_dim: Embedding size used when the checkpoint was trained.
                Defaults to the notebook-level ``num_can_ids`` global, which
                must then be defined before construction.
            num_embeddings: Number of rows in the embedding table used when
                the checkpoint was trained (default 1789, the previously
                hard-coded value).
        """
        # Hyperparameters - these must match the architecture of the checkpoint.
        if embedding_dim is None:
            embedding_dim = num_can_ids  # embedding dimension should be equal to the number of CAN IDs
        lstm_units = 128  # defined in the CANnolo paper
        dense_units = 256  # defined in the CANnolo paper
        dropout_rate = 0.2  # defined in the CANnolo paper

        feature_vec_length = (
            config["delta_time_last_msg"]["records_back"]
            + config["delta_time_last_same_aid"]["records_back"]
        )

        # Model
        self.model = CANnoloAutoencoder(embedding_dim, lstm_units, dense_units, dropout_rate, num_embeddings, feature_vec_length)

        # The model above lives on the CPU, so map the checkpoint there too;
        # this lets a checkpoint saved on a GPU load on a CPU-only machine.
        state_dict = torch.load(model_path, map_location="cpu")
        self.model.load_state_dict(state_dict)

        self.threshold = threshold

    def _score_loader(self, data_loader):
        """Return ``[(scores, labels), ...]`` with one entry per batch of ``data_loader``.

        The loader is reset in a ``finally`` block so that a failed or
        interrupted pass does not leave it part-way through its data.
        """
        self.model.eval()  # Ensure the model is in evaluation mode
        scored = []
        try:
            with torch.no_grad():
                for can_ids, features, labels in tqdm(data_loader):
                    reconstructed = self.model(can_ids, features)
                    scored.append((self.compute_anomaly_scores(features, reconstructed), labels))
        finally:
            data_loader.reset()
        return scored

    def detect_attacks(self, data_loader):
        """Predict attacks for every sample yielded by ``data_loader``.

        Args:
            data_loader: Iterable of ``(can_ids, features, actual_attacks)``
                batches that also provides a ``reset()`` method.

        Returns:
            List of ``(predicted, actual)`` pairs, where ``predicted`` is 1
            when the sample's score exceeds ``self.threshold`` and 0 otherwise.
        """
        results = []
        for scores, actual_attacks in self._score_loader(data_loader):
            predicted_attacks = (scores > self.threshold).int()  # Convert to 0 or 1
            results.extend(zip(predicted_attacks.tolist(), actual_attacks.tolist()))
        return results

    def compute_anomaly_scores(self, original, reconstructed):
        """Return the per-sample mean squared error, averaged over dimension 1."""
        loss_fn = torch.nn.MSELoss(reduction='none')
        scores = loss_fn(original, reconstructed)
        return scores.mean(dim=1)  # Mean score across features for each instance

    def determine_threshold(self, normal_data_loader, percentile=95):
        """Estimate a detection threshold from attack-free data.

        Args:
            normal_data_loader: Loader with the same interface as in
                ``detect_attacks``; the labels in each batch are ignored.
            percentile: Percentile (0-100) of the anomaly scores to return.
                Defaults to 95 so the method can be called with only a loader.

        Returns:
            The ``percentile``-th percentile of all anomaly scores.
        """
        all_scores = []
        for scores, _ in self._score_loader(normal_data_loader):
            all_scores.extend(scores.tolist())
        return np.percentile(all_scores, percentile)


def calculate_metrics(results):
    """Compute binary-classification metrics from (predicted, actual) pairs.

    Args:
        results: Iterable of ``(pred, actual)`` pairs whose values compare
            equal to 0 or 1 (booleans work). Pairs with any other value are
            ignored.

    Returns:
        Tuple ``(accuracy, precision, recall, f1_score, confusion_matrix)``,
        where ``confusion_matrix`` is ``[[TP, FP], [FN, TN]]``. Any metric
        whose denominator is zero - including every metric for empty
        ``results`` - is reported as 0 instead of raising ZeroDivisionError.
    """
    # Confusion-matrix counters
    TP, TN, FP, FN = 0, 0, 0, 0

    for pred, actual in results:
        if pred == actual == 1:
            TP += 1
        elif pred == actual == 0:
            TN += 1
        elif pred == 1 and actual == 0:
            FP += 1
        elif pred == 0 and actual == 1:
            FN += 1

    confusion_matrix = [[TP, FP],
                        [FN, TN]]

    # Accuracy is taken over the pairs that were actually classified above.
    classified = TP + TN + FP + FN
    accuracy = (TP + TN) / classified if classified > 0 else 0
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return accuracy, precision, recall, f1_score, confusion_matrix


# attack_results = detector.detect_attacks(attack_loaders[1])
# for score, is_attack in attack_results:
#     print(f"Anomaly Score: {score}, Attack: {'Yes' if is_attack else 'No'}")


In [4]:
# NOTE(review): absolute, user-specific checkpoint path - the notebook will not
# run on another machine until this is made configurable (for example via the
# environment, as DATA_PATH is).
new_model_filename = "/Users/jamescourson/Documents/CANIDS/code/data_prep/saved_model/canolo_model_112.pt"
# Requires `config` (and the `num_can_ids` global read inside the detector) from
# earlier cells; the recorded run raised NameError because `config` had not
# been defined in that kernel session.
detector = CANnoloAttackDetector(new_model_filename, threshold=0.08, config=config)

NameError: name 'config' is not defined

In [17]:
# Print the source log filename for each attack loader.
# `loader` itself is unused: the name is read from attack_loader.can_data[i],
# which works because attack_loaders was built from attack_loader.can_data in
# the same order. filename[0] takes the first entry of each frame's filename
# column.
for i, loader in enumerate(attack_loaders):
    print(f"Attack Loader {attack_loader.can_data[i].filename[0]}")


Attack Loader accelerator_attack_drive_1.log
Attack Loader accelerator_attack_drive_2.log
Attack Loader accelerator_attack_reverse_1.log
Attack Loader accelerator_attack_reverse_2.log
Attack Loader correlated_signal_attack_1.log
Attack Loader correlated_signal_attack_2.log
Attack Loader correlated_signal_attack_3.log
Attack Loader fuzzing_attack_1.log
Attack Loader fuzzing_attack_2.log
Attack Loader fuzzing_attack_3.log
Attack Loader max_engine_coolant_temp_attack.log
Attack Loader max_speedometer_attack_1.log
Attack Loader max_speedometer_attack_2.log
Attack Loader max_speedometer_attack_3.log
Attack Loader reverse_light_off_attack_1.log
Attack Loader reverse_light_off_attack_2.log
Attack Loader reverse_light_off_attack_3.log
Attack Loader reverse_light_on_attack_1.log
Attack Loader reverse_light_on_attack_2.log
Attack Loader reverse_light_on_attack_3.log


In [18]:
# Preview only the attack captures whose frame exposes an `actual_attack`
# column; captures where the probe below fails are skipped silently.
for i, loader in enumerate(attack_loaders):
    try:
        # Probe: the result is discarded, the access only checks that the
        # column exists and can be summed.
        loader.can_data[0].actual_attack.sum()
        print(loader.can_data[0].head())
        print(f"Attack Loader {i}: {attack_loader.can_data[i].filename[0]}")
    except Exception:
        # Best-effort skip, as before, but no longer swallowing
        # KeyboardInterrupt / SystemExit the way a bare `except:` does.
        continue

       time   aid              data                        filename  \
0  0.000000  1505  893FC00B0A013880  correlated_signal_attack_1.log   
1  0.000001   651  0000000000000000  correlated_signal_attack_1.log   
2  0.000003   167  0010FA24D12E00A0  correlated_signal_attack_1.log   
3  0.000004   208  4A7704600201F000  correlated_signal_attack_1.log   
4  0.000997    51  000698000E4207D0  correlated_signal_attack_1.log   

   delta_time_last_msg  delta_time_last_same_aid  actual_attack  
0         0.000000e+00                       NaN          False  
1         1.072884e-06                       NaN          False  
2         2.026558e-06                       NaN          False  
3         9.536743e-07                       NaN          False  
4         9.930134e-04                       NaN          False  
Attack Loader 4: correlated_signal_attack_1.log
           time   aid              data                        filename  \
0  0.000000e+00  1505  891FA0070A00CC80  correlated_si

In [19]:
# while True:
#     out = attack_loaders[5].__getitem__(0)
#     if out[2].sum() > 0:
#         input()

# out = attack_loaders[5].__getitem__(0)
# print(out)

# attack_loaders[5].reset()

In [203]:
# ambient_loader_test = CANDataLoader([ambient_loader.can_data[3]], config_copy, batch_size)


# detector = CANnoloAttackDetector("/Users/jamescourson/Documents/CANIDS/code/data_prep/saved_model/canolo_model_112.pt", threshold=.16)
# results = detector.detect_attacks(ambient_loader_test)
# accuracy = calculate_accuracy(results)
# print(f"Accuracy: {accuracy}")

100%|█████████▉| 3340/3341 [00:35<00:00, 93.07it/s]


Accuracy: 0.9876122754491018


In [62]:
# Loader over a truncated slice (`[:80000]`) of the ambient capture at index 4,
# used to estimate a detection threshold from attack-free traffic.
# Requires `config_copy` and `batch_size` from the attack-loader cell.
ambient_loader_test = CANDataLoader([ambient_loader.can_data[4][:80000]], config_copy, batch_size)


In [63]:

# The threshold passed to the constructor is irrelevant here: this cell only
# uses the detector to score attack-free traffic.
detector = CANnoloAttackDetector(new_model_filename, threshold=.16, config=config)
# determine_threshold takes the percentile as a required second argument; it
# was missing from this call. 95 follows the example given in the method's own
# comment.
# NOTE(review): the percentile behind the recorded output below is unknown -
# adjust if a different value was intended.
results = detector.determine_threshold(ambient_loader_test, 95)
print(results)

100%|█████████▉| 2497/2500 [00:30<00:00, 81.49it/s] 

6.286359654041014e-05





In [235]:
# NOTE(review): bare float literal, evaluated only so the cell echoes its
# value; presumably a candidate threshold pasted for reference - assign it to a
# named variable or remove the cell.
.0000032280813684337766

3.228081368433776e-06

In [44]:
# Evaluate one or more candidate thresholds on a single attack capture.
# thresholds = [0.02, 0.05, 0.08, 0.1, 0.12, 0.14, 0.16]
thresholds = [1.21549102921e-05]

# Constructing the detector loads the checkpoint from disk, which does not
# depend on the threshold, so do it once and only swap the threshold per
# iteration (detect_attacks reads self.threshold on every call).
detector = CANnoloAttackDetector(new_model_filename, threshold=thresholds[0], config=config)
for threshold in thresholds:
    detector.threshold = threshold
    results = detector.detect_attacks(attack_loaders[5])
    accuracy, precision, recall, f1_score, confusion_matrix = calculate_metrics(results)
    print(f"Threshold: {threshold}, Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1_score}, Confusion Matrix: {confusion_matrix}")

100%|██████████| 2043/2043 [00:18<00:00, 108.06it/s]

Threshold: 1.21549102921e-05, Accuracy: 0.8862579539892316, Precision: 0.2234753550543024, Recall: 1.0, F1 Score: 0.3653123933082963, Confusion Matrix: [[2140, 7436], [0, 55800]]





In [None]:
# small config

# 1.21549102921e-05, best so far

# 100%|██████████| 2043/2043 [00:18<00:00, 108.09it/s]
# Threshold: 0.0716797549102921, Accuracy: 0.9658284385707293, Precision: 0.0, Recall: 0.0, F1 Score: 0, Confusion Matrix: [[0, 94], [2140, 63142]]
# 100%|██████████| 2043/2043 [00:18<00:00, 107.65it/s]
# Threshold: 0.00716797549102921, Accuracy: 0.9651095203132648, Precision: 0.0, Recall: 0.0, F1 Score: 0, Confusion Matrix: [[0, 141], [2140, 63095]]
# 100%|██████████| 2043/2043 [00:19<00:00, 104.89it/s]
# Threshold: 0.000716797549102921, Accuracy: 0.9601076847772883, Precision: 0.0, Recall: 0.0, F1 Score: 0, Confusion Matrix: [[0, 468], [2140, 62768]]
# 100%|██████████| 2043/2043 [00:19<00:00, 104.07it/s]
# Threshold: 7.16797549102921e-05, Accuracy: 0.9448268477728831, Precision: 0.0006807351940095302, Recall: 0.00046728971962616824, F1 Score: 0.0005541701302299807, Confusion Matrix: [[1, 1468], [2139, 61768]]
# 100%|██████████| 2043/2043 [00:18<00:00, 108.79it/s]
# Threshold: 7.16797549102921e-06, Accuracy: 0.8650881057268722, Precision: 0.19525547445255476, Recall: 1.0, F1 Score: 0.32671755725190843, Confusion Matrix: [[2140, 8820], [0, 54416]]

In [None]:
# new config

# 99%|█████████▉| 2019/2043 [00:19<00:00, 103.71it/s]
# Threshold: 0.00133, Accuracy: 0.9130448241703814, Precision: 0.27180899908172634, Recall: 0.9686769518466574, F1 Score: 0.42450317557877487, Confusion Matrix: [[2072, 5551], [67, 56918]]

In [None]:


# for 5 

# 100%|█████████▉| 2042/2043 [00:36<00:00, 55.75it/s]
# Threshold: 9.8e-06, Accuracy: 0.943560235063663, Precision: 0.3669929589558647, Recall: 0.9990649836372136, F1 Score: 0.536799799045466, Confusion Matrix: [[2137, 3686], [2, 59519]]
# 100%|█████████▉| 2042/2043 [00:38<00:00, 53.15it/s]
# Threshold: 1e-05, Accuracy: 0.9434837169441724, Precision: 0.3664030261348005, Recall: 0.9962599345488546, F1 Score: 0.5357636706473916, Confusion Matrix: [[2131, 3685], [8, 59520]]
# 100%|█████████▉| 2042/2043 [00:39<00:00, 52.00it/s]
# Threshold: 1.2e-05, Accuracy: 0.9522220861900098, Precision: 0.09877551020408164, Recall: 0.0565684899485741, F1 Score: 0.07193816884661118, Confusion Matrix: [[121, 1104], [2018, 62101]]

# 100%|█████████▉| 2042/2043 [00:19<00:00, 105.41it/s]
# Threshold: 9.3e-06, Accuracy: 0.9435449314397649, Precision: 0.3670212765957447, Recall: 1.0, F1 Score: 0.5369649805447471, Confusion Matrix: [[2139, 3689], [0, 59516]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 108.38it/s]
# Threshold: 9.4e-06, Accuracy: 0.9435449314397649, Precision: 0.3670212765957447, Recall: 1.0, F1 Score: 0.5369649805447471, Confusion Matrix: [[2139, 3689], [0, 59516]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 111.21it/s]
# Threshold: 9.5e-06, Accuracy: 0.9435908423114594, Precision: 0.36721030042918457, Recall: 1.0, F1 Score: 0.5371672526368658, Confusion Matrix: [[2139, 3686], [0, 59519]]
# 100%|█████████▉| 2042/2043 [00:19<00:00, 102.42it/s]
# Threshold: 9.7e-06, Accuracy: 0.9435908423114594, Precision: 0.36721030042918457, Recall: 1.0, F1 Score: 0.5371672526368658, Confusion Matrix: [[2139, 3686], [0, 59519]]

# 100%|█████████▉| 2042/2043 [00:18<00:00, 108.20it/s]
# Threshold: 8.5e-06, Accuracy: 0.943223555337904, Precision: 0.3657035390665071, Recall: 1.0, F1 Score: 0.5355533299949925, Confusion Matrix: [[2139, 3710], [0, 59495]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.22it/s]
# Threshold: 8.8e-06, Accuracy: 0.9434071988246817, Precision: 0.3664553709097139, Recall: 1.0, F1 Score: 0.5363590772316952, Confusion Matrix: [[2139, 3698], [0, 59507]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 108.90it/s]
# Threshold: 9.1e-06, Accuracy: 0.9435143241919687, Precision: 0.3668953687821612, Recall: 1.0, F1 Score: 0.5368302170912285, Confusion Matrix: [[2139, 3691], [0, 59514]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.57it/s]
# Threshold: 9.3e-06, Accuracy: 0.9435449314397649, Precision: 0.3670212765957447, Recall: 1.0, F1 Score: 0.5369649805447471, Confusion Matrix: [[2139, 3689], [0, 59516]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.75it/s]
# Threshold: 9.8e-06, Accuracy: 0.943560235063663, Precision: 0.3669929589558647, Recall: 0.9990649836372136, F1 Score: 0.536799799045466, Confusion Matrix: [[2137, 3686], [2, 59519]]

# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.65it/s]
# Threshold: 2e-06, Accuracy: 0.7745470127326151, Precision: 0.12678560844051923, Recall: 1.0, F1 Score: 0.22503945291951602, Confusion Matrix: [[2139, 14732], [0, 48473]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 111.40it/s]
# Threshold: 4e-06, Accuracy: 0.9219515181194907, Precision: 0.2954828014919188, Recall: 1.0, F1 Score: 0.4561740243122201, Confusion Matrix: [[2139, 5100], [0, 58105]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.56it/s]
# Threshold: 6e-06, Accuracy: 0.9294962047012733, Precision: 0.31707678624369995, Recall: 1.0, F1 Score: 0.4814856499718626, Confusion Matrix: [[2139, 4607], [0, 58598]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.21it/s]
# Threshold: 8e-06, Accuracy: 0.9425042850146915, Precision: 0.36278833107191316, Recall: 1.0, F1 Score: 0.5324206596141879, Confusion Matrix: [[2139, 3757], [0, 59448]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.16it/s]
# Threshold: 1e-06, Accuracy: 0.7203568805093046, Precision: 0.10479129923574368, Recall: 1.0, F1 Score: 0.18970333909804443, Confusion Matrix: [[2139, 18273], [0, 44932]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.10it/s]
# Threshold: 1.2e-06, Accuracy: 0.7422257590597453, Precision: 0.11267976610651635, Recall: 1.0, F1 Score: 0.2025376384812044, Confusion Matrix: [[2139, 16844], [0, 46361]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.07it/s]
# Threshold: 1.4e-06, Accuracy: 0.7495255876591577, Precision: 0.11558413487517562, Recall: 1.0, F1 Score: 0.20721724388471785, Confusion Matrix: [[2139, 16367], [0, 46838]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.22it/s]
# Threshold: 1.6e-06, Accuracy: 0.7543921400587659, Precision: 0.11760501429513966, Recall: 1.0, F1 Score: 0.21045899542480445, Confusion Matrix: [[2139, 16049], [0, 47156]]
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.12it/s]
# Threshold: 1.8e-06, Accuracy: 0.759794319294809, Precision: 0.119932716568545, Recall: 1.0, F1 Score: 0.21417843196155, Confusion Matrix: [[2139, 15696], [0, 47509]]

# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.26it/s]
# Threshold: 0.001, Accuracy: 0.9611440989226249
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.64it/s]
# Threshold: 0.008, Accuracy: 0.9631947845249755
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.20it/s]
# Threshold: 0.01, Accuracy: 0.9631947845249755
# 100%|█████████▉| 2042/2043 [00:19<00:00, 106.59it/s]
# Threshold: 0.12, Accuracy: 0.9672655484818805
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.60it/s]
# Threshold: 0.14, Accuracy: 0.9672655484818805
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.82it/s]
# Threshold: 0.16, Accuracy: 0.9672655484818805
# 100%|█████████▉| 2042/2043 [00:18<00:00, 111.24it/s]
# Threshold: 0.5, Accuracy: 0.9672655484818805
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.56it/s]
# Threshold: 1, Accuracy: 0.9672655484818805
# 100%|█████████▉| 2042/2043 [00:18<00:00, 109.57it/s]
# Threshold: 2, Accuracy: 0.9672655484818805
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.48it/s]
# Threshold: 6, Accuracy: 0.9672655484818805
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.01it/s]
# Threshold: 10, Accuracy: 0.9672655484818805
# 100%|█████████▉| 2042/2043 [00:18<00:00, 110.44it/s]
# Threshold: 100, Accuracy: 0.9672655484818805

In [22]:
# Echo the most recent `results` value. NOTE(review): the recorded output is an
# empty list, and `results` is assigned by several cells (a threshold in one, a
# list of predictions in another), so what this shows depends on execution
# order.
results

[]

In [19]:
# Run detection on the second attack capture and print the raw result.
# NOTE(review): the recorded output ("0" after 0 of 2043 batches) does not
# match the list that detect_attacks returns in this notebook - it presumably
# comes from an earlier version of the method; re-run to refresh.
print(detector.detect_attacks(attack_loaders[1]))

  0%|          | 0/2043 [00:13<?, ?it/s]

0



