In [8]:
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Dropout, RepeatVector, TimeDistributed, Embedding, Reshape, Concatenate, Flatten, concatenate


def create_cannolo_autoencoder(input_features, window_size=40, num_can_ids=100, embedding_size=10):
    # Input layers
    input_layer = Input(shape=(window_size,))  # Adjust the input shape to be 1D for the embedding layer
    can_id_input = Input(shape=(1,), dtype='int32')  # Assuming CAN ID is a single integer

    # Embedding for CAN ID
    can_id_embedding = Embedding(input_dim=num_can_ids, output_dim=embedding_size, input_length=1)(can_id_input)
    can_id_embedding = Reshape(target_shape=(embedding_size,))(can_id_embedding)

    # Combine CAN ID embedding with other features
    combined_input = Concatenate(axis=-1)([input_layer, can_id_embedding])

    # Encoder
    encoder = Dense(256, activation='tanh')(input_layer)
    encoder = Dropout(0.2)(encoder)
    encoder = LSTM(128, return_sequences=True)(encoder)
    encoder = LSTM(128, return_sequences=False)(encoder)

    # Repeat the encoding
    repeater = RepeatVector(window_size)(encoder)

    # Decoder
    decoder = LSTM(128, return_sequences=True)(repeater)
    decoder = LSTM(128, return_sequences=True)(decoder)
    output_layer = TimeDistributed(Dense(input_features, activation='sigmoid'))(decoder)

    # This creates a model that includes the Input layer and three Dense layers
    autoencoder = Model(inputs=input_layer, outputs=output_layer)

    # Compile the model
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

    return autoencoder


In [9]:
def create_cannolo_dense_autoencoder(num_other_features, encoding_dim=128, embedding_size=10):
    # # Input layer for CAN IDs
    # can_id_input = Input(shape=(1,), dtype='int32', name='can_id_input')
    # # Embedding for CAN IDs
    # can_id_embedding = Embedding(input_dim=num_can_ids, output_dim=embedding_size, input_length=1, name='can_id_embedding')(can_id_input)
    # can_id_embedding = Flatten()(can_id_embedding)

    # # Input layer for other features
    other_features_input = Input(shape=(num_other_features,), name='other_features_input')

    # # Combine CAN ID embedding with other features
    # combined = concatenate([can_id_embedding, other_features_input])

    # Encoding layers
    encoded = Dense(encoding_dim, activation='tanh')(other_features_input)
    encoded = Dropout(0.2)(encoded)
    encoded = Dense(encoding_dim//2, activation='tanh')(encoded)
    
    # Decoding layers
    decoded = Dense(encoding_dim//2, activation='tanh')(encoded)
    decoded = Dense(encoding_dim, activation='tanh')(decoded)
    
    # Output layer for reconstruction
    output_layer = Dense(num_other_features, activation='sigmoid', name='output_layer')(decoded)

    # This creates a model that includes the input layers and the dense layers
    autoencoder = Model(inputs=other_features_input, outputs=output_layer)

    # Compile the model
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')

    return autoencoder

In [10]:
from CanDataLoader import CanDataLoader
from dotenv import load_dotenv
import os

load_dotenv()
data_path = os.getenv('DATA_PATH')
dataset = CanDataLoader(data_path, log_verbosity=1)


Found ambient and attack directories.
Loading CAN metadata...
Parquet files found...
Found processed parquet files...
Loading processed parquet files...
Loading processing data into 'CanData' structure


In [11]:
config = {
    "batch_size": 1,
    "delta_time_last_msg": {
        "specific_to_can_id": False,
        "records_back": 30
    },
    "delta_time_last_same_aid": {
        "specific_to_can_id": True,
        "records_back": 15
    },
}

ambient_loader, attack_loader = dataset.prepare_data(config)

In [5]:
num_can_ids = len(dataset.get_unique_can_ids())
window_size = ambient_loader.features_len

In [6]:
autoencoder = create_cannolo_dense_autoencoder(num_can_ids,window_size-1)

2023-11-10 00:53:48.174163: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2023-11-10 00:53:48.174184: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2023-11-10 00:53:48.174193: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2023-11-10 00:53:48.174228: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-11-10 00:53:48.174245: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [7]:
autoencoder.fit(ambient_loader, epochs=10, batch_size=32)

IndexError: tuple index out of range

In [7]:
# Assuming we have a determined number of bits for each CAN packet after optimization
autoencoder = create_cannolo_autoencoder(ambient_loader, window_size, num_can_ids)


2023-11-09 23:57:37.146829: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2023-11-09 23:57:37.146851: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2023-11-09 23:57:37.146857: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2023-11-09 23:57:37.147069: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-11-09 23:57:37.147421: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 256)