In [11]:
from json import load
from dotenv import load_dotenv
from helpers import *
import os

load_dotenv()

data_path = os.getenv('DATA_PATH')
ambient_dir = f'{data_path}/ambient' 
attack_dir = f'{data_path}/attacks'

ambient_metadata_file = os.path.join(ambient_dir, 'capture_metadata.json')
attack_metadata_file = os.path.join(attack_dir, 'capture_metadata.json')

with open(ambient_metadata_file) as f:
    ambient_metadata = load(f)

with open(attack_metadata_file) as f:
    attack_metadata = load(f)

In [15]:

# Extract Ambient Data
ambient_dfs = {}
for log_file in os.listdir(ambient_dir):
    if log_file.endswith('.log'):
        log_filepath = os.path.join(ambient_dir, log_file)
        df = make_can_df(log_filepath)
        ambient_dfs[log_file[:-4]] = df[['time', 'aid', 'data']]

# Extract Attack Data
attack_dfs = {}
for log_file in os.listdir(attack_dir):
    if log_file.endswith('.log'):
        log_filepath = os.path.join(attack_dir, log_file)
        df = make_can_df(log_filepath)
        attack_dfs[log_file[:-4]] = df[['time', 'aid', 'data']]


In [16]:
ambient_dfs

{'ambient_dyno_drive_benign_anomaly':                 time   aid              data
 0       0.000000e+00   737  0000000000000004
 1       9.536743e-07   852  1FFF40000003A780
 2       1.013994e-03   403  00080803E6280000
 3       1.014948e-03  1505  893FE0070A000480
 4       1.016021e-03   526  4E2003A0003FAFFF
 ...              ...   ...               ...
 771512  4.564519e+02   651  0000000000000000
 771513  4.564529e+02  1760  0000000000000000
 771514  4.564529e+02   167  2010FA24F12B30A0
 771515  4.564529e+02    61  0001F48000000000
 771516  4.564539e+02   705  01F32FC7CB1F1CDE
 
 [720927 rows x 3 columns],
 'ambient_dyno_drive_basic_long':                 time   aid              data
 0           0.000000   813  0000042758010000
 1           0.001019  1694  0440047E1FC01542
 2           0.001020   293  9000401F41BE7960
 3           0.002916   737  0000000000000004
 4           0.003937   852  1FFF40000003B680
 ...              ...   ...               ...
 2991265  1250.942507  150

In [24]:

for df_keys in ambient_dfs.keys():
    if 'masquerade' not in df_keys:
        print(df_keys)
    
print()

for df_keys in attack_dfs.keys():
    if 'masquerade' not in df_keys:
        print(df_keys)

ambient_keys = [
                "ambient_dyno_drive_benign_anomaly", 
                "ambient_dyno_drive_basic_long",
                "ambient_highway_street_driving_long",
                "ambient_dyno_reverse",
                "ambient_dyno_idle_radio_infotainment",
                "ambient_dyno_drive_radio_infotainment",
                "ambient_dyno_drive_winter",
                "ambient_dyno_exercise_all_bits",
                "ambient_dyno_drive_extended_short",
                "ambient_dyno_drive_basic_short",
                "ambient_dyno_drive_extended_long",
                "ambient_highway_street_driving_diagnostics"
                ]

attack_keys = [
                "accelerator_attack_reverse_1",
                "accelerator_attack_drive_1",
                "accelerator_attack_drive_2",
                "accelerator_attack_reverse_2",
                "fuzzing_attack_1",
                "fuzzing_attack_2",
                "fuzzing_attack_3",
                "correlated_signal_attack_1",
                "correlated_signal_attack_2",
                "correlated_signal_attack_3",
                "reverse_light_on_attack_1",
                "reverse_light_on_attack_2",
                "reverse_light_on_attack_3",
                "reverse_light_off_attack_1",
                "reverse_light_off_attack_2",
                "reverse_light_off_attack_3",
                "max_speedometer_attack_1",
                "max_speedometer_attack_2",
                "max_speedometer_attack_3",
                "max_engine_coolant_temp_attack",
                ]


ambient_dyno_drive_benign_anomaly
ambient_dyno_drive_basic_long
ambient_highway_street_driving_long
ambient_dyno_reverse
ambient_dyno_idle_radio_infotainment
ambient_dyno_drive_radio_infotainment
ambient_dyno_drive_winter
ambient_dyno_exercise_all_bits
ambient_dyno_drive_extended_short
ambient_dyno_drive_basic_short
ambient_dyno_drive_extended_long
ambient_highway_street_driving_diagnostics

accelerator_attack_reverse_1
fuzzing_attack_3
accelerator_attack_drive_1
accelerator_attack_drive_2
accelerator_attack_reverse_2
fuzzing_attack_1
reverse_light_on_attack_1
correlated_signal_attack_1
reverse_light_on_attack_3
max_speedometer_attack_3
max_speedometer_attack_1
correlated_signal_attack_2
reverse_light_off_attack_3
fuzzing_attack_2
correlated_signal_attack_3
reverse_light_on_attack_2
reverse_light_off_attack_2
reverse_light_off_attack_1
max_engine_coolant_temp_attack
max_speedometer_attack_2


In [17]:
for df_keys in ambient_dfs.keys():
    ambient_parquet_file = os.path.join(ambient_dir, f'{df_keys}.parquet')
    ambient_dfs[df_keys].to_parquet(ambient_parquet_file, index=False)


for df_keys in attack_dfs.keys():
    attack_parquet_file = os.path.join(attack_dir, f'{df_keys}.parquet')
    attack_dfs[df_keys].to_parquet(attack_parquet_file, index=False)


In [26]:
# load parquet files into dataframes
ambient_dfs = {}
for parquet_file in ambient_keys:
    parquet_filepath = os.path.join(ambient_dir, f'{parquet_file}.parquet')
    df = pd.read_parquet(parquet_filepath)
    ambient_dfs[parquet_file[:-8]] = df

attack_dfs = {}
for parquet_file in attack_keys:
    parquet_filepath = os.path.join(attack_dir, f'{parquet_file}.parquet')
    df = pd.read_parquet(parquet_filepath)
    attack_dfs[parquet_file[:-8]] = df

In [27]:
attack_dfs

{'accelerator_attack_r':               time   aid              data
 0         0.000000   192  0000000000000000
 1         0.000994   996  0128000085000000
 2         0.000996   403  00080803E8000000
 3         0.000997   526  4E2003A0003F6FFF
 4         0.000998  1314  DFFFD0007AD0001E
 ...            ...   ...               ...
 234021  105.436338  1076  0088E0A2775E7AB0
 234022  105.437336  1760  0000000000000000
 234023  105.437338   354  00080003EA11F4C6
 234024  105.437339   167  0090FA008105B0A0
 234025  105.437340  1372  101C000044883000
 
 [234026 rows x 3 columns],
 'accelerator_attack':                 time   aid              data
 0       0.000000e+00  1124  000240000090145A
 1       9.536743e-07   470  020F828030240580
 2       2.145767e-06   560  F1000002F4001C00
 3       1.071930e-03   705  01F38FC7C99F5AA0
 4       2.094984e-03   737  0000000000000004
 ...              ...   ...               ...
 161364  7.286370e+01  1505  893FE00802000080
 161365  7.286371e+01   354 

In [None]:
# ambient_dfs["name"] = add_time_diff_per_aid_col(ambient_dfs["name"], order_by_time=True)
