In [1]:

from json import load
from dotenv import load_dotenv
from helpers import *
import os

load_dotenv()

data_path = os.getenv('DATA_PATH')
ambient_dir = f'{data_path}/ambient' 
attack_dir = f'{data_path}/attacks'

ambient_metadata_file = os.path.join(ambient_dir, 'capture_metadata.json')
attack_metadata_file = os.path.join(attack_dir, 'capture_metadata.json')

with open(ambient_metadata_file) as f:
    ambient_metadata = load(f)

with open(attack_metadata_file) as f:
    attack_metadata = load(f)

ambient_keys = [
                "ambient_dyno_drive_benign_anomaly", 
                "ambient_dyno_drive_basic_long",
                "ambient_highway_street_driving_long",
                "ambient_dyno_reverse",
                "ambient_dyno_idle_radio_infotainment",
                "ambient_dyno_drive_radio_infotainment",
                "ambient_dyno_drive_winter",
                "ambient_dyno_exercise_all_bits",
                "ambient_dyno_drive_extended_short",
                "ambient_dyno_drive_basic_short",
                "ambient_dyno_drive_extended_long",
                "ambient_highway_street_driving_diagnostics"
]

attack_keys = [
                "accelerator_attack_reverse_1",
                "accelerator_attack_drive_1",
                "accelerator_attack_drive_2",
                "accelerator_attack_reverse_2",
                "fuzzing_attack_1",
                "fuzzing_attack_2",
                "fuzzing_attack_3",
                "correlated_signal_attack_1",
                "correlated_signal_attack_2",
                "correlated_signal_attack_3",
                "reverse_light_on_attack_1",
                "reverse_light_on_attack_2",
                "reverse_light_on_attack_3",
                "reverse_light_off_attack_1",
                "reverse_light_off_attack_2",
                "reverse_light_off_attack_3",
                "max_speedometer_attack_1",
                "max_speedometer_attack_2",
                "max_speedometer_attack_3",
                "max_engine_coolant_temp_attack",
]

# load parquet files into dataframes
ambient_dfs = {}
for parquet_file in ambient_keys:
    parquet_filepath = os.path.join(ambient_dir, f'{parquet_file}.parquet')
    df = pd.read_parquet(parquet_filepath)
    ambient_dfs[parquet_file[:-8]] = df

attack_dfs = {}
for parquet_file in attack_keys:
    parquet_filepath = os.path.join(attack_dir, f'{parquet_file}.parquet')
    df = pd.read_parquet(parquet_filepath)
    attack_dfs[parquet_file[:-8]] = df

In [4]:
ambient_dfs

{'ambient_dyno_drive_benign':                 time   aid              data
 0       0.000000e+00   737  0000000000000004
 1       9.536743e-07   852  1FFF40000003A780
 2       1.013994e-03   403  00080803E6280000
 3       1.014948e-03  1505  893FE0070A000480
 4       1.016021e-03   526  4E2003A0003FAFFF
 ...              ...   ...               ...
 720922  4.564519e+02   651  0000000000000000
 720923  4.564529e+02  1760  0000000000000000
 720924  4.564529e+02   167  2010FA24F12B30A0
 720925  4.564529e+02    61  0001F48000000000
 720926  4.564539e+02   705  01F32FC7CB1F1CDE
 
 [720927 rows x 3 columns],
 'ambient_dyno_drive_ba':                 time   aid              data
 0           0.000000   813  0000042758010000
 1           0.001019  1694  0440047E1FC01542
 2           0.001020   293  9000401F41BE7960
 3           0.002916   737  0000000000000004
 4           0.003937   852  1FFF40000003B680
 ...              ...   ...               ...
 2802426  1250.942507  1505  893FE00B0A000

In [4]:
from helpers import add_time_diff_since_last_msg_col

ambient_dfs_with_time_diff = {} 
for key, ambient_file_df in ambient_dfs.items():
    ambient_dfs_with_time_diff[key] = add_time_diff_per_aid_col(ambient_file_df, True)
    ambient_dfs_with_time_diff[key] = add_time_diff_since_last_msg_col(ambient_file_df, True)

attack_dfs_with_time_diff = {}
for key, attack_file_df in attack_dfs.items():
    attack_dfs_with_time_diff[key] = add_time_diff_per_aid_col(attack_file_df, True)
    attack_dfs_with_time_diff[key] = add_time_diff_since_last_msg_col(attack_file_df, True)

In [5]:
ambient_dfs_with_time_diff

{'ambient_dyno_drive_benign':                 time   aid              data  time_diffs  \
 0       0.000000e+00   737  0000000000000004 -456.419053   
 1       9.536743e-07   852  1FFF40000003A780 -456.404208   
 2       1.013994e-03   403  00080803E6280000 -456.443165   
 3       1.014948e-03  1505  893FE0070A000480 -456.386205   
 4       1.016021e-03   526  4E2003A0003FAFFF -456.422088   
 ...              ...   ...               ...         ...   
 720922  4.564519e+02   651  0000000000000000    0.019981   
 720923  4.564529e+02  1760  0000000000000000    0.008721   
 720924  4.564529e+02   167  2010FA24F12B30A0    0.007702   
 720925  4.564529e+02    61  0001F48000000000    0.099969   
 720926  4.564539e+02   705  01F32FC7CB1F1CDE    0.104799   
 
         time_diff_since_last_msg  
 0                            NaN  
 1                   9.536743e-07  
 2                   1.013041e-03  
 3                   9.536743e-07  
 4                   1.072884e-06  
 ...                 

In [None]:
for df_keys in ambient_dfs_with_time_diff.keys():
    ambient_parquet_file = os.path.join(ambient_dir, f'{df_keys}_with_time_diffs.parquet')
    ambient_dfs[df_keys].to_parquet(ambient_parquet_file, index=False)


for df_keys in attack_dfs_with_time_diff.keys():
    attack_parquet_file = os.path.join(attack_dir, f'{df_keys}_with_time_diffs.parquet')
    attack_dfs[df_keys].to_parquet(attack_parquet_file, index=False)