In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
sys.path.append(os.path.abspath('../../src/'))
sys.path.append(os.path.abspath('../../'))

In [2]:
import numpy as np
from preprocessing.train_test_split import train_test_split
import itertools
from pathlib import Path

from src.preprocessing.train_test_split import Writer
from src.preprocessing.train_test_split import write_to_file

In [3]:
# Root folder of the LISA data set (relative path).
data_dir = '../../../Data/LISA/'

# Car models; each one is a sub-folder name under data_dir.
car_models = [
    'Kia',
    'BMW',
    'Tesla',
]

# Traffic classes; each one names an .npz file inside a car-model folder.
classes = [
    'Normal',
    'Fuzzy',
    'Replay',
]

## Generate train/val/test files

In [4]:
# Names of the splits, paired positionally with the index lists returned by
# train_test_split (the printed sizes are consistent with train/val/test order).
split_names = ['train', 'val', 'test']

for cm, c in itertools.product(car_models, classes):
    file_name = Path(data_dir) / f'{cm}/{c}.npz'
    # np.load returns an NpzFile that keeps the archive open; read both arrays
    # inside the context manager so the handle is released on every iteration.
    with np.load(file_name) as data:
        X, y = data['X'], data['y']
    print(f'Car: {cm} - Class {c} size = {len(X)}')
    indices_lists = train_test_split(len(X), test_fraction=0.2, val_fraction=0.1)
    # Use a dedicated loop variable instead of rebinding the list of names.
    for split_name, indices in zip(split_names, indices_lists):
        print(f'{split_name} size: ', len(indices))
        X_subset = X[indices]
        y_subset = y[indices]
        save_file = Path(data_dir) / f'{cm}/{split_name}_{c}.npz'
        # NOTE(review): saving is disabled, so this cell only reports split
        # sizes. The cells below read '<split>_<class>.npz' files, which must
        # already exist on disk; re-enable the line below to regenerate them.
        # np.savez_compressed(save_file, X=X_subset, y=y_subset)

Car: Kia - Class Normal size = 80658
train size:  56462
val size:  8065
test size:  16131
Car: Kia - Class Fuzzy size = 35868
train size:  25109
val size:  3586
test size:  7173
Car: Kia - Class Replay size = 1282
train size:  898
val size:  128
test size:  256
Car: BMW - Class Normal size = 43194
train size:  30237
val size:  4319
test size:  8638
Car: BMW - Class Fuzzy size = 80938
train size:  56658
val size:  8093
test size:  16187
Car: BMW - Class Replay size = 38493
train size:  26946
val size:  3849
test size:  7698
Car: Tesla - Class Normal size = 157987
train size:  110592
val size:  15798
test size:  31597
Car: Tesla - Class Fuzzy size = 34960
train size:  24472
val size:  3496
test size:  6992
Car: Tesla - Class Replay size = 5998
train size:  4200
val size:  599
test size:  1199


## Generate train/val/test folders

In [5]:
def generate_indices(N, portion=None, size=None, rng=None):
    """Draw a random subset of distinct indices from ``range(N)``.

    Parameters
    ----------
    N : int
        Size of the population; indices are drawn from ``0 .. N-1``.
    portion : float, optional
        Fraction of ``N`` to draw (``int(N * portion)`` indices). Takes
        precedence over ``size`` when both are given.
    size : int, optional
        Absolute number of indices to draw; used only when ``portion`` is None.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Random source to sample from. When None, the global ``np.random``
        state is used, so ``np.random.seed`` controls the result.

    Returns
    -------
    numpy.ndarray
        ``min(requested, N)`` distinct indices, in random order.

    Raises
    ------
    ValueError
        If neither ``portion`` nor ``size`` is given, or if the requested
        count is negative.
    """
    if portion is None and size is None:
        raise ValueError('Either `portion` or `size` must be provided')

    if portion is None:
        S = size
    else:
        S = int(N * portion)
    if S < 0:
        raise ValueError(f'Requested sample size must be non-negative, got {S}')

    # Sampling is without replacement, so never ask for more than N indices.
    S = min(S, N)
    sampler = np.random if rng is None else rng
    return sampler.choice(N, S, replace=False)

def extract_and_write_sample(filename, out_dir, writer, 
                                portion=None, size=None):
    """Load an ``.npz`` file, draw a random sample from it and write it out.

    Parameters
    ----------
    filename : str or path-like
        ``.npz`` archive containing the arrays ``'X'`` and ``'y'``.
    out_dir : any
        Unused by this function; kept so existing callers keep working.
    writer : object
        Passed through unchanged to ``write_to_file``.
    portion : float, optional
        Fraction of the rows to sample (forwarded to ``generate_indices``).
    size : int, optional
        Number of rows to sample. Doubled when the file name contains
        ``'Normal'``.

    Returns
    -------
    Whatever ``write_to_file(writer, X, y)`` returns.
    """
    # Look at the file name only, so that a parent directory whose name
    # happens to contain 'Normal' does not double the size for every class.
    base_name = os.path.basename(str(filename))
    if size is not None and 'Normal' in base_name:
        size = size * 2

    # NpzFile holds the archive open; the arrays are materialised on access,
    # so they remain valid after the context manager closes the file.
    with np.load(filename) as data:
        X, y = data['X'], data['y']

    indices = generate_indices(len(X), portion=portion, size=size)
    X, y = X[indices], y[indices]
    print('Size: ', len(X))
    return write_to_file(writer, X, y)

In [6]:
def create_folder_from_npz_file(in_path, out_path, dir_type, portion=None, size=None):
    """Write samples of every class file of one split through a single Writer.

    For each of the classes 'Normal', 'Fuzzy' and 'Replay', the file
    ``in_path / f'{dir_type}_{class}.npz'`` is passed to
    ``extract_and_write_sample`` together with a shared ``Writer``.

    Parameters
    ----------
    in_path : pathlib.Path
        Folder holding the ``<dir_type>_<class>.npz`` files (must support ``/``).
    out_path : path-like
        Given to ``Writer`` as ``outdir`` and forwarded to the sampler.
    dir_type : str
        Split name used as the file prefix, e.g. 'train', 'val' or 'test'.
    portion : float, optional
        Fraction of each file to sample.
    size : int, optional
        Number of rows to sample; when given it is also appended to the
        Writer's ``type_name``.
    """
    size_suffix = str(size) if size is not None else ""
    writer = Writer(outdir=out_path, type_name=dir_type + size_suffix)
    for attack in ('Normal', 'Fuzzy', 'Replay'):
        npz_path = in_path / f'{dir_type}_{attack}.npz'
        extract_and_write_sample(npz_path, out_path, writer, portion=portion, size=size)

In [8]:
# Per-car input and output folders; '{}' is filled with the car model name.
out_dir = '../../../Data/LISA/{}/'
in_dir = '../../../Data/LISA/{}/'

for car_model in car_models:
    print(f'CAR: {car_model} ===============')
    in_path = Path(in_dir.format(car_model))
    out_path = Path(out_dir.format(car_model))
    # Splits are processed in this order: test, val, train.
    for dir_type in ('test', 'val', 'train'):
        print(dir_type.upper())
        create_folder_from_npz_file(
            in_path=in_path,
            out_path=out_path,
            dir_type=dir_type,
            portion=1.0,
        )

TEST
Size:  16131
Start writing to:  ../../../Data/LISA/Kia/test


16131it [00:11, 1433.01it/s]


Size:  7173
Start writing to:  ../../../Data/LISA/Kia/test


7173it [00:04, 1473.56it/s]


Size:  256
Start writing to:  ../../../Data/LISA/Kia/test


256it [00:00, 1429.27it/s]


VAL
Size:  8065
Start writing to:  ../../../Data/LISA/Kia/val


8065it [00:05, 1430.80it/s]


Size:  3586
Start writing to:  ../../../Data/LISA/Kia/val


3586it [00:02, 1488.57it/s]


Size:  128
Start writing to:  ../../../Data/LISA/Kia/val


128it [00:00, 1454.17it/s]


TRAIN
Size:  2000
Start writing to:  ../../../Data/LISA/Kia/1000


2000it [00:01, 1677.16it/s]


Size:  1000
Start writing to:  ../../../Data/LISA/Kia/1000


1000it [00:00, 1702.15it/s]


Size:  1000
Start writing to:  ../../../Data/LISA/Kia/1000


1000it [00:00, 1667.87it/s]


TEST
Size:  8638
Start writing to:  ../../../Data/LISA/BMW/test


8638it [00:05, 1630.62it/s]


Size:  16187
Start writing to:  ../../../Data/LISA/BMW/test


16187it [00:09, 1652.00it/s]


Size:  7698
Start writing to:  ../../../Data/LISA/BMW/test


7698it [00:04, 1638.43it/s]


VAL
Size:  4319
Start writing to:  ../../../Data/LISA/BMW/val


4319it [00:02, 1608.42it/s]


Size:  8093
Start writing to:  ../../../Data/LISA/BMW/val


8093it [00:04, 1655.79it/s]


Size:  3849
Start writing to:  ../../../Data/LISA/BMW/val


3849it [00:02, 1585.46it/s]


TRAIN
Size:  2000
Start writing to:  ../../../Data/LISA/BMW/1000


2000it [00:01, 1626.03it/s]


Size:  1000
Start writing to:  ../../../Data/LISA/BMW/1000


1000it [00:00, 1687.50it/s]


Size:  1000
Start writing to:  ../../../Data/LISA/BMW/1000


1000it [00:00, 1654.77it/s]


TEST
Size:  31597
Start writing to:  ../../../Data/LISA/Tesla/test


31597it [00:19, 1601.52it/s]


Size:  6992
Start writing to:  ../../../Data/LISA/Tesla/test


6992it [00:04, 1648.22it/s]


Size:  1199
Start writing to:  ../../../Data/LISA/Tesla/test


1199it [00:00, 1570.43it/s]


VAL
Size:  15798
Start writing to:  ../../../Data/LISA/Tesla/val


6270it [00:03, 1597.19it/s]


Size:  3496
Start writing to:  ../../../Data/LISA/Tesla/val


3496it [00:02, 1644.08it/s]


Size:  599
Start writing to:  ../../../Data/LISA/Tesla/val


599it [00:00, 1634.02it/s]


TRAIN
Size:  2000
Start writing to:  ../../../Data/LISA/Tesla/1000


2000it [00:01, 1616.96it/s]


Size:  1000
Start writing to:  ../../../Data/LISA/Tesla/1000


1000it [00:00, 1675.99it/s]


Size:  1000
Start writing to:  ../../../Data/LISA/Tesla/1000


1000it [00:00, 1612.08it/s]
