In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm.notebook import tqdm
import gc

In [None]:
# Datasets to convert, as (feather file stem, save_labels) pairs.
# When save_labels is true, a labels tensor is written next to the
# packed sequences for that dataset.
files = [
    # ('test', True),
    ('train', False),
    ('train_full_genuine', False),
    ('train_95_genuine', False),
    ('train_90_genuine', False),
]

# Feature columns copied into every sequence, in this exact order:
# {snd, rcv} x {pos, spd, acl, hed} x {x, y}, followed by delta_time.
fields = [
    f'{prefix}_{quantity}_{axis}'
    for prefix in ('snd', 'rcv')
    for quantity in ('pos', 'spd', 'acl', 'hed')
    for axis in ('x', 'y')
]
fields.append('delta_time')

# Upper bound on the number of rows in a single sequence window.
max_window_size = 100

# For every (file, save_labels) entry in `files`:
#   1. read out_veremi/<file>.feather,
#   2. group rows by (dataset_id, sender, receiver, attack_type),
#   3. cut each group's `fields` columns into consecutive windows of at most
#      max_window_size rows (the trailing window may be shorter),
#   4. pack all windows into one PackedSequence and save it to
#      out_veremi/<file>-packed-arst.pt (plus a labels tensor when requested).
for file, save_labels in files:
    sequences = []
    if save_labels:
        labels = []
    df = pd.read_feather(f'out_veremi/{file}.feather')
    grouped = df.groupby(['dataset_id', 'sender', 'receiver', 'attack_type'])
    # Group keys ordered from the largest group to the smallest.
    sorted_order = grouped.size().sort_values(ascending=False).index

    for k in tqdm(sorted_order):
        t = grouped.get_group(k)
        # The key tuple follows the groupby column order, so index 3 is attack_type.
        attack_type = k[3]

        array = t[fields].to_numpy()

        # Stepping by max_window_size yields every full window followed by the
        # shorter remainder (if any), in row order.
        for start in range(0, len(array), max_window_size):
            window = array[start : start + max_window_size]
            sequences.append(torch.tensor(window))
            if save_labels:
                labels.append(attack_type)
        del t

    # `grouped` keeps a reference to `df`, so it has to be dropped as well;
    # otherwise the frame stays alive while the sequences are being packed.
    del sorted_order
    del grouped
    del df

    gc.collect()

    if save_labels:
        # Same key and direction as the sequence sort below; both sorts are
        # stable, so labels and sequences end up in matching order.
        # NOTE(review): because of enumerate(), each saved entry is an
        # (original_index, label) pair rather than a bare label, and
        # torch.tensor() needs attack_type to be numeric — confirm that the
        # consumers of this file expect that layout.
        labels = sorted(enumerate(labels), key=lambda x: sequences[x[0]].shape[0], reverse=True)
        labels_tensor = torch.tensor(labels)
        torch.save(labels_tensor, f'out_veremi/{file}-packed-labels-arst.pt')
        del labels_tensor
        del labels

    # pack_sequence (enforce_sorted=True by default) requires the sequences
    # to be ordered by decreasing length.
    sequences.sort(key=lambda x: x.shape[0], reverse=True)
    packed = nn.utils.rnn.pack_sequence(sequences)
    torch.save(packed, f'out_veremi/{file}-packed-arst.pt')

    del packed
    del sequences

  0%|          | 0/626366 [00:00<?, ?it/s]