In [1]:
import numpy as np
import pickle
import math
from tqdm.auto import tqdm

In [2]:
# from cuda import cuda
# try:
#    mp.set_start_method('spawn', force=True)
# except RuntimeError:
#    pass

In [3]:
#steam_df = pd.read_csv("https://zenodo.org/record/4273921/files/STUMPY_Basics_steamgen.csv?download=1")

# Read the 'labels' and 'traces' arrays from the NoDef.npz archive.
# For .npz files np.load returns a lazy NpzFile that keeps the archive open,
# so both arrays are pulled into memory inside a `with` block and the file
# handle is closed as soon as they have been read.
with np.load("../NoDef.npz") as raw_data:
    raw_labels = raw_data['labels']
    raw_traces = raw_data['traces']

In [4]:
# Distinct label values found in the dataset. The order is whatever set
# iteration yields; it is not sorted.
trace_ids = [label for label in set(raw_labels)]

In [5]:
def process(trace, mode='f', granularity=0.01, remove_zeroes=True, maxlen=10000):
    '''
    Convert a raw packet trace into a processed version.

    Every entry of the trace is handled as a signed value: abs(value) is used
    as the packet arrival time and sign(value) as the packet direction.

    trace: 1-D array-like containing a packet trace
    mode:
        'f': flatten the trace using packet arrival time: for each packet,
             emit ceil(gap / granularity) zeroes followed by the packet sign,
             where gap is the time since the previous packet
        'p': only include non-negative entries
        'n': only include non-positive entries
        'z': only remove zeroes
        'ipt': signed inter-packet times (sign * gap * 10, rounded to 4 places)
    granularity: in 'f' mode, the smallest distance between packet arrival
        times to be considered
    remove_zeroes: remove any zero values from the packet trace before
        processing (zeroes are always removed in 'z' mode)
    maxlen: in 'f' mode, the exact length of the returned list; shorter
        results are padded with 0.0 and longer ones are truncated

    Returns a list in 'f' and 'ipt' modes and a numpy array in 'p', 'n' and
    'z' modes.

    Raises TypeError if mode is not one of the supported values.
    '''
    trace = np.asarray(trace)
    if remove_zeroes:
        trace = trace[trace != 0]

    if mode in ('f', 'ipt'):
        arrival_times = np.abs(trace)
        # Gap to the previous packet. The first packet has no predecessor, so
        # its gap is defined as 0; indexing trace[i-1] at i == 0 would instead
        # wrap around and compare the first packet against the LAST one.
        gaps = np.diff(arrival_times, prepend=arrival_times[:1])
        directions = np.sign(trace)

        if mode == 'ipt':
            return np.round(directions * gaps * 10, 4).tolist()

        flattened = []
        for gap, direction in zip(gaps, directions):
            # Non-positive gaps (first packet, simultaneous or out-of-order
            # timestamps) insert no padding.
            flattened.extend([0] * max(0, math.ceil(gap / granularity)))
            flattened.append(direction)
            if len(flattened) >= maxlen:
                # Anything beyond maxlen would be cut off below anyway.
                break
        flattened = flattened[:maxlen]
        return flattened + [0.0] * (maxlen - len(flattened))
    elif mode == 'p':
        return trace[trace >= 0]
    elif mode == 'n':
        return trace[trace <= 0]
    elif mode == 'z':
        return trace[trace != 0]
    else:
        raise TypeError("mode must be one of: f,p,n,z,ipt")

In [6]:
# Pre-allocate one slot per trace for every label. Slot counts come from the
# labels actually present instead of a fixed 4500 per label, so a label with
# more traces cannot overflow its list and a label with fewer traces does not
# keep unused None entries.
label_values, label_counts = np.unique(raw_labels, return_counts=True)
slots_per_label = dict(zip(label_values.tolist(), label_counts.tolist()))
traces = {label: [None] * slots_per_label[label] for label in trace_ids}
# Next free slot inside each label's list.
indices = {label: 0 for label in trace_ids}

In [7]:
#print(raw_traces[345][0:20])
#print(len(raw_traces[345]))
#test = process(raw_traces[345], mode='f', granularity=0.01, remove_zeroes=True)
#print(len(test))
#print(test[9950:])

In [None]:
# Fill the pre-allocated per-label lists in `traces` with processed traces.
# `indices` holds a write cursor per label; writing into a fixed slot is used
# because a simple append was way too slow (>30 mins runtime).

for position, raw_trace in enumerate(tqdm(raw_traces)):
    label = raw_labels[position]
    cursor = indices[label]

    traces[label][cursor] = process(raw_trace, mode='ipt', remove_zeroes=True)
    indices[label] = cursor + 1

  0%|          | 0/450000 [00:00<?, ?it/s]

In [None]:
# Sanity checks on the populated structure: number of labels, number of
# slots stored under label 0, and the length of one processed trace
# (label 38, slot 8).
print(len(traces))
label_zero_slots = traces[0]
print(len(label_zero_slots))
sample_trace = traces[38][8]
print(len(sample_trace))

In [None]:
# Persist the label -> processed-traces dictionary.
# NOTE: despite the .npy extension this file is a pickle, not a NumPy array
# file; read it back with pickle.load.
output_path = '../ipt_traces.npy'
with open(output_path, 'wb') as out_file:
    pickle.dump(traces, out_file)