In [8]:
from scipy.signal import argrelextrema

def calc_scalogram(data, scales):
    S,scales= scalogramCWT(data,scales)
    return S
def show_scalo(data, scales, colors):
    for i in range (0, len(data)):
        plt.plot(scales, data[i], colors[i], lw=3)
    plt.show()
    
# Get top X spikes from scalogram, sorted by value
def get_spikes(scalo, comparator):
    len(scalo)
    spikes = deque([(-1,-1)] * 5, maxlen=5)
    #aux = argrelextrema(scalo, comparator, order=int(len(scalo)/10))
    aux = argrelextrema(scalo, comparator)
    if aux[0].size:
        for x in np.nditer(aux) or []:
            spikes.append((scalo[x], scales[x]))
    ordered = sorted(spikes, key=lambda x: x[1], reverse=True)
    values = np.hstack(zip(*ordered))
    return values

In [9]:
from itertools import groupby
from scipy import stats
def get_stats_numpy(data):
    mean = np.mean(data)
    median = np.median(data)
    std = np.std(data)
    var = np.var(data)
    skew = stats.skew(data)
    kurt = stats.kurtosis(data)
    pc = [25,50,75,90]
    percentiles = np.array(np.percentile(data, pc))
    silences = np.count_nonzero(np.asarray(data)==0.0)
    silence_mean = np.mean(list(sum(1 for _ in g) for k, g in groupby(data) if k==0))
    longest_silence = max(sum(1 for _ in g) for k, g in groupby(data) if k==0) if silences > 0 else 0
    shortest_silence = min(sum(1 for _ in g) for k, g in groupby(data) if k==0) if silences > 0 else 0
    
    #print("Mean: " + str(mean))
    #print("Media: " + str(median))
    #print("StdDev: " + str(std))
    #print("Variance: " + str(var))
    #print("Skewness: " + str(skew))
    #print("Kurtosis: " + str(kurt))
    #print("Pc25: " + str(percentiles[0]))
    #print("Pc50: " + str(percentiles[1]))
    #print("Pc75: " + str(percentiles[2]))
    
    features = np.hstack((mean, median, std, var, skew, kurt, percentiles, silences, silence_mean, longest_silence, shortest_silence))
    return features

In [10]:
# Put it all on a numpy array
def get_features_numpy(info, local_max_up_bytes, local_min_up_bytes, local_max_up_packet, local_min_up_packet, 
                       local_max_down_bytes, local_min_down_bytes, local_max_down_packet, local_min_down_packet):
    np.set_printoptions(suppress=True)

    result = []

    for idx in range(0, len(info)):
        result.append(
            np.hstack(
                (
                 get_stats_numpy(info[idx]['up']['byte_count']),
                 get_stats_numpy(info[idx]['up']['packet_count']),
                 local_max_up_bytes[idx], local_min_up_bytes[idx],
                 local_max_up_packet[idx], local_min_up_packet[idx],
                 get_stats_numpy(info[idx]['down']['byte_count']),
                 get_stats_numpy(info[idx]['down']['packet_count']),
                 local_max_down_bytes[idx], local_min_down_bytes[idx],
                 local_max_down_packet[idx], local_min_down_packet[idx],

            ))
        )
    return result


In [15]:
def get_info(data):
    info = []
    up = defaultdict(list)
    down = defaultdict(list)
    result = {}
    count = 0
    for second in data:
        up['packet_count'].append(second[0])
        up['byte_count'].append(second[1])
        down['packet_count'].append(second[2])
        down['byte_count'].append(second[3])
        count+=1
        if count >= sample_size:
            result['up'] = up
            result['down'] = down
            info.append(result)
            up = defaultdict(list)
            down = defaultdict(list)
            result = {}
            count = 0
    return info

In [16]:
import os
import numpy as np
from collections import defaultdict, deque
from scalogram import *
import pandas as pd

base_url = '../../../shared/normal/video/'
url_extension = 'dat/'
services = ['netflix', 'youtube', 'twitch']
windows = ['01s', '1s']
sample_size = 30 # number of intervals for each sample
#[up_n_packets, up_n_bytes, up_flag, down_n_packets, down_n_bytes, down_flag]
data = np.loadtxt(os.path.join(base_url, file))
data = np.delete(data, [2,5], 1)

# Normalize
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
data = scaler.fit_transform(data)

# Split data into categories
info = get_info(data)

#Scalogram
N = sample_size
dj=1/128
s0=2
J=1/dj * np.log2(0.5*N/s0)
scales=s0*2**(np.arange(J)*dj)

scalos_up = []
scalos_down = []
for idx, sample in enumerate(info):
        scalos_up.append(
            (calc_scalogram(np.asarray(sample['up']['byte_count']), scales),
             calc_scalogram(np.asarray(sample['up']['packet_count']), scales))
        )
        scalos_down.append(
            (calc_scalogram(np.asarray(sample['down']['byte_count']), scales),
             calc_scalogram(np.asarray(sample['down']['packet_count']), scales))
        )

local_max_up_bytes = []
local_min_up_bytes = []
local_max_up_packet = []
local_min_up_packet = []
local_max_down_bytes = []
local_min_down_bytes = []
local_max_down_packet = []
local_min_down_packet = []

for scalo in scalos_up:
    local_max_up_bytes.append(get_spikes(scalo[0], np.greater))
    local_min_up_bytes.append(get_spikes(scalo[0], np.less))
    local_max_up_packet.append(get_spikes(scalo[1], np.greater))
    local_min_up_packet.append(get_spikes(scalo[1], np.less))

for scalo in scalos_down:
    local_max_down_bytes.append(get_spikes(scalo[0], np.greater))
    local_min_down_bytes.append(get_spikes(scalo[0], np.less))
    local_max_down_packet.append(get_spikes(scalo[1], np.greater))
    local_min_down_packet.append(get_spikes(scalo[1], np.less))

# Put it in a pandas Dataframe and save it to csv
samples = get_features_numpy(info, local_max_up_bytes, local_min_up_bytes, local_max_up_packet, local_min_up_packet,
                            local_max_down_bytes, local_min_down_bytes, local_max_down_packet, local_min_down_packet)


names = [
    'up_bytes_mean', 'up_bytes_median', 'up_bytes_std', 'up_bytes_var', 'up_bytes_skew', 'up_bytes_kurt',
    'up_bytes_perc25', 'up_bytes_perc50', 'up_bytes_perc75', 'up_bytes_perc90',
    'up_bytes_silences', 'up_bytes_silence_mean', 'up_bytes_longest_silence', 'up_bytes_shortest_silence',
    'up_packet_mean', 'up_packet_median', 'up_packet_std', 'up_packet_var', 'up_packet_skew', 'up_packet_kurt',
    'up_packet_perc25', 'up_packet_perc50', 'up_packet_perc75', 'up_packet_perc90',
    'up_packet_silences', 'up_packet_silence_mean', 'up_packet_longest_silence', 'up_packet_shortest_silence',
    'up_bytes_1max_y', 'up_bytes_2max_y', 'up_bytes_3max_y', 'up_bytes_4max_y', 'up_bytes_5max_y',
    'up_bytes_1max_x', 'up_bytes_2max_x', 'up_bytes_3max_x', 'up_bytes_4max_x', 'up_bytes_5max_x',
    'up_bytes_1min_y', 'up_bytes_2min_y', 'up_bytes_3min_y', 'up_bytes_4min_y', 'up_bytes_5min_y',
    'up_bytes_1min_x', 'up_bytes_2min_x', 'up_bytes_3min_x', 'up_bytes_4min_x', 'up_bytes_5min_x',
    'up_packet_1max_y', 'up_packet_2max_y', 'up_packet_3max_y', 'up_packet_4max_y', 'up_packet_5max_y',
    'up_packet_1max_x', 'up_packet_2max_x', 'up_packet_3max_x', 'up_packet_4max_x', 'up_packet_5max_x',
    'up_packet_1min_y', 'up_packet_2min_y', 'up_packet_3min_y', 'up_packet_4min_y', 'up_packet_5min_y',
    'up_packet_1min_x', 'up_packet_2min_x', 'up_packet_3min_x', 'up_packet_4min_x', 'up_packet_5min_x',

    'down_bytes_mean', 'down_bytes_median', 'down_bytes_std', 'down_bytes_var', 'down_bytes_skew', 'down_bytes_kurt',
    'down_bytes_perc25', 'down_bytes_perc50', 'down_bytes_perc75', 'down_bytes_perc90',
    'down_bytes_silences', 'down_bytes_silence_mean', 'down_bytes_longest_silence', 'down_bytes_shortest_silence',
    'down_packet_mean', 'down_packet_median', 'down_packet_std', 'down_packet_var', 'down_packet_skew', 'down_packet_kurt',
    'down_packet_perc25', 'down_packet_perc50', 'down_packet_perc75', 'down_packet_perc90',
    'down_packet_silences', 'down_packet_silence_mean', 'down_packet_longest_silence', 'down_packet_shortest_silence',
    'down_bytes_1max_y', 'down_bytes_2max_y', 'down_bytes_3max_y', 'down_bytes_4max_y', 'down_bytes_5max_y',
    'down_bytes_1max_x', 'down_bytes_2max_x', 'down_bytes_3max_x', 'down_bytes_4max_x', 'down_bytes_5max_x',
    'down_bytes_1min_y', 'down_bytes_2min_y', 'down_bytes_3min_y', 'down_bytes_4min_y', 'down_bytes_5min_y',
    'down_bytes_1min_x', 'down_bytes_2min_x', 'down_bytes_3min_x', 'down_bytes_4min_x', 'down_bytes_5min_x',
    'down_packet_1max_y', 'down_packet_2max_y', 'down_packet_3max_y', 'down_packet_4max_y', 'down_packet_5max_y',
    'down_packet_1max_x', 'down_packet_2max_x', 'down_packet_3max_x', 'down_packet_4max_x', 'down_packet_5max_x',
    'down_packet_1min_y', 'down_packet_2min_y', 'down_packet_3min_y', 'down_packet_4min_y', 'down_packet_5min_y',
    'down_packet_1min_x', 'down_packet_2min_x', 'down_packet_3min_x', 'down_packet_4min_x', 'down_packet_5min_x'
]


df = pd.DataFrame(samples, columns=names)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [17]:
df

Unnamed: 0,up_bytes_mean,up_bytes_median,up_bytes_std,up_bytes_var,up_bytes_skew,up_bytes_kurt,up_bytes_perc25,up_bytes_perc50,up_bytes_perc75,up_bytes_perc90,...,down_packet_1min_y,down_packet_2min_y,down_packet_3min_y,down_packet_4min_y,down_packet_5min_y,down_packet_1min_x,down_packet_2min_x,down_packet_3min_x,down_packet_4min_x,down_packet_5min_x
0,1.490274,1.146471,1.870742,3.499677,1.159543,1.187847,-0.089983,1.146471,2.612128,4.065991,...,0.010871,0.003963,0.003223,-1.0,-1.0,12.676306,7.375866,3.589418,-1.0,-1.0
1,0.71139,-0.176941,1.658951,2.752119,2.003447,4.674233,-0.406819,-0.176941,1.619804,2.613997,...,0.000741,0.001973,-1.0,-1.0,-1.0,6.690841,3.291511,-1.0,-1.0,-1.0
2,-0.18479,-0.3819,0.338289,0.114439,1.801492,2.697196,-0.405781,-0.3819,-0.046115,0.174605,...,0.000932,0.001589,0.001902,-1.0,-1.0,12.204785,9.617254,5.780723,-1.0,-1.0
3,-0.238235,-0.406819,0.437151,0.191101,4.312918,18.917031,-0.406819,-0.406819,-0.196668,-0.062624,...,0.000173,0.002261,0.003,-1.0,-1.0,13.454343,6.236018,2.483716,-1.0,-1.0
4,-0.182021,-0.390414,0.316998,0.100488,1.543937,1.639733,-0.406819,-0.390414,-0.085103,0.225876,...,0.000558,0.000525,0.002222,-1.0,-1.0,12.539757,6.236018,2.456961,-1.0,-1.0
5,-0.300086,-0.406819,0.269456,0.072606,3.857492,15.564506,-0.406819,-0.406819,-0.294112,-0.141223,...,0.002305,0.00097,0.003238,0.003337,-1.0,12.337687,7.871085,5.47601,2.483716,-1.0
6,-0.274052,-0.39872,0.243524,0.059304,2.776448,8.942831,-0.406819,-0.39872,-0.115681,-0.054121,...,0.000514,0.000839,0.000893,-1.0,-1.0,12.271056,9.462278,6.236018,-1.0,-1.0
7,-0.288028,-0.406819,0.237938,0.056615,2.624991,7.539477,-0.406819,-0.406819,-0.213125,-0.059104,...,0.000747,0.001419,0.002069,-1.0,-1.0,12.472035,9.309799,6.874477,-1.0,-1.0
8,-0.273734,-0.402666,0.248094,0.06155,2.655538,8.171379,-0.406819,-0.402666,-0.126064,-0.028869,...,0.001629,0.001699,0.001338,-1.0,-1.0,11.943262,8.915094,6.036658,-1.0,-1.0
9,-0.327988,-0.406819,0.148613,0.022086,1.567846,0.910844,-0.406819,-0.406819,-0.402666,-0.122949,...,0.000811,0.002084,0.00205,-1.0,-1.0,12.676306,9.36035,5.53564,-1.0,-1.0
