In [1]:
import pandas as pd
import numpy as np
import os
import sys

### Concatenate All Raw Results

In [2]:
sampling_dir = '/Users/jujohnson/git/DDOS-Classification/combined-experiments/data-methods/tests'
baselines_dir = '/Users/jujohnson/git/DDOS-Classification/combined-experiments/baselines/tests/'
results_file = 'optimal-results.csv'
output_file = 'all-raw-optimal-results.csv'

In [10]:
# create a list of paths to all results

data_methods = [
    dirname for dirname in
    os.listdir(sampling_dir)
    if os.path.isdir(os.path.join(sampling_dir, dirname))
]

rus_methods = [os.path.join(sampling_dir, dirname) for dirname in data_methods if 'ros' not in dirname]
rus_d2_methods = [dirname for dirname in rus_methods if 'd2' in dirname]
rus_d4_methods = [dirname for dirname in rus_methods if 'd4' in dirname]

baseline_d2_method = ['/Users/jujohnson/git/DDOS-Classification/combined-experiments/baselines/tests/d2_w32']
baseline_d4_method = ['/Users/jujohnson/git/DDOS-Classification/combined-experiments/baselines/tests/d4_w32']

In [11]:
# concatenate all of the results

configs = [
    (rus_d2_methods, 2, 'rus'),
    (rus_d4_methods, 4, 'rus'),
    (baseline_d2_method, 2, 'baseline'),
    (baseline_d4_method, 4, 'baseline')
]

results = []

for method_list, layers, method_name in configs:
    for method in method_list:
        print(method.split('/')[-1])
        path = os.path.join(method, results_file)
        temp = pd.read_csv(path)
        temp['layers'] = layers
        temp['method'] = method_name
        temp['method_name'] = method.split('/')[-1]
        results.append(temp)


all_results = pd.concat(results, ignore_index=True)
all_results.tail()

rus_0pt00203_d2
rus_4_d2
rus_0pt4044_d2
rus_0pt2_d2
rus_0pt0183_d2
rus_0pt00087_d2
rus_0pt00135_d2
rus_0pt00051_d2
rus_0pt2689_d2
rus_0pt8109_d2
rus_8_d2
rus_2_d2
rus_0pt0047_d2
rus_0pt00022_d2
rus_0pt0386_d2
rus_0pt0081_d2
rus_0pt00305_d2
d2_w32
d4_w32


Unnamed: 0,minority_size,tp,fp,tn,fn,tpr,tnr,roc_auc,geometric_mean,arithmetic_mean,f1_score,precision,layers,method,method_name
552,0.002028,1326,647194,6851,3,0.997743,0.010475,0.992149,0.102231,0.504109,0.004081,0.002045,4,baseline,d4_w32
553,0.002028,1323,636240,17805,6,0.995485,0.027223,0.99155,0.164621,0.511354,0.004142,0.002075,4,baseline,d4_w32
554,0.002028,1328,650343,3702,1,0.999248,0.00566,0.992382,0.075206,0.502454,0.004067,0.002038,4,baseline,d4_w32
555,0.002028,1325,644456,9589,4,0.99699,0.014661,0.988721,0.120901,0.505826,0.004095,0.002052,4,baseline,d4_w32
556,0.002028,1329,652231,1814,0,1.0,0.002774,0.992403,0.052664,0.501387,0.004059,0.002033,4,baseline,d4_w32


### Fix Minority Sizes

In [12]:
all_results['minority_size'].value_counts()

0.002028    60
0.010000    30
0.902257    30
0.099957    30
0.001000    30
0.799429    30
0.700250    30
0.000500    30
0.399895    30
0.005000    30
0.301879    30
0.002500    30
0.200581    30
0.600859    30
0.007501    30
0.050018    30
0.500282    30
0.000250    17
Name: minority_size, dtype: int64

In [13]:
corrections = [
    (0.002028, 0.002),
    (0.902257, 0.9),
    (0.099957, 0.1),
    (0.799429, 0.8),
    (0.700250, 0.7),
    (0.399895, 0.4),
    (0.301879, 0.3),
    (0.200581, 0.2),
    (0.600589, 0.6),
    (0.007501, 0.0075),
    (0.050018, 0.05),
    (0.500282, 0.5),
    (0.600859, 0.6),
]

In [14]:
all_results['minority_size'] = all_results['minority_size'].round(6)
for old, new in corrections:
    all_results.loc[all_results['minority_size'] == old, 'minority_size'] = new

In [15]:
all_results['minority_size'].value_counts()

0.00200    60
0.20000    30
0.30000    30
0.00100    30
0.00050    30
0.00500    30
0.70000    30
0.10000    30
0.05000    30
0.40000    30
0.01000    30
0.00250    30
0.00750    30
0.60000    30
0.90000    30
0.80000    30
0.50000    30
0.00025    17
Name: minority_size, dtype: int64

In [16]:
all_results.to_csv(output_file)

In [18]:
all_results['method_name'].value_counts()

rus_0pt00135_d2    30
rus_0pt2_d2        30
rus_0pt00203_d2    30
rus_0pt00305_d2    30
d2_w32             30
rus_0pt0081_d2     30
rus_0pt8109_d2     30
rus_0pt0386_d2     30
rus_0pt2689_d2     30
rus_0pt00087_d2    30
rus_0pt00051_d2    30
rus_0pt0183_d2     30
rus_0pt0047_d2     30
rus_0pt4044_d2     30
rus_2_d2           30
d4_w32             30
rus_0pt00022_d2    30
rus_4_d2           30
rus_8_d2           17
Name: method_name, dtype: int64