# Dataset Generation

Here we will generate 2 batches of datasets for our multi-class classification experiments. 

First we produce our baseline datasets containing most of the features present in CIC_DDoS2019, and then we produce our time-based feature datasets, each containing only the 25 time-based features as well as a label.

We start by importing the relevant libraries and setting a seed so that our results are reproducible. For the same reason, we also print out the versions of the libraries we are using.

In [1]:
import os, platform, pprint, sys
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# seed is passed as random_state to every DataFrame.sample call in this notebook
# so that the down-sampled datasets can be regenerated exactly
seed: int = 14

# set up pretty printer for easier data evaluation
# (the narrow width makes long lists print one entry per line)
pretty = pprint.PrettyPrinter(indent=4, width=30).pprint

# report the interpreter and library versions this notebook was run with
print(
    f'''
    python:\t{platform.python_version()}

    \tmatplotlib:\t{mpl.__version__}
    \tnumpy:\t\t{np.__version__}
    \tpandas:\t\t{pd.__version__}
    '''
)


    python:	3.7.10

    	matplotlib:	3.3.4
    	numpy:		1.20.3
    	pandas:		1.2.5
    


## Preliminaries

Next, we do some preliminary setup. We list the data files we will be using and define a mapping from the original column names to new names that are more readable and understandable.

In [2]:
# file names of the first batch of captures
data_set_1: list = [
    'DrDoS_DNS.csv',
    'DrDoS_LDAP.csv',
    'DrDoS_MSSQL.csv',
    'DrDoS_NetBIOS.csv',
    'DrDoS_NTP.csv',
    'DrDoS_SNMP.csv',
    'DrDoS_SSDP.csv',
    'DrDoS_UDP.csv',
    'Syn.csv',
    'TFTP.csv',
    'UDPLag.csv',
]

# file names of the second batch of captures
data_set_2: list = [
    'LDAP.csv',
    'MSSQL.csv',
    'NetBIOS.csv',
    'Portmap.csv',
    'Syn.csv',
    'UDP.csv',
    'UDPLag.csv',
]

# every file name: the first batch followed by the second batch
data_set: list = [*data_set_1, *data_set_2]


# DDoS attack type of each file, index-aligned with data_set: the file name
# without its '.csv' extension and without the 'DrDoS_' prefix
data_location: list = [
    name[:-len('.csv')].replace('DrDoS_', '') for name in data_set
]


# standardized column names for our data
#
# _raw_column_names lists the headers exactly as they are spelled in the mapping's
# keys (note that many carry a leading space). The standardized name is the raw
# name with surrounding whitespace stripped and underscores turned into spaces;
# the only exception is 'Unnamed: 0', which becomes 'Unnamed'.
_raw_column_names: list = [
    'Unnamed: 0',
    'Flow ID',
    ' Source IP',
    ' Source Port',
    ' Destination IP',
    ' Destination Port',
    ' Protocol',
    ' Total Length of Bwd Packets',
    ' Flow Duration',
    ' Total Fwd Packets',
    ' Total Backward Packets',
    'Total Length of Fwd Packets',
    ' Timestamp',
    ' Init_Win_bytes_backward',
    ' Fwd Packet Length Max',
    ' Fwd Packet Length Min',
    ' Fwd Packet Length Mean',
    ' Fwd Packet Length Std',
    'Bwd Packet Length Max',
    ' Bwd Packet Length Min',
    ' Bwd Packet Length Mean',
    ' Bwd Packet Length Std',
    'Flow Bytes/s',
    ' Flow Packets/s',
    ' Flow IAT Mean',
    ' Flow IAT Std',
    ' Flow IAT Max',
    ' Flow IAT Min',
    'Fwd IAT Total',
    ' Fwd IAT Mean',
    ' Fwd IAT Std',
    ' Fwd IAT Max',
    ' Fwd IAT Min',
    'Bwd IAT Total',
    ' Bwd IAT Mean',
    ' Bwd IAT Std',
    ' Bwd IAT Max',
    ' Bwd IAT Min',
    'Fwd PSH Flags',
    ' Bwd PSH Flags',
    ' Fwd URG Flags',
    ' Bwd URG Flags',
    ' Fwd Header Length',
    ' Bwd Header Length',
    'Fwd Packets/s',
    ' Bwd Packets/s',
    ' Min Packet Length',
    ' Max Packet Length',
    ' Packet Length Mean',
    ' Packet Length Std',
    ' Packet Length Variance',
    'FIN Flag Count',
    ' SYN Flag Count',
    ' RST Flag Count',
    ' PSH Flag Count',
    ' ACK Flag Count',
    ' URG Flag Count',
    ' CWE Flag Count',
    ' ECE Flag Count',
    ' Down/Up Ratio',
    ' Average Packet Size',
    ' Avg Fwd Segment Size',
    ' Avg Bwd Segment Size',
    ' Fwd Header Length.1',
    'Fwd Avg Bytes/Bulk',
    ' Inbound',
    ' Fwd Avg Packets/Bulk',
    ' Fwd Avg Bulk Rate',
    ' Bwd Avg Bytes/Bulk',
    ' Bwd Avg Packets/Bulk',
    'Bwd Avg Bulk Rate',
    'Subflow Fwd Packets',
    ' Subflow Fwd Bytes',
    ' Subflow Bwd Packets',
    ' Subflow Bwd Bytes',
    'Init_Win_bytes_forward',
    ' act_data_pkt_fwd',
    ' min_seg_size_forward',
    'Active Mean',
    ' Active Std',
    ' Active Max',
    ' Active Min',
    'Idle Mean',
    ' Idle Std',
    ' Idle Max',
    ' Idle Min',
    'SimillarHTTP',
    ' Label',
]

# raw header -> standardized header, in the same order as _raw_column_names
new_column_names: dict = {
    raw: 'Unnamed' if raw == 'Unnamed: 0' else raw.strip().replace('_', ' ')
    for raw in _raw_column_names
}


In [3]:
def get_file_path(directory: str):
    '''
        Build a path-joining helper bound to a single directory.

        The callable that is returned takes a file name and gives back
        os.path.join(directory, file).
    '''
    return lambda file: os.path.join(directory, file)


# directories, relative to the current directory, that hold the data files
data_path_1: str = './original/01-12/'
data_path_2: str = './original/03-11/'


# one path-building function per directory, produced by the get_file_path closure
file_path_1 = get_file_path(data_path_1)
file_path_2 = get_file_path(data_path_2)


# the complete relative path of every file, index-aligned with data_set
file_set: list = [file_path_1(name) for name in data_set_1]
file_set += [file_path_2(name) for name in data_set_2]


print(f'We will be cleaning {len(file_set)} files:')
print(f'Benign samples will be grabbed from each dataset and saved separately\n')
pretty(file_set)

We will be cleaning 18 files:
Benign samples will be grabbed from each dataset and saved separately

[   './original/01-12/DrDoS_DNS.csv',
    './original/01-12/DrDoS_LDAP.csv',
    './original/01-12/DrDoS_MSSQL.csv',
    './original/01-12/DrDoS_NetBIOS.csv',
    './original/01-12/DrDoS_NTP.csv',
    './original/01-12/DrDoS_SNMP.csv',
    './original/01-12/DrDoS_SSDP.csv',
    './original/01-12/DrDoS_UDP.csv',
    './original/01-12/Syn.csv',
    './original/01-12/TFTP.csv',
    './original/01-12/UDPLag.csv',
    './original/03-11/LDAP.csv',
    './original/03-11/MSSQL.csv',
    './original/03-11/NetBIOS.csv',
    './original/03-11/Portmap.csv',
    './original/03-11/Syn.csv',
    './original/03-11/UDP.csv',
    './original/03-11/UDPLag.csv']


Now that we have our file paths, we set up a list of features to prune during our preprocessing phase

In [4]:
# prune is the list of every feature we already know we do not want to use
#   * Fwd Header Length.1 is eliminated because it is a duplicate
#   * Unnamed is eliminated because it is unlabeled, so we cannot verify which
#     quality of the data it describes
#   * all the other features are eliminated because they are string values and
#     cannot be used for classification
#     NOTE(review): the two port columns look numeric rather than string-valued;
#     confirm the reason for dropping them
prune: list = [
    'Fwd Header Length.1', 'Unnamed',
    'Source Port', 'Destination Port',
    'Flow ID', 'Source IP', 'Destination IP',
    'Timestamp', 'SimillarHTTP',
]

Maranhao et al. found in their study 'Tensor based framework for Distributed Denial of Service attack detection' that nine features were filled with only 0 values for every data collection in the dataset. Since an empty column of zeros will not contribute to the model's performance, we will remove those columns.

In [5]:
# toPrune lists the features whose columns hold nothing but 0s
toPrune: list = [
    'Fwd URG Flags', 'Bwd URG Flags', 'Fwd PSH Flags',
    'Fwd Avg Bytes/Bulk', 'Fwd Avg Packets/Bulk', 'Fwd Avg Bulk Rate',
    'Bwd Avg Bytes/Bulk', 'Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate',
]

# merge them into prune, skipping anything that is already listed there
for feature in toPrune:
    if feature in prune:
        continue
    prune.append(feature)

# show the final, numbered list of features to remove
print(f'We will be pruning {len(prune)} features')
for position, feature in enumerate(prune, start=1):
    print(f'\t{position}:\t{feature}')

We will be pruning 18 features
	1:	Fwd Header Length.1
	2:	Unnamed
	3:	Source Port
	4:	Destination Port
	5:	Flow ID
	6:	Source IP
	7:	Destination IP
	8:	Timestamp
	9:	SimillarHTTP
	10:	Fwd URG Flags
	11:	Bwd URG Flags
	12:	Fwd PSH Flags
	13:	Fwd Avg Bytes/Bulk
	14:	Fwd Avg Packets/Bulk
	15:	Fwd Avg Bulk Rate
	16:	Bwd Avg Bytes/Bulk
	17:	Bwd Avg Packets/Bulk
	18:	Bwd Avg Bulk Rate


## Preprocessing and Data Cleaning

Now that the preliminaries are done, we start processing the data. First we define some functions to load and clean the data, then we combine the data into dataframes based on their DDoS attack type. We keep the data manageable by sampling it down to sets of a million samples, using our seed to ensure that the results are reproducible.

In [7]:
def clean_data(df: pd.DataFrame, columns_to_drop=None) -> pd.DataFrame:
    '''
        Return a cleaned copy of the given dataframe; the dataframe that is
        passed in is left untouched.

        The cleaning steps are, in order:
            1. remove every column named in columns_to_drop that is present
            2. remove every row that contains a missing value (NaN/None)
            3. remove every row that holds an infinite value, either as a float
               or spelled out as a string, in any of the remaining columns
            4. strip the DrDoS_ prefix from the attack names

        Parameters:
            df              : the dataframe to clean
            columns_to_drop : names of the columns to remove; when omitted the
                              notebook-level prune list is used

        Returns:
            a new dataframe holding only the surviving rows and columns
    '''

    if columns_to_drop is None:
        columns_to_drop = prune

    # remove the unwanted features; drop() without inplace builds a new frame, so
    # the caller's data (usually a boolean-mask slice of a larger frame) is never
    # modified and pandas has no reason to raise SettingWithCopyWarning
    present: list = [col for col in columns_to_drop if col in df.columns]
    cleaned: pd.DataFrame = df.drop(columns=present)

    # drop missing values/NaN etc.
    cleaned = cleaned.dropna()

    # Search for infinite values, and for Infinity/NaN spelled out as text, that
    # were not picked up previously. A float NaN is not listed: it never compares
    # equal to anything and has already been removed by dropna above.
    invalid_values: list = [
        np.inf, -np.inf, 'Infinity', '-Infinity', 'inf', '-inf', 'NaN', 'nan'
    ]

    for col in cleaned.columns:
        for value in invalid_values:
            invalid_rows = cleaned[col] == value
            if invalid_rows.any():
                print(f'deleting {int(invalid_rows.sum())} rows with Infinity in column {col}')
                cleaned = cleaned[~invalid_rows]

    # Standardize the attack names in a single pass over the frame
    standard_labels: dict = {
        'DrDoS_DNS': 'DNS',
        'DrDoS_LDAP': 'LDAP',
        'DrDoS_MSSQL': 'MSSQL',
        'DrDoS_NetBIOS': 'NetBIOS',
        'DrDoS_NTP': 'NTP',
        'DrDoS_SNMP': 'SNMP',
        'DrDoS_SSDP': 'SSDP',
        'DrDoS_UDP': 'UDP',
    }

    return cleaned.replace(standard_labels)


def load_data(filePath: str) -> tuple:
    '''
        Loads the dataset from the given filepath and splits it into benign and
        malicious samples. Function will only work when filepath is a .csv file.

        The raw dataframe is cached as a pickle under ./cache/, mirroring the
        file's location below ./original/, so that later runs can skip parsing
        the CSV. The columns are renamed with new_column_names after loading.

        Parameters:
            filePath : path to the .csv file to load

        Returns:
            a tuple (benignSamples, maliciousSamples): benignSamples holds every
            row labelled BENIGN, maliciousSamples holds all the other rows,
            randomly down-sampled (using seed) to at most 1,200,000 rows
    '''

    # mirror the layout of the source directory inside ./cache/
    source_root: str = './original/'
    if filePath.startswith(source_root):
        relativePath: str = filePath[len(source_root):]
    elif filePath.startswith('./'):
        relativePath = filePath[2:]
    else:
        relativePath = filePath
    pickleDump: str = f'./cache/{relativePath}.pickle'

    print(f'Loading Dataset: {filePath}')
    print(f'\tTo Dataset Cache: {pickleDump}\n')

    # check if data already exists within cache
    # NOTE: unpickling executes code stored in the file, so only cache files
    # written by this notebook should ever be placed in ./cache/
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)

    # if not, load the data and cache it, creating the cache directory first
    # because to_pickle does not create missing directories
    else:
        df = pd.read_csv(filePath, low_memory=True)
        os.makedirs(os.path.dirname(pickleDump), exist_ok=True)
        df.to_pickle(pickleDump)

    df = df.rename(columns=new_column_names)

    # split the data into benign and malicious samples, keeping at most 1 million
    # (+ 200 thousand to replace samples removed by cleaning) malicious samples
    max_malicious: int = 1200000
    is_benign = df['Label'] == 'BENIGN'
    benignSamples = df[is_benign]
    maliciousSamples = df[~is_benign]
    if maliciousSamples.shape[0] > max_malicious:
        maliciousSamples = maliciousSamples.sample(n=max_malicious, random_state=seed)

    print(f'\tLoaded {df.shape[0]} Samples as {benignSamples.shape[0]} Benign samples and {maliciousSamples.shape[0]} Malicious samples\n')

    return (benignSamples, maliciousSamples)

In [8]:
# one (initially empty) list of malicious dataframes per attack type
malicious_dict: dict = {attack: [] for attack in data_location}

# the cleaned benign dataframes gathered from every file
benign_list: list = []


# load each file, clean both halves and store them for further processing
for path, attack in zip(file_set, data_location):
    benign_part, malicious_part = load_data(path)
    benign_list.append(clean_data(benign_part))
    malicious_dict[attack].append(clean_data(malicious_part))
    print()


# save the benign samples as a single dataframe
benign_df: pd.DataFrame = pd.concat(benign_list, ignore_index=True)
print(f'Benign Samples: {len(benign_df)}')

Loading Dataset: ./original/01-12/DrDoS_DNS.csv
	To Dataset Cache: ./cache/01-12/DrDoS_DNS.csv.pickle

	Loaded 5074413 Samples as 3402 Benign samples and 1200000 Malicious samples

deleting 26 rows with Infinity in column Flow Bytes/s
deleting 38356 rows with Infinity in column Flow Bytes/s

Loading Dataset: ./original/01-12/DrDoS_LDAP.csv
	To Dataset Cache: ./cache/01-12/DrDoS_LDAP.csv.pickle

	Loaded 2181542 Samples as 1612 Benign samples and 1200000 Malicious samples

deleting 10 rows with Infinity in column Flow Bytes/s
deleting 21382 rows with Infinity in column Flow Bytes/s

Loading Dataset: ./original/01-12/DrDoS_MSSQL.csv
	To Dataset Cache: ./cache/01-12/DrDoS_MSSQL.csv.pickle

	Loaded 4524498 Samples as 2006 Benign samples and 1200000 Malicious samples

deleting 9 rows with Infinity in column Flow Bytes/s
deleting 33524 rows with Infinity in column Flow Bytes/s

Loading Dataset: ./original/01-12/DrDoS_NetBIOS.csv
	To Dataset Cache: ./cache/01-12/DrDoS_NetBIOS.csv.pickle

	Load

In [9]:
# print the shape of every cleaned malicious dataframe, grouped by attack type
for attack, frames in malicious_dict.items():
    for frame in frames:
        print(attack, ':', frame.shape)

DNS : (1161642, 70)
LDAP : (1178617, 70)
LDAP : (1169393, 70)
MSSQL : (1166476, 70)
MSSQL : (1158109, 70)
NetBIOS : (1161918, 70)
NetBIOS : (1154624, 70)
NTP : (1193062, 70)
SNMP : (1197484, 70)
SSDP : (1180613, 70)
UDP : (1184567, 70)
UDP : (1175358, 70)
Syn : (1046564, 70)
Syn : (1120924, 70)
TFTP : (1166188, 70)
UDPLag : (330518, 70)
UDPLag : (670447, 70)
Portmap : (177197, 70)


In [15]:
# merge the per-file dataframes of each attack type into a single dataframe,
# sampling it down to one million rows whenever it is larger than that
attack_samples: dict = {}
for attack, frames in malicious_dict.items():
    merged = pd.concat(frames, ignore_index=True)
    if merged.shape[0] <= 1000000:
        attack_samples[attack] = merged
    else:
        attack_samples[attack] = merged.sample(n=1000000, random_state=seed)
    del merged

# summarize what we ended up with
print('Benign', ':', benign_df.shape)
for attack, frame in attack_samples.items():
    print(attack, ':', frame.shape)

Benign : (112731, 70)
DNS : (1000000, 70)
LDAP : (1000000, 70)
MSSQL : (1000000, 70)
NetBIOS : (1000000, 70)
NTP : (1000000, 70)
SNMP : (1000000, 70)
SSDP : (1000000, 70)
UDP : (1000000, 70)
Syn : (1000000, 70)
TFTP : (1000000, 70)
UDPLag : (1000000, 70)
Portmap : (177197, 70)


## Baseline dataset generation

Now that we have all of our data in 13 dataframes, we can begin to create our datasets. We will form 13 datasets for one-vs-all multi-class classification.


The first dataset will be the Benign vs DDoS dataset. It will be a 50/50 split of the Benign vs DDoS samples. The DDoS samples will be equal parts of each DDoS attack type. Since we have around 112 thousand benign samples, we will use all of them together with roughly 112 thousand DDoS samples, i.e. about 9,400 samples (one twelfth of the benign count) from each of the 12 DDoS attack types.


Datasets 2-12 will each be one of the DDoS attack types (except Portmap, since it has fewer samples) vs a basket of all the other DDoS attack types and the benign samples. Since each of these DDoS attack types has a million samples and the basket is sampled to match, each of the datasets will have roughly 2 million samples.


Dataset 13 will be the Portmap vs. all dataset. Since we have around 177 thousand Portmap samples, we will use all of them together with a basket of roughly 177 thousand samples drawn in equal parts (about 14,766 each) from the other DDoS attack types and the benign samples.

In [24]:
# First we make the first dataset, benign vs DDoS. It will be a 50/50 split between
# benign and DDoS samples where the DDoS samples are chosen equally from a pool of
# all the DDoS attack types.

total_benign = benign_df.shape[0]

# share the DDoS half equally between however many attack types we have
total_each_attack_type = total_benign // len(attack_samples)

DDoS_list = [
    samples.sample(n=total_each_attack_type, random_state=seed)
    for samples in attack_samples.values()
]

ddos_df = pd.concat(DDoS_list, ignore_index=True)

# collapse every attack name into the single DDOS class; replace is given a
# real list rather than a dict view
to_replace = list(attack_samples.keys())
ddos_df.replace(to_replace=to_replace, value="DDOS", inplace=True)


Benign_vs_DDoS = pd.concat([benign_df, ddos_df], ignore_index=True)

# to_csv does not create missing directories, so make sure the target exists
os.makedirs("./prepared/baseline", exist_ok=True)
Benign_vs_DDoS.to_csv("./prepared/baseline/Benign_vs_DDoS.csv", index=False)

In [26]:
# Build one "<attack> vs all" baseline dataset per attack type: every sample of
# the attack itself plus a basket made of equal parts of all the other attack
# types and the benign samples.

# every name that has to be relabelled inside a basket; identical for each dataset
to_replace = list(attack_samples.keys())
to_replace.append('BENIGN')

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('./prepared/baseline', exist_ok=True)

for key in attack_samples.keys():

    attack_df = attack_samples[key]
    total_attacks = attack_df.shape[0]

    # the basket draws from len(attack_samples) sources: each of the other
    # attack types plus the benign samples
    total_each_other_type = total_attacks // len(attack_samples)

    other_type_list = [
        samples.sample(n=total_each_other_type, random_state=seed)
        for attack, samples in attack_samples.items()
        if attack != key
    ]
    other_type_list.append(benign_df.sample(n=total_each_other_type, random_state=seed))

    other_df = pd.concat(other_type_list, ignore_index=True)

    # everything in the basket becomes the single NOT<attack> class
    other_df.replace(to_replace, value=f'NOT{key}', inplace=True)

    Attack_vs_all = pd.concat([attack_df, other_df], ignore_index=True)

    Attack_vs_all.to_csv(f'./prepared/baseline/{key}_vs_all.csv', index=False)

In [27]:
Benign_vs_DDoS.shape

(225459, 70)

## Time-Based Dataset Generation

Since one of our research directions is investigating the use of time-based features as a methodology to detect and classify DDoS traffic, as they have been used to detect and classify Tor traffic, we will now create datasets containing only the time-based features. Lashkari et al. used a set of 23 time-based features, listed in the figure below, but in addition to those 23, there are 2 more:
 * Forward Inter Arrival Time Total (Fwd IAT Total)
 * Backward Inter Arrival Time Total (Bwd IAT Total)

![Feature descriptions used by Lashkari et al, 2017 in their conference paper -- Characterization of Tor Traffic using Time based Features](./assets/CIC_feature_descriptions.png "Feature descriptions used by Lashkari et al, 2017 in their conference paper -- Characterization of Tor Traffic using Time based Features")

In [28]:
# every time-based feature, spelled the way the columns are named in the dataframes
# we are dealing with, followed by Label (25 features + the label = 26 columns)
_statistics: tuple = ('Mean', 'Std', 'Max', 'Min')

# inter-arrival-time statistics for the forward, backward and whole-flow directions
time_based_features: list = [
    f'{direction} IAT {statistic}'
    for direction in ('Fwd', 'Bwd', 'Flow')
    for statistic in _statistics
]

# active and idle period statistics
time_based_features += [
    f'{state} {statistic}'
    for state in ('Active', 'Idle')
    for statistic in _statistics
]

# rates, duration, the two IAT totals and finally the label
time_based_features += [
    'Flow Bytes/s', 'Flow Packets/s', 'Flow Duration',
    'Fwd IAT Total', 'Bwd IAT Total', 'Label',
]

In [30]:

# keep only the time-based columns (and the label) of the benign vs DDoS dataset
Time_Based_Benign_vs_DDoS = Benign_vs_DDoS.loc[:, time_based_features]

Time_Based_Benign_vs_DDoS.to_csv("./prepared/timebased/Benign_vs_DDoS.csv", index=False)

In [31]:
Time_Based_Benign_vs_DDoS.shape

(225459, 26)

In [32]:
# Build one time-based "<attack> vs all" dataset per attack type: every sample of
# the attack itself plus a basket made of equal parts of all the other attack
# types and the benign samples, restricted to the time-based columns.

# every name that has to be relabelled inside a basket; identical for each dataset
to_replace = list(attack_samples.keys())
to_replace.append('BENIGN')

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('./prepared/timebased', exist_ok=True)

for key in attack_samples.keys():

    attack_df = attack_samples[key]
    total_attacks = attack_df.shape[0]

    # the basket draws from len(attack_samples) sources: each of the other
    # attack types plus the benign samples
    total_each_other_type = total_attacks // len(attack_samples)

    other_type_list = [
        samples.sample(n=total_each_other_type, random_state=seed)
        for attack, samples in attack_samples.items()
        if attack != key
    ]
    other_type_list.append(benign_df.sample(n=total_each_other_type, random_state=seed))

    other_df = pd.concat(other_type_list, ignore_index=True)

    # everything in the basket becomes the single NOT<attack> class
    other_df.replace(to_replace, value=f'NOT{key}', inplace=True)

    Attack_vs_all = pd.concat([attack_df, other_df], ignore_index=True)

    # keep only the time-based columns and the label
    Time_Based_Attack_vs_all = Attack_vs_all[time_based_features]

    Time_Based_Attack_vs_all.to_csv(f'./prepared/timebased/{key}_vs_all.csv', index=False)