# Creation of DET-TEST
The test dataset used to evaluate local and global models trained with the UNSW-NB15 dataset contains: 
* Benign samples - 45% 
* Known attacks - 40% 
* Unknown attacks - 15% 


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

"UNSW-NB15-Test-Basic.csv" and "UNSW-NB15-Test+.csv" are the datasets provided with "Transfer-Learning-Based Intrusion Detection Framework in IoT Networks" by Rodríguez E. et al. (GitHub: https://github.com/polvalls9/Transfer-Learning-Based-Intrusion-Detection-in-5G-and-IoT-Networks.git)

In [None]:
# Load the two source test splits. low_memory=False tells pandas not to
# process each file in chunks, which avoids mixed-type column inference.
test_basic = pd.read_csv("../UNSW-NB15-Test-Basic.csv", low_memory=False)
test_plus = pd.read_csv("../UNSW-NB15-Test+.csv", low_memory=False)

In [None]:
def filter(dataset):
    """
    Keep only the rows whose 'state' and 'proto' values are both wanted.

    Wanted values are:
        proto: igmp, arp, icmp, udp, tcp, ipv6-icmp, rarp
        state: PAR, ACC, ECO, CON, FIN, INT, REQ, RST

    Returns the filtered rows (original index and column order preserved);
    the input ``dataset`` is not modified.

    Note: this name shadows Python's built-in ``filter`` inside the notebook.
    """
    wanted_states = ['PAR', 'ACC', 'ECO', 'CON', 'FIN', 'INT', 'REQ', 'RST']
    wanted_protos = ['igmp', 'arp', 'icmp', 'udp', 'tcp', 'ipv6-icmp', 'rarp']

    # A row survives only if it passes both membership checks.
    state_ok = dataset['state'].isin(wanted_states)
    proto_ok = dataset['proto'].isin(wanted_protos)
    return dataset[state_ok & proto_ok]

In [None]:
# Drop rows with unwanted state/proto values from both source splits
test_basic, test_plus = filter(test_basic), filter(test_plus)

In [None]:
# Rows with label == 0 are normal traffic; pool them from both splits
test_basic_n = test_basic.loc[test_basic['label'].eq(0)]  # normal rows of test_basic
test_plus_n = test_plus.loc[test_plus['label'].eq(0)]  # normal rows of test_plus
test_n = pd.concat([test_basic_n, test_plus_n])  # all normal samples together

In [None]:
# Known attacks: the label == 1 rows of test_basic
test_ka = test_basic.loc[test_basic['label'].eq(1)]

In [None]:
# Unknown attacks: the label == 1 rows of test_plus
test_ua = test_plus.loc[test_plus['label'].eq(1)]

In [None]:
# Draw the benign share of DET-TEST (45% of the dataset).
# A fixed seed makes the draw, and therefore the saved test set, reproducible.
test_n = test_n.sample(n=72542, random_state=42)

In [None]:
# Draw the known-attack share of DET-TEST (40% of the dataset).
# A fixed seed makes the draw, and therefore the saved test set, reproducible.
test_ka = test_ka.sample(n=64482, random_state=42)

In [None]:
# Draw the unknown-attack share of DET-TEST (15% of the dataset).
# A fixed seed makes the draw, and therefore the saved test set, reproducible.
test_ua = test_ua.sample(n=24180, random_state=42)

In [None]:
# Assemble DET-TEST in this order: known attacks, normal traffic, unknown attacks
det_test_parts = [test_ka, test_n, test_ua]
test_complete = pd.concat(det_test_parts)

In [None]:
# Write DET-TEST to disk; index=False leaves the row index out of the CSV
test_complete.to_csv(path_or_buf='../DET_TEST.csv', index=False)