# 01: Merge UNSW-NB15 Datasets
This notebook merges the raw UNSW-NB15 CSV files into a single CSV file and labels its columns with the names from the feature list.

## 1.1: Import Library

In [1]:
from pathlib import Path

import pandas as pd

## 1.2: Load Features

In [2]:
# Read the feature description table; the file is decoded as latin-1.
features_path = '../data/raw/UNSW-NB15 - Features.csv'
df_features = pd.read_csv(features_path, encoding='latin-1')

In [3]:
# Render the feature table (columns: No., Name, Type, Description).
display(df_features)

Unnamed: 0,No.,Name,Type,Description
0,1,srcip,nominal,Source IP address
1,2,sport,integer,Source port number
2,3,dstip,nominal,Destination IP address
3,4,dsport,integer,Destination port number
4,5,proto,nominal,Transaction protocol
5,6,state,nominal,Indicates to the state and its dependent proto...
6,7,dur,Float,Record total duration
7,8,sbytes,Integer,Source to destination transaction bytes
8,9,dbytes,Integer,Destination to source transaction bytes
9,10,sttl,Integer,Source to destination time to live value


## 1.3: Load Network Data

In [4]:
# The four raw network-data parts, in the order they are merged later.
raw_paths = (
    '../data/raw/UNSW-NB15 - 1.csv',
    '../data/raw/UNSW-NB15 - 2.csv',
    '../data/raw/UNSW-NB15 - 3.csv',
    '../data/raw/UNSW-NB15 - 4.csv',
)

# header=None keeps the first row as data; column names are assigned later
# from the feature table. low_memory=False reads each file in a single pass.
df_nd_1, df_nd_2, df_nd_3, df_nd_4 = (
    pd.read_csv(raw_path, header=None, low_memory=False)
    for raw_path in raw_paths
)

## 1.4: Combine DataFrames (Network Data)

In [5]:
# Stack the four parts row-wise and renumber the index from 0.
network_frames = [df_nd_1, df_nd_2, df_nd_3, df_nd_4]
df = pd.concat(network_frames, ignore_index=True)

In [6]:
# Preview the first rows of the merged frame; columns are still numbered 0..48.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
0,59.166.0.0,1390,149.171.126.6,53,udp,CON,0.001055,132,164,31,...,0,3,7,1,3,1,1,1,,0
1,59.166.0.0,33661,149.171.126.9,1024,udp,CON,0.036133,528,304,31,...,0,2,4,2,3,1,1,2,,0
2,59.166.0.6,1464,149.171.126.7,53,udp,CON,0.001119,146,178,31,...,0,12,8,1,2,2,1,1,,0
3,59.166.0.5,3593,149.171.126.5,53,udp,CON,0.001209,132,164,31,...,0,6,9,1,1,1,1,1,,0
4,59.166.0.3,49664,149.171.126.0,53,udp,CON,0.001169,146,178,31,...,0,7,9,1,1,1,1,1,,0


## 1.5: Set Column Headers

In [7]:
# Use the "Name" column of the feature table, in order, as the column labels
# of the merged frame.
# NOTE(review): names are applied verbatim; the rendered header shows
# 'ct_src_ ltm' with an embedded space - confirm whether later steps expect that.
column_headers = df_features.loc[:, "Name"].to_list()
df.columns = column_headers

In [8]:
# Preview the first rows again to confirm the columns now carry feature names.
df.head()

Unnamed: 0,srcip,sport,dstip,dsport,proto,state,dur,sbytes,dbytes,sttl,...,ct_ftp_cmd,ct_srv_src,ct_srv_dst,ct_dst_ltm,ct_src_ ltm,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,attack_cat,Label
0,59.166.0.0,1390,149.171.126.6,53,udp,CON,0.001055,132,164,31,...,0,3,7,1,3,1,1,1,,0
1,59.166.0.0,33661,149.171.126.9,1024,udp,CON,0.036133,528,304,31,...,0,2,4,2,3,1,1,2,,0
2,59.166.0.6,1464,149.171.126.7,53,udp,CON,0.001119,146,178,31,...,0,12,8,1,2,2,1,1,,0
3,59.166.0.5,3593,149.171.126.5,53,udp,CON,0.001209,132,164,31,...,0,6,9,1,1,1,1,1,,0
4,59.166.0.3,49664,149.171.126.0,53,udp,CON,0.001169,146,178,31,...,0,7,9,1,1,1,1,1,,0


## 1.6: Export CSV File

In [9]:
# Export the merged, labelled frame without the row index.
# The output folder is created first: to_csv does not create missing parent
# directories, so the export would otherwise fail when the folder is absent.
output_path = Path('../data/processed/UNSW-NB15.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)