# This file extracts feature sets from pcap files.

### Input & Output

`Input Files`: All files with the pcap extension in the “./pcaps/” folder is read.

`Output Files`: Fingerprint result file named *FP_MAIN.csv*.

###  importing relevant libraries

In [5]:
from scapy.all import*
import math
import pandas as pd
import os
import numpy as np


### Discovering pcap extension files under "pcaps" folder.

In [3]:
def find_the_way(path,file_format):
    count=0
    files_add = []
    for r, d, f in os.walk(path):
        for file in f:
            if file_format in file:
                files_add.append(os.path.join(r,file))  
    return files_add
files_add=find_the_way('./pcaps/','.pcap')

### List of pcap files to be processed

In [4]:
files_add

['./pcaps/august-hub-01.pcap',
 './pcaps/wyze-cam-01.pcap',
 './pcaps/nest-doorbell-01.pcap',
 './pcaps/blink-cam-01.pcap',
 './pcaps/smartthings-hub-01.pcap',
 './pcaps/geeni-doorbell-01.pcap',
 './pcaps/simplisafe-d1.pcap',
 './pcaps/merkury-cam-01.pcap',
 './pcaps/geeni-cam-03.pcap',
 './pcaps/arlo-hub-01.pcap',
 './pcaps/nightowl-doorbell-02.pcap',
 './pcaps/geeni-awarecam-2.pcap',
 './pcaps/ultraloq-hub-01.pcap',
 './pcaps/hub-02.pcap',
 './pcaps/nightowl-doorbell-01.pcap',
 './pcaps/geeni-awarecam-1.pcap',
 './pcaps/geeni-cam-01.pcap',
 './pcaps/blink-hub-01.pcap',
 './pcaps/ring-hub-01.pcap',
 './pcaps/simplisafe-d2.pcap',
 './pcaps/merkury-doorbell-01.pcap',
 './pcaps/sifely-hub-01.pcap',
 './pcaps/geeni-doorbell-02.pcap',
 './pcaps/blink-cam-02.pcap',
 './pcaps/ring-doorbell-02.pcap',
 './pcaps/simplisafe-hub-01.pcap',
 './pcaps/smartthings-cam-01.pcap',
 './pcaps/lockly-hub-01.pcap',
 './pcaps/hue-hub-01.pcap',
 './pcaps/schlage-lock-01.pcap',
 './pcaps/ring-doorbell-03.pcap'

### Port numbers are classified in this part as:

| Port Numbers | Equivalents |
| :------ | :------ |
|No port| 0|
|Well known ports (between 0 and 1023) |1|
|Rregistered ports (between 1024 and 49151)  |2|
|Dynamic ports (between  49152 and 65535) |3|
# ↓ 

In [4]:
def port_class(port):
    if 0 <= port <= 1023:
        return 1
    elif  1024 <= port <= 49151 :
        return 2
    elif 49152 <=port <= 65535 :
        return 3
    else:
        return 0

### The dictionary to be used for MAC address and device matching.
#### Datasets, their MAC addresses and Devices are given separately.

In [9]:
MAC_list={
 '9c:3d:cf:94:aa:c3': 'arlo-hub-01', 
 '18:b4:30:8f:ec:23': 'nest-cam-01', 
 '64:16:66:73:e6:e0': 'nest-doorbell-01', 
 '4c:17:44:a4:0b:30': 'amazon-show-01', 
 '08:84:9d:09:4a:69': 'amazon-dot-01', 
 'fc:49:2d:76:33:a5': 'amazon-dot-02', 
 '58:b3:fc:68:a6:e2': 'geeni-cam-01', 
 '0c:8c:24:72:71:9a': 'geeni-cam-02', 
 '0c:8c:24:7f:34:84': 'geeni-cam-03', 
 'B0:09:DA:12:90:23': 'ring-hub-01', 
 '28:6d:97:7e:a4:25': 'smartthings-hub-01', 
 'ec:b5:fa:0f:15:92': 'hue-hub-01', 
 '98:da:c4:71:ec:9d': 'kasa-cam-01', 
 '54:2b:57:29:92:a9': 'nightowl-doorbell-01', 
 '00:f6:20:7a:fc:b1': 'google-home-01', 
 '3c:e1:a1:62:d8:60': 'ring-doorbell-01', 
 '24:7d:4d:9c:f2:81': 'ring-doorbell-02', 
 '90:e2:02:30:80:a8': 'ring-doorbell-03', 
 '78:db:2f:db:43:48': 'schlage-lock-01', 
 'b8:b7:f1:2a:10:fd': 'august-hub-01', 
 'a4:cf:12:32:5b:88': 'ultraloq-hub-01', 
 'f4:b8:5e:cd:fe:2f': 'blink-cam-03', 
 'f4:cf:a2:eb:59:c4': 'sifely-hub-01', 
 '24:6f:28:d9:a7:64': 'lockly-hub-01', 
 'f4:b8:5e:ff:2b:1b': 'blink-cam-01', 
 'f4:b8:5e:35:67:b0': 'blink-cam-02', 
 '30:45:11:3a:17:ed': 'ring-chime-01', 
 '8c:f7:10:a1:a5:9f': 'simplisafe-d1', 
 '6c:21:a2:90:19:b0': 'simplisafe-d2', 
 '78:b2:13:e4:a6:ec': 'smartthings-cam-01', 
 '7c:a7:b0:dc:a0:1a': 'merkury-cam-01', 
 '2c:aa:8e:a1:27:65': 'wyze-cam-01', 
 'd4:d2:d6:3b:27:51': 'geeni-doorbell-02', 
 '54:2b:57:29:b4:6c': 'nightowl-doorbell-02', 
 'a8:10:87:2a:4e:17': 'ring-light-01', 
 '7c:25:da:2d:a4:70': 'merkury-doorbell-01', 
 '30:4a:26:12:14:f1': 'geeni-doorbell-01', 
 'b8:b7:f1:05:15:39': 'august-hub-02', 
 '00:03:7f:ee:4f:e5': 'blink-hub-01', 
 'c4:6e:7b:41:5f:28': 'geeni-awarecam-1', 
 'c4:6e:7b:0e:62:5c': 'geeni-awarecam-2'
}

In [10]:
len(MAC_list)

41

### Calculating the payload entropy value.


# ↓ 

In [13]:
def pre_entropy(payload):
    characters=[]
    for i in payload:
            characters.append(i)
    return shannon(characters)


def shannon(data):
    freq_dict={} 
    for i in data:
        if i in freq_dict:
            freq_dict[i] += 1
        else:
            freq_dict[i] = 1    
    entropy = 0.0
    logarithm_base = 2
    payload_size = len(data) #
    for key in freq_dict.keys():
        frequency = float(freq_dict[key])/payload_size
        if frequency > 0: 
            entropy = entropy + frequency * math.log(frequency, logarithm_base)
    return -entropy

### This section is the main backbone of our program. In this section, the following operations are performed briefly.


#### - The pcap_files variable contains the addresses of the pcap files. The `for` loop moves through the values of this variable, allowing all files to be processed.


#### - The second for loop examines individual packets in the processed pcap file. All features belonging to a packet are extracted and processed as a new line in the fingerprint file at the end of the second for loop.


#### - All properties are initially assigned a value of 0. These properties are then queried in the packet. If the properties have corresponding data, the data is processed in the variable, otherwise, the value of the variable remains as 0.

In [14]:
count=0
ths = open("./dataset/Main_hub.csv", "w")
header="ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,HTTPS,DHCP,BOOTP,SSDP,DNS,MDNS,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy,Label,MAC\n"
ths.write(header)
dst_ip_list={}

for i in MAC_list:
    dst_ip_list[i]=[]
import time

for i in files_add:
    print(i)
    pkt = PcapReader(i)
    print("\n\n"+"========"+ i[8:]+"========"+"\n" )
    print(pkt)

    for jj,j in enumerate(pkt):
   
        ip_add_count=0
        layer_2_arp = 0
        layer_2_llc = 0

        layer_3_eapol = 0
        layer_3_ip = 0
        layer_3_icmp = 0
        layer_3_icmp6 = 0



        layer_4_tcp = 0
        layer_4_udp = 0
        layer_4_tcp_ws=0


        layer_7_http = 0
        layer_7_https = 0
        layer_7_dhcp = 0
        layer_7_bootp = 0
        layer_7_ssdp = 0
        layer_7_dns = 0
        layer_7_mdns = 0
        layer_7_ntp = 0

        ip_padding = 0
        ip_ralert = 0


        port_class_src = 0
        port_class_dst = 0

        pck_size = 0
        pck_rawdata = 0
        entropy=0

        layer_4_payload_l=0

        try:

            pck_size=j.len

        except:pass

        try:

            if j[IP]:

                layer_3_ip = 1
            temp=str(j[IP].dst)
            if temp not in dst_ip_list[j.src]:
                dst_ip_list[j.src].append(temp)
            ip_add_count=len(dst_ip_list[j.src])

            port_class_src = port_class(j[IP].sport)
            port_class_dst = port_class(j[IP].dport)

        except:pass

        temp=str(j.show)

        if "ICMPv6" in temp:

            layer_3_icmp6 = 1

        try:
            if j[IP].ihl >5:
                if IPOption_Router_Alert(j):
                    pad=str(IPOption_Router_Alert(j).show)
                    if "Padding" in pad:
                        ip_padding=1
                    ip_ralert = 1
        except:pass

        if j.haslayer(ICMP):
            layer_3_icmp = 1


        if j.haslayer(Raw):
            pck_rawdata = 1

        if j.haslayer(UDP):

            layer_4_udp = 1
            if j[UDP].sport==68 or j[UDP].sport==67:
                layer_7_dhcp = 1
                layer_7_bootp = 1
            if j[UDP].sport==53 or j[UDP].dport==53:
                layer_7_dns = 1
            if j[UDP].sport==5353 or j[UDP].dport==5353:
                layer_7_mdns = 1
            if j[UDP].sport==1900 or j[UDP].dport==1900:
                layer_7_ssdp = 1
            if j[UDP].sport==123 or j[UDP].dport==123:
                layer_7_ntp = 1

        try:
            if j[UDP].payload:
                layer_4_payload_l=len(j[UDP].payload)
        except:pass
        if j.haslayer(TCP):
            layer_4_tcp = 1
            layer_4_tcp_ws=j[TCP].window
            if j[TCP].sport==80 or j[TCP].dport==80:
                layer_7_http = 1
            if j[TCP].sport==443 or j[TCP].dport==443:
                layer_7_https = 1
            try:
                if j[TCP].payload:
                    layer_4_payload_l=len(j[TCP].payload)
            except:pass

        if j.haslayer(ARP):
            layer_2_arp = 1

        if j.haslayer(LLC):
            layer_2_llc = 1

        if j.haslayer(EAPOL):
            layer_3_eapol = 1
        try: 
            entropy=pre_entropy(j[Raw].original)
        except:pass
        if j.src in MAC_list:
            label=MAC_list[j.src]
        line=[layer_2_arp, layer_2_llc, layer_3_eapol, layer_3_ip, layer_3_icmp, layer_3_icmp6, layer_4_tcp, layer_4_udp, layer_4_tcp_ws, layer_7_http, layer_7_https, layer_7_dhcp, layer_7_bootp, layer_7_ssdp, layer_7_dns, layer_7_mdns, layer_7_ntp, ip_padding, ip_add_count, ip_ralert, port_class_src, port_class_dst, pck_size, pck_rawdata,layer_4_payload_l,entropy, label,j.src]  
        line=str(line).replace("[","")
        line=str(line).replace("]","")
        line=str(line).replace(", ",",")
        line=str(line).replace("\'","")
        if label!="unknown":
            ths.write(str(line)+"\n")  
    
ths.close()


./pcaps/august-hub-01.pcap



<scapy.utils.PcapReader object at 0x7fc922a31d00>
./pcaps/wyze-cam-01.pcap



<scapy.utils.PcapReader object at 0x7fc9500c0dc0>
./pcaps/nest-doorbell-01.pcap



<scapy.utils.PcapReader object at 0x7fc9118ab0d0>
./pcaps/blink-cam-01.pcap



<scapy.utils.PcapReader object at 0x7fc922a17fd0>
./pcaps/smartthings-hub-01.pcap



<scapy.utils.PcapReader object at 0x7fc922a39f70>
./pcaps/geeni-doorbell-01.pcap



<scapy.utils.PcapReader object at 0x7fc95015f130>
./pcaps/simplisafe-d1.pcap



<scapy.utils.PcapReader object at 0x7fc960f36d90>
./pcaps/merkury-cam-01.pcap



<scapy.utils.PcapReader object at 0x7fc922a39dc0>
./pcaps/geeni-cam-03.pcap



<scapy.utils.PcapReader object at 0x7fc95010ddf0>
./pcaps/arlo-hub-01.pcap



<scapy.utils.PcapReader object at 0x7fc922ae6760>
./pcaps/nightowl-doorbell-02.pcap



<scapy.utils.PcapReader object at 0x7fc94044af40>
./pcaps/geeni-awarecam-2.pcap



<scapy.utils.PcapReader object at 0x7fc960f3b8e0>
./pcaps/ultraloq-hub-01

### Input & Output

`Input Files`: MAIN_Sentinel.csv

`Output Files`: IPAssess.csv

In [1]:
dataset="dataset/Main_hub.csv"
df=pd.read_csv(dataset)

NameError: name 'pd' is not defined

In [7]:
df

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy,Label,MAC
0,0,0,0,1,0,0,1,0,13638,0,...,1,0,3,1,40,0,6,0.0,august-hub-01,b8:b7:f1:2a:10:fd
1,0,0,0,1,0,0,1,0,65535,0,...,0,0,0,0,40,0,2,0.0,august-hub-01,00:1c:7f:53:d0:28
2,0,0,0,1,0,0,1,0,14000,0,...,1,0,3,1,40,0,6,0.0,august-hub-01,b8:b7:f1:2a:10:fd
3,0,0,0,1,0,0,1,0,65535,0,...,0,0,0,0,40,0,2,0.0,august-hub-01,00:1c:7f:53:d0:28
4,0,0,0,1,0,0,1,0,13638,0,...,1,0,3,1,40,0,6,0.0,august-hub-01,b8:b7:f1:2a:10:fd
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21997064,0,0,0,1,0,0,1,0,33580,0,...,6,0,3,1,40,0,6,0.0,blink-cam-03,f4:b8:5e:cd:fe:2f
21997065,0,0,0,1,0,0,1,0,65535,0,...,0,0,0,0,40,0,2,0.0,blink-cam-03,00:1c:7f:53:d0:28
21997066,0,0,0,1,0,0,1,0,65535,0,...,0,0,0,0,40,0,2,0.0,blink-cam-03,00:1c:7f:53:d0:28
21997067,0,0,0,1,0,0,1,0,65535,0,...,0,0,0,0,109,0,0,0.0,blink-cam-03,00:1c:7f:53:d0:28


In [9]:
df.MAC.value_counts()

MAC
00:1c:7f:53:d0:28    8345940
54:2b:57:29:b4:6c    2448284
54:2b:57:29:92:a9    2433098
30:4a:26:12:14:f1    1392734
58:b3:fc:68:a6:e2    1109171
2c:aa:8e:a1:27:65    1079925
24:7d:4d:9c:f2:81     489332
90:e2:02:30:80:a8     462201
9c:3d:cf:94:aa:c3     449330
64:16:66:73:e6:e0     395944
8c:f7:10:a1:a5:9f     390982
c4:6e:7b:0e:62:5c     317491
c4:6e:7b:41:5f:28     312744
28:6d:97:7e:a4:25     307966
78:b2:13:e4:a6:ec     278451
6c:21:a2:90:19:b0     214809
7c:25:da:2d:a4:70     202245
0c:8c:24:7f:34:84     201002
d4:d2:d6:3b:27:51     194967
7c:a7:b0:dc:a0:1a     171826
ec:b5:fa:0f:15:92     158217
b8:b7:f1:2a:10:fd     145031
b8:b7:f1:05:15:39     144921
00:03:7f:ee:4f:e5     103725
b0:09:da:12:90:23      87446
a4:cf:12:32:5b:88      34734
24:6f:28:d9:a7:64      34405
cc:50:e3:a2:99:60      26998
f4:b8:5e:cd:fe:2f      15535
f4:b8:5e:ff:2b:1b      14889
f4:b8:5e:35:67:b0      13640
f4:cf:a2:eb:59:c4      12089
78:db:2f:db:43:48       6989
78:e3:b5:14:51:fc          8
Name: coun

## Correlation matrix on Main Feature Set i.e FP_Main

In [8]:
dataframe1 = df
del dataframe1["MAC"]
del dataframe1["Label"]
matrix = dataframe1.corr()
matrix = matrix['IP']
matrix

ARP            -0.960623
LLC            -0.057294
EAPOL          -0.266114
IP              1.000000
ICMP            0.033161
ICMP6          -0.022623
TCP             0.147141
UDP             0.080937
TCP_w_size      0.043570
HTTP            0.006433
HTTPS           0.122633
DHCP            0.002658
BOOTP           0.002658
SSDP            0.006965
DNS             0.020500
MDNS           -0.017933
NTP             0.005026
IP_padding           NaN
IP_add_count    0.054589
IP_ralert            NaN
Portcl_src      0.132233
Portcl_dst      0.120746
Pck_size        0.072588
Pck_rawdata     0.032776
payload_l       0.027232
Entropy              NaN
Name: IP, dtype: float64

In [53]:
df_IOT_Privacy_Hub=matrix[(matrix['IP']<=(-0.05)) | (matrix['IP']>=0.05)]
df_IOT_Privacy_Hub

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.000792,-0.003679,-0.960623,-0.031855,-0.000313,-0.141347,-0.077894,-0.041855,-0.00618,...,-0.004828,,-0.05244,,-0.127027,-0.115991,-0.071332,-0.032937,-0.027074,
LLC,-0.000792,1.0,-0.000219,-0.057294,-0.0019,-1.9e-05,-0.00843,-0.004646,-0.002496,-0.000369,...,-0.000288,,-0.003128,,-0.007576,-0.006918,-0.004173,0.024045,-0.001615,
EAPOL,-0.003679,-0.000219,1.0,-0.266114,-0.008825,-8.7e-05,-0.039156,-0.021578,-0.011595,-0.001712,...,-0.001337,,-0.014527,,-0.035189,-0.032132,-0.013707,-0.009124,-0.0075,
IP,-0.960623,-0.057294,-0.266114,1.0,0.033161,-0.022623,0.147141,0.080937,0.04357,0.006433,...,0.005026,,0.054589,,0.132233,0.120746,0.072588,0.032776,0.027232,
TCP,-0.141347,-0.00843,-0.039156,0.147141,-0.339079,-0.003329,1.0,-0.829145,0.296112,0.043723,...,-0.05139,,-0.187628,,0.011595,-0.271069,0.124525,-0.350554,-0.241796,
UDP,-0.077894,-0.004646,-0.021578,0.080937,-0.186862,-0.001834,-0.829145,1.0,-0.24552,-0.036253,...,0.061979,,0.204889,,0.191183,0.471757,-0.043273,0.418532,0.298603,
HTTPS,-0.117805,-0.007026,-0.032634,0.122633,-0.282603,-0.002774,0.833442,-0.691044,0.23103,-0.054828,...,-0.04283,,-0.155422,,-0.007412,-0.344911,0.075077,-0.292199,-0.200517,
IP_add_count,-0.05244,-0.003128,-0.014527,0.054589,0.013378,-0.001235,-0.187628,0.204889,-0.119217,0.041383,...,-0.005377,,1.0,,0.275221,0.350081,0.161659,0.310324,0.020094,
Portcl_src,-0.127027,-0.007576,-0.035189,0.132233,-0.304614,-0.002992,0.011595,0.191183,-0.049077,0.002532,...,-0.005366,,0.275221,,1.0,0.83829,0.112462,0.203562,-0.007026,
Portcl_dst,-0.115991,-0.006918,-0.032132,0.120746,-0.277925,-0.002732,-0.271069,0.471757,-0.129037,-0.016274,...,-0.017024,,0.350081,,0.83829,1.0,0.116709,0.383967,0.005669,


In [55]:
df_IOT_Privacy_Hub=matrix[(matrix['IP']<=(-0.04)) | (matrix['IP']>=0.04)]
df_IOT_Privacy_Hub

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.000792,-0.003679,-0.960623,-0.031855,-0.000313,-0.141347,-0.077894,-0.041855,-0.00618,...,-0.004828,,-0.05244,,-0.127027,-0.115991,-0.071332,-0.032937,-0.027074,
LLC,-0.000792,1.0,-0.000219,-0.057294,-0.0019,-1.9e-05,-0.00843,-0.004646,-0.002496,-0.000369,...,-0.000288,,-0.003128,,-0.007576,-0.006918,-0.004173,0.024045,-0.001615,
EAPOL,-0.003679,-0.000219,1.0,-0.266114,-0.008825,-8.7e-05,-0.039156,-0.021578,-0.011595,-0.001712,...,-0.001337,,-0.014527,,-0.035189,-0.032132,-0.013707,-0.009124,-0.0075,
IP,-0.960623,-0.057294,-0.266114,1.0,0.033161,-0.022623,0.147141,0.080937,0.04357,0.006433,...,0.005026,,0.054589,,0.132233,0.120746,0.072588,0.032776,0.027232,
TCP,-0.141347,-0.00843,-0.039156,0.147141,-0.339079,-0.003329,1.0,-0.829145,0.296112,0.043723,...,-0.05139,,-0.187628,,0.011595,-0.271069,0.124525,-0.350554,-0.241796,
UDP,-0.077894,-0.004646,-0.021578,0.080937,-0.186862,-0.001834,-0.829145,1.0,-0.24552,-0.036253,...,0.061979,,0.204889,,0.191183,0.471757,-0.043273,0.418532,0.298603,
TCP_w_size,-0.041855,-0.002496,-0.011595,0.04357,-0.100406,-0.000986,0.296112,-0.24552,1.0,0.048213,...,-0.015217,,-0.119217,,-0.049077,-0.129037,-0.115573,-0.103811,-0.062939,
HTTPS,-0.117805,-0.007026,-0.032634,0.122633,-0.282603,-0.002774,0.833442,-0.691044,0.23103,-0.054828,...,-0.04283,,-0.155422,,-0.007412,-0.344911,0.075077,-0.292199,-0.200517,
IP_add_count,-0.05244,-0.003128,-0.014527,0.054589,0.013378,-0.001235,-0.187628,0.204889,-0.119217,0.041383,...,-0.005377,,1.0,,0.275221,0.350081,0.161659,0.310324,0.020094,
Portcl_src,-0.127027,-0.007576,-0.035189,0.132233,-0.304614,-0.002992,0.011595,0.191183,-0.049077,0.002532,...,-0.005366,,0.275221,,1.0,0.83829,0.112462,0.203562,-0.007026,


## Create IPAssess


In [9]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["SSDP","DNS","MDNS","Pck_rawdata","NTP","DHCP","BOOTP","ICMP","IP_padding","IP_ralert","Entropy"]
name="IPAssess_hub.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'LLC', 'EAPOL', 'IP', 'ICMP6', 'TCP', 'UDP', 'TCP_w_size',
       'HTTP', 'HTTPS', 'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size',
       'payload_l', 'Label', 'MAC'],
      dtype='object')

In [10]:
len(df.columns)  # includes label, Mac

17

### Create Main Feature set without IP_padding,IP_alert, Entropy as there values are 0

In [22]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_padding","IP_ralert","Entropy"]
name="Main_hub_reduced.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'LLC', 'EAPOL', 'IP', 'ICMP', 'ICMP6', 'TCP', 'UDP',
       'TCP_w_size', 'HTTP', 'HTTPS', 'DHCP', 'BOOTP', 'SSDP', 'DNS', 'MDNS',
       'NTP', 'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size',
       'Pck_rawdata', 'payload_l', 'Label', 'MAC'],
      dtype='object')

In [23]:
len(df.columns) # includes label, Mac

25

### Consider features by setting threshold at 0.03 for correlation

In [30]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_padding","IP_ralert","Entropy","ICMP6","HTTP","DHCP","BOOTP","SSDP","DNS","NTP","MDNS","NTP","payload_l"]
name="Threshold_0.03.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'LLC', 'EAPOL', 'IP', 'ICMP', 'TCP', 'UDP', 'TCP_w_size',
       'HTTPS', 'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size',
       'Pck_rawdata', 'Label', 'MAC'],
      dtype='object')

In [34]:
len(df.columns)

16

### Consider features by setting threshold at 0.04 for correlation

In [48]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_padding","IP_ralert","Entropy","ICMP","ICMP6","HTTP","DHCP","BOOTP","SSDP","DNS","NTP","MDNS","NTP","payload_l","Pck_rawdata"]
name="Threshold_0.04.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'LLC', 'EAPOL', 'IP', 'TCP', 'UDP', 'TCP_w_size', 'HTTPS',
       'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size', 'Label', 'MAC'],
      dtype='object')

In [49]:
len(df.columns)

14

### Consider features by setting threshold at 0.05 for correlation

In [35]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_padding","IP_ralert","Entropy","ICMP","ICMP6","HTTP","TCP_w_size","DHCP","BOOTP","SSDP","DNS","NTP","MDNS","NTP","Pck_rawdata","payload_l"]
name="Threshold_0.05.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'LLC', 'EAPOL', 'IP', 'TCP', 'UDP', 'HTTPS', 'IP_add_count',
       'Portcl_src', 'Portcl_dst', 'Pck_size', 'Label', 'MAC'],
      dtype='object')

In [36]:
len(df.columns)

13

### Consider features by setting threshold at 0.06 for correlation

In [33]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_padding","IP_ralert","Entropy","ICMP","ICMP6","HTTP","SSDP","DNS","MDNS","Pck_rawdata","NTP","DHCP","BOOTP","payload_l"]
name="Threshold_0.06.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'EAPOL', 'IP', 'TCP', 'UDP', 'TCP_w_size', 'HTTPS',
       'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size', 'Label', 'MAC'],
      dtype='object')

In [34]:
len(df.columns)

13

### Consider features by setting threshold at 0.075 for correlation

In [41]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_add_count","LLC","IP_padding","IP_ralert","Entropy","ICMP","ICMP6","HTTP","SSDP","DNS","MDNS","Pck_rawdata","NTP","DHCP","BOOTP","payload_l"]
name="Threshold_0.075.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'EAPOL', 'IP', 'TCP', 'UDP', 'TCP_w_size', 'HTTPS', 'Portcl_src',
       'Portcl_dst', 'Pck_size', 'Label', 'MAC'],
      dtype='object')

In [42]:
len(df.columns)

12