# This file extracts feature sets from pcap files.

### Input & Output

`Input Files`: All files with the pcap extension in the “./pcaps/” folder is read.

`Output Files`: Fingerprint result file named *FP_MAIN.csv*.

###  importing relevant libraries

In [1]:
from scapy.all import*
import math
import pandas as pd
import os
import numpy as np




### Discovering pcap extension files under "pcaps" folder.

In [2]:
def find_the_way(path,file_format):
    count=0
    files_add = []
    for r, d, f in os.walk(path):
        for file in f:
            if file_format in file:
                files_add.append(os.path.join(r,file))  
    return files_add
files_add=find_the_way('./pcaps/','.pcap')

### List of pcap files to be processed

In [3]:
files_add

['./pcaps/merkury-doorbell-01.pcapng',
 './pcaps/blink-cam-02.pcapng',
 './pcaps/geeni-awarecam-1.pcapng',
 './pcaps/nest-doorbell-01.pcapng',
 './pcaps/merkury-cam-01.pcapng',
 './pcaps/simplisafe-d1.pcapng',
 './pcaps/geeni-doorbell-02.pcapng',
 './pcaps/schlage-lock-01.pcapng',
 './pcaps/geeni-cam-03.pcapng',
 './pcaps/lockly-hub-01.pcapng',
 './pcaps/blink-cam-03.pcapng',
 './pcaps/simplisafe-d2.pcapng',
 './pcaps/blink-hub-01.pcapng',
 './pcaps/smartthings-cam-01.pcapng',
 './pcaps/arlo-hub-01.pcapng',
 './pcaps/blink-cam-01.pcapng',
 './pcaps/sifely-hub-01.pcapng',
 './pcaps/hub-02.pcapng',
 './pcaps/geeni-awarecam-2.pcapng',
 './pcaps/smartthings-hub-01.pcapng',
 './pcaps/simplisafe-hub-01.pcapng',
 './pcaps/ring-hub-01.pcapng',
 './pcaps/ultraloq-hub-01.pcapng',
 './pcaps/august-hub-01.pcapng',
 './pcaps/ring-doorbell-03.pcapng',
 './pcaps/hue-hub-01.pcapng']

### Port numbers are classified in this part as:

| Port Numbers | Equivalents |
| :------ | :------ |
|No port| 0|
|Well known ports (between 0 and 1023) |1|
|Rregistered ports (between 1024 and 49151)  |2|
|Dynamic ports (between  49152 and 65535) |3|
# ↓ 

In [4]:
def port_class(port):
    if 0 <= port <= 1023:
        return 1
    elif  1024 <= port <= 49151 :
        return 2
    elif 49152 <=port <= 65535 :
        return 3
    else:
        return 0

### The dictionary to be used for MAC address and device matching.
#### Datasets, their MAC addresses and Devices are given separately.

In [5]:
MAC_list={
 '9c:3d:cf:94:aa:c3': 'arlo-hub-01', 
 '18:b4:30:8f:ec:23': 'nest-cam-01', 
 '64:16:66:73:e6:e0': 'nest-doorbell-01', 
 '4c:17:44:a4:0b:30': 'amazon-show-01', 
 '08:84:9d:09:4a:69': 'amazon-dot-01', 
 'fc:49:2d:76:33:a5': 'amazon-dot-02', 
 '58:b3:fc:68:a6:e2': 'geeni-cam-01', 
 '0c:8c:24:72:71:9a': 'geeni-cam-02', 
 '0c:8c:24:7f:34:84': 'geeni-cam-03', 
 'B0:09:DA:12:90:23': 'ring-hub-01', 
 '28:6d:97:7e:a4:25': 'smartthings-hub-01', 
 'ec:b5:fa:0f:15:92': 'hue-hub-01', 
 '98:da:c4:71:ec:9d': 'kasa-cam-01', 
 '54:2b:57:29:92:a9': 'nightowl-doorbell-01', 
 '00:f6:20:7a:fc:b1': 'google-home-01', 
 '3c:e1:a1:62:d8:60': 'ring-doorbell-01', 
 '24:7d:4d:9c:f2:81': 'ring-doorbell-02', 
 '90:e2:02:30:80:a8': 'ring-doorbell-03', 
 '78:db:2f:db:43:48': 'schlage-lock-01', 
 'b8:b7:f1:2a:10:fd': 'august-hub-01', 
 'a4:cf:12:32:5b:88': 'ultraloq-hub-01', 
 'f4:b8:5e:cd:fe:2f': 'blink-cam-03', 
 'f4:cf:a2:eb:59:c4': 'sifely-hub-01', 
 '24:6f:28:d9:a7:64': 'lockly-hub-01', 
 'f4:b8:5e:ff:2b:1b': 'blink-cam-01', 
 'f4:b8:5e:35:67:b0': 'blink-cam-02', 
 '30:45:11:3a:17:ed': 'ring-chime-01', 
 '8c:f7:10:a1:a5:9f': 'simplisafe-d1', 
 '6c:21:a2:90:19:b0': 'simplisafe-d2', 
 '78:b2:13:e4:a6:ec': 'smartthings-cam-01', 
 '7c:a7:b0:dc:a0:1a': 'merkury-cam-01', 
 '2c:aa:8e:a1:27:65': 'wyze-cam-01', 
 'd4:d2:d6:3b:27:51': 'geeni-doorbell-02', 
 '54:2b:57:29:b4:6c': 'nightowl-doorbell-02', 
 'a8:10:87:2a:4e:17': 'ring-light-01', 
 '7c:25:da:2d:a4:70': 'merkury-doorbell-01', 
 '30:4a:26:12:14:f1': 'geeni-doorbell-01', 
 'b8:b7:f1:05:15:39': 'august-hub-02', 
 '00:03:7f:ee:4f:e5': 'blink-hub-01', 
 'c4:6e:7b:41:5f:28': 'geeni-awarecam-1', 
 'c4:6e:7b:0e:62:5c': 'geeni-awarecam-2'
}

In [6]:
len(MAC_list)

41

### Calculating the payload entropy value.


# ↓ 

In [7]:
def pre_entropy(payload):
    characters=[]
    for i in payload:
            characters.append(i)
    return shannon(characters)


def shannon(data):
    freq_dict={} 
    for i in data:
        if i in freq_dict:
            freq_dict[i] += 1
        else:
            freq_dict[i] = 1    
    entropy = 0.0
    logarithm_base = 2
    payload_size = len(data) #
    for key in freq_dict.keys():
        frequency = float(freq_dict[key])/payload_size
        if frequency > 0: 
            entropy = entropy + frequency * math.log(frequency, logarithm_base)
    return -entropy

### This section is the main backbone of our program. In this section, the following operations are performed briefly.


#### - The pcap_files variable contains the addresses of the pcap files. The `for` loop moves through the values of this variable, allowing all files to be processed.


#### - The second for loop examines individual packets in the processed pcap file. All features belonging to a packet are extracted and processed as a new line in the fingerprint file at the end of the second for loop.


#### - All properties are initially assigned a value of 0. These properties are then queried in the packet. If the properties have corresponding data, the data is processed in the variable, otherwise, the value of the variable remains as 0.

In [12]:
count=0
ths = open("./dataset/Main_hub.csv", "w")
header="ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,HTTPS,DHCP,BOOTP,SSDP,DNS,MDNS,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy,Label,MAC\n"
ths.write(header)
dst_ip_list={}

for i in MAC_list:
    dst_ip_list[i]=[]
import time

for i in files_add:
    print(i)
    pkt = PcapReader(i)
    print("\n\n"+"========"+ i[8:]+"========"+"\n" )
    print(pkt)

    for jj,j in enumerate(pkt):
   
        ip_add_count=0
        layer_2_arp = 0
        layer_2_llc = 0

        layer_3_eapol = 0
        layer_3_ip = 0
        layer_3_icmp = 0
        layer_3_icmp6 = 0



        layer_4_tcp = 0
        layer_4_udp = 0
        layer_4_tcp_ws=0


        layer_7_http = 0
        layer_7_https = 0
        layer_7_dhcp = 0
        layer_7_bootp = 0
        layer_7_ssdp = 0
        layer_7_dns = 0
        layer_7_mdns = 0
        layer_7_ntp = 0

        ip_padding = 0
        ip_ralert = 0


        port_class_src = 0
        port_class_dst = 0

        pck_size = 0
        pck_rawdata = 0
        entropy=0

        layer_4_payload_l=0

        try:

            pck_size=j.len

        except:pass

        try:

            if j[IP]:

                layer_3_ip = 1
            temp=str(j[IP].dst)
            if temp not in dst_ip_list[j.src]:
                dst_ip_list[j.src].append(temp)
            ip_add_count=len(dst_ip_list[j.src])

            port_class_src = port_class(j[IP].sport)
            port_class_dst = port_class(j[IP].dport)

        except:pass

        temp=str(j.show)

        if "ICMPv6" in temp:

            layer_3_icmp6 = 1

        try:
            if j[IP].ihl >5:
                if IPOption_Router_Alert(j):
                    pad=str(IPOption_Router_Alert(j).show)
                    if "Padding" in pad:
                        ip_padding=1
                    ip_ralert = 1
        except:pass

        if j.haslayer(ICMP):
            layer_3_icmp = 1


        if j.haslayer(Raw):
            pck_rawdata = 1

        if j.haslayer(UDP):

            layer_4_udp = 1
            if j[UDP].sport==68 or j[UDP].sport==67:
                layer_7_dhcp = 1
                layer_7_bootp = 1
            if j[UDP].sport==53 or j[UDP].dport==53:
                layer_7_dns = 1
            if j[UDP].sport==5353 or j[UDP].dport==5353:
                layer_7_mdns = 1
            if j[UDP].sport==1900 or j[UDP].dport==1900:
                layer_7_ssdp = 1
            if j[UDP].sport==123 or j[UDP].dport==123:
                layer_7_ntp = 1

        try:
            if j[UDP].payload:
                layer_4_payload_l=len(j[UDP].payload)
        except:pass
        if j.haslayer(TCP):
            layer_4_tcp = 1
            layer_4_tcp_ws=j[TCP].window
            if j[TCP].sport==80 or j[TCP].dport==80:
                layer_7_http = 1
            if j[TCP].sport==443 or j[TCP].dport==443:
                layer_7_https = 1
            try:
                if j[TCP].payload:
                    layer_4_payload_l=len(j[TCP].payload)
            except:pass

        if j.haslayer(ARP):
            layer_2_arp = 1

        if j.haslayer(LLC):
            layer_2_llc = 1

        if j.haslayer(EAPOL):
            layer_3_eapol = 1
        try: 
            entropy=pre_entropy(j[Raw].original)
        except:pass
        if j.src in MAC_list:
            label=MAC_list[j.src]
        line=[layer_2_arp, layer_2_llc, layer_3_eapol, layer_3_ip, layer_3_icmp, layer_3_icmp6, layer_4_tcp, layer_4_udp, layer_4_tcp_ws, layer_7_http, layer_7_https, layer_7_dhcp, layer_7_bootp, layer_7_ssdp, layer_7_dns, layer_7_mdns, layer_7_ntp, ip_padding, ip_add_count, ip_ralert, port_class_src, port_class_dst, pck_size, pck_rawdata,layer_4_payload_l,entropy, label,j.src]  
        line=str(line).replace("[","")
        line=str(line).replace("]","")
        line=str(line).replace(", ",",")
        line=str(line).replace("\'","")
        if label!="unknown":
            ths.write(str(line)+"\n")  
    
ths.close()


./pcaps/merkury-doorbell-01.pcapng



<scapy.utils.PcapNgReader object at 0x119ea85d0>
./pcaps/blink-cam-02.pcapng



<scapy.utils.PcapNgReader object at 0x11f6107d0>
./pcaps/geeni-awarecam-1.pcapng



<scapy.utils.PcapNgReader object at 0x10fa9ba50>
./pcaps/nest-doorbell-01.pcapng



<scapy.utils.PcapNgReader object at 0x11fc68590>
./pcaps/merkury-cam-01.pcapng



<scapy.utils.PcapNgReader object at 0x11e71af50>
./pcaps/simplisafe-d1.pcapng



<scapy.utils.PcapNgReader object at 0x118806a50>
./pcaps/geeni-doorbell-02.pcapng



<scapy.utils.PcapNgReader object at 0x11f468190>
./pcaps/schlage-lock-01.pcapng



<scapy.utils.PcapNgReader object at 0x11fb3ee50>
./pcaps/geeni-cam-03.pcapng



<scapy.utils.PcapNgReader object at 0x11fd7e3d0>
./pcaps/lockly-hub-01.pcapng



<scapy.utils.PcapNgReader object at 0x1380ba890>
./pcaps/blink-cam-03.pcapng



<scapy.utils.PcapNgReader object at 0x11f81e210>
./pcaps/simplisafe-d2.pcapng



<scapy.utils.PcapNgReader object at 0x11f889090>
./pcaps/blin

### Input & Output

`Input Files`: MAIN_Sentinel.csv

`Output Files`: IPAssess.csv

In [13]:
dataset="dataset/Main_hub.csv"
df=pd.read_csv(dataset)

In [14]:
df

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy,Label,MAC
0,0,0,0,1,0,0,0,1,0,0,...,1,0,2,2,216,0,0,0.0,merkury-doorbell-01,7c:25:da:2d:a4:70
1,0,0,0,1,0,0,1,0,11,0,...,0,0,0,0,52,0,0,0.0,merkury-doorbell-01,00:1c:7f:53:d0:28
2,0,0,0,1,0,0,1,0,10196,0,...,2,0,3,2,52,0,0,0.0,merkury-doorbell-01,7c:25:da:2d:a4:70
3,0,0,0,1,0,0,0,1,0,0,...,2,0,2,2,216,0,0,0.0,merkury-doorbell-01,7c:25:da:2d:a4:70
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.0,merkury-doorbell-01,7c:25:da:2d:a4:70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7530359,0,0,0,1,0,0,0,1,0,0,...,22,0,2,2,333,0,0,0.0,hue-hub-01,ec:b5:fa:0f:15:92
7530360,0,0,0,1,0,0,0,1,0,0,...,22,0,2,2,342,0,0,0.0,hue-hub-01,ec:b5:fa:0f:15:92
7530361,0,0,0,1,0,0,0,1,0,0,...,22,0,2,2,342,0,0,0.0,hue-hub-01,ec:b5:fa:0f:15:92
7530362,0,0,0,1,0,0,0,1,0,0,...,22,0,2,2,336,0,0,0.0,hue-hub-01,ec:b5:fa:0f:15:92


## Correlation matrix on Main Feature Set i.e FP_Main

In [15]:
dataframe1 = df
del dataframe1["MAC"]
del dataframe1["Label"]
matrix = dataframe1.corr()
matrix

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.001692,-0.008069,-0.960443,-0.012529,-0.000798,-0.307725,-0.086049,-0.072974,-0.009115,...,-0.011469,,-0.068661,,-0.195467,-0.179102,-0.114558,-0.007578,-0.022337,
LLC,-0.001692,1.0,-0.000462,-0.054956,-0.000717,-4.6e-05,-0.017608,-0.004924,-0.004176,-0.000522,...,-0.000656,,-0.003929,,-0.011185,-0.010248,-0.006446,0.223278,-0.001278,
EAPOL,-0.008069,-0.000462,1.0,-0.262094,-0.003419,-0.000218,-0.083975,-0.023482,-0.019914,-0.002487,...,-0.00313,,-0.018737,,-0.053341,-0.048875,-0.023011,-0.002068,-0.006096,
IP,-0.960443,-0.054956,-0.262094,1.0,0.013045,-0.025906,0.320399,0.089247,0.075979,0.009491,...,0.011941,,0.071489,,0.203517,0.186479,0.116987,-0.004797,0.021496,
ICMP,-0.012529,-0.000717,-0.003419,0.013045,1.0,-0.000338,-0.130386,-0.03646,-0.03092,-0.003862,...,-0.004859,,-0.017141,,-0.082059,-0.073435,-0.036261,-0.003211,-0.009465,
ICMP6,-0.000798,-4.6e-05,-0.000218,-0.025906,-0.000338,1.0,-0.0083,-0.002321,-0.001968,-0.000246,...,-0.000309,,-0.001852,,-0.005272,-0.004831,-0.003089,-0.000204,-0.000603,
TCP,-0.307725,-0.017608,-0.083975,0.320399,-0.130386,-0.0083,1.0,-0.895469,0.237139,0.029621,...,-0.11935,,0.005578,,-0.173892,-0.369003,0.032366,-0.077996,-0.161417,
UDP,-0.086049,-0.004924,-0.023482,0.089247,-0.03646,-0.002321,-0.895469,1.0,-0.212351,-0.026525,...,0.133282,,0.028847,,0.291999,0.49034,0.024882,0.002716,0.184,
TCP_w_size,-0.072974,-0.004176,-0.019914,0.075979,-0.03092,-0.001968,0.237139,-0.212351,1.0,0.009557,...,-0.028303,,-0.129699,,-0.20699,-0.238582,-0.153234,-0.018637,-0.015778,
HTTP,-0.009115,-0.000522,-0.002487,0.009491,-0.003862,-0.000246,0.029621,-0.026525,0.009557,1.0,...,-0.003535,,-0.014494,,-0.012438,-0.026642,-0.008988,-0.000737,-0.000667,


In [39]:
df_IOT_Privacy_Hub=matrix[(matrix['IP']<=(-0.075)) | (matrix['IP']>=0.075)]
df_IOT_Privacy_Hub

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.001692,-0.008069,-0.960443,-0.012529,-0.000798,-0.307725,-0.086049,-0.072974,-0.009115,...,-0.011469,,-0.068661,,-0.195467,-0.179102,-0.114558,-0.007578,-0.022337,
EAPOL,-0.008069,-0.000462,1.0,-0.262094,-0.003419,-0.000218,-0.083975,-0.023482,-0.019914,-0.002487,...,-0.00313,,-0.018737,,-0.053341,-0.048875,-0.023011,-0.002068,-0.006096,
IP,-0.960443,-0.054956,-0.262094,1.0,0.013045,-0.025906,0.320399,0.089247,0.075979,0.009491,...,0.011941,,0.071489,,0.203517,0.186479,0.116987,-0.004797,0.021496,
TCP,-0.307725,-0.017608,-0.083975,0.320399,-0.130386,-0.0083,1.0,-0.895469,0.237139,0.029621,...,-0.11935,,0.005578,,-0.173892,-0.369003,0.032366,-0.077996,-0.161417,
UDP,-0.086049,-0.004924,-0.023482,0.089247,-0.03646,-0.002321,-0.895469,1.0,-0.212351,-0.026525,...,0.133282,,0.028847,,0.291999,0.49034,0.024882,0.002716,0.184,
TCP_w_size,-0.072974,-0.004176,-0.019914,0.075979,-0.03092,-0.001968,0.237139,-0.212351,1.0,0.009557,...,-0.028303,,-0.129699,,-0.20699,-0.238582,-0.153234,-0.018637,-0.015778,
HTTPS,-0.181351,-0.010377,-0.049489,0.188821,-0.07684,-0.004892,0.589329,-0.527726,0.176114,-0.055901,...,-0.070337,,0.079199,,-0.173811,-0.486866,-0.076596,-0.046415,-0.091155,
Portcl_src,-0.195467,-0.011185,-0.053341,0.203517,-0.082059,-0.005272,-0.173892,0.291999,-0.20699,-0.012438,...,-0.01199,,0.33668,,1.0,0.851788,0.399458,-0.025982,-0.059207,
Portcl_dst,-0.179102,-0.010248,-0.048875,0.186479,-0.073435,-0.004831,-0.369003,0.49034,-0.238582,-0.026642,...,-0.031109,,0.217366,,0.851788,1.0,0.389263,-0.018849,-0.075658,
Pck_size,-0.114558,-0.006446,-0.023011,0.116987,-0.036261,-0.003089,0.032366,0.024882,-0.153234,-0.008988,...,-0.035103,,0.359593,,0.399458,0.389263,1.0,-0.026195,-0.048174,


In [36]:
df_IOT_Privacy_Hub=matrix[(matrix['IP']<=(-0.07)) | (matrix['IP']>=0.07)]
df_IOT_Privacy_Hub

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.001692,-0.008069,-0.960443,-0.012529,-0.000798,-0.307725,-0.086049,-0.072974,-0.009115,...,-0.011469,,-0.068661,,-0.195467,-0.179102,-0.114558,-0.007578,-0.022337,
EAPOL,-0.008069,-0.000462,1.0,-0.262094,-0.003419,-0.000218,-0.083975,-0.023482,-0.019914,-0.002487,...,-0.00313,,-0.018737,,-0.053341,-0.048875,-0.023011,-0.002068,-0.006096,
IP,-0.960443,-0.054956,-0.262094,1.0,0.013045,-0.025906,0.320399,0.089247,0.075979,0.009491,...,0.011941,,0.071489,,0.203517,0.186479,0.116987,-0.004797,0.021496,
TCP,-0.307725,-0.017608,-0.083975,0.320399,-0.130386,-0.0083,1.0,-0.895469,0.237139,0.029621,...,-0.11935,,0.005578,,-0.173892,-0.369003,0.032366,-0.077996,-0.161417,
UDP,-0.086049,-0.004924,-0.023482,0.089247,-0.03646,-0.002321,-0.895469,1.0,-0.212351,-0.026525,...,0.133282,,0.028847,,0.291999,0.49034,0.024882,0.002716,0.184,
TCP_w_size,-0.072974,-0.004176,-0.019914,0.075979,-0.03092,-0.001968,0.237139,-0.212351,1.0,0.009557,...,-0.028303,,-0.129699,,-0.20699,-0.238582,-0.153234,-0.018637,-0.015778,
HTTPS,-0.181351,-0.010377,-0.049489,0.188821,-0.07684,-0.004892,0.589329,-0.527726,0.176114,-0.055901,...,-0.070337,,0.079199,,-0.173811,-0.486866,-0.076596,-0.046415,-0.091155,
IP_add_count,-0.068661,-0.003929,-0.018737,0.071489,-0.017141,-0.001852,0.005578,0.028847,-0.129699,-0.014494,...,-0.01251,,1.0,,0.33668,0.217366,0.359593,-0.002837,-0.028957,
Portcl_src,-0.195467,-0.011185,-0.053341,0.203517,-0.082059,-0.005272,-0.173892,0.291999,-0.20699,-0.012438,...,-0.01199,,0.33668,,1.0,0.851788,0.399458,-0.025982,-0.059207,
Portcl_dst,-0.179102,-0.010248,-0.048875,0.186479,-0.073435,-0.004831,-0.369003,0.49034,-0.238582,-0.026642,...,-0.031109,,0.217366,,0.851788,1.0,0.389263,-0.018849,-0.075658,


## Create IPAssess


In [22]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["SSDP","DNS","MDNS","Pck_rawdata","NTP","DHCP","BOOTP","ICMP","IP_padding","IP_ralert"]
name="IPAssess_hub.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'LLC', 'EAPOL', 'IP', 'ICMP6', 'TCP', 'UDP', 'TCP_w_size',
       'HTTP', 'HTTPS', 'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size',
       'payload_l', 'Entropy', 'Label', 'MAC'],
      dtype='object')

In [23]:
len(df.columns)  # includes label, Mac

18

### Create Main Feature set without IP_padding,IP_alert

In [19]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_padding","IP_ralert","Entropy"]
name="Main_hub_reduced.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'LLC', 'EAPOL', 'IP', 'ICMP', 'ICMP6', 'TCP', 'UDP',
       'TCP_w_size', 'HTTP', 'HTTPS', 'DHCP', 'BOOTP', 'SSDP', 'DNS', 'MDNS',
       'NTP', 'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size',
       'Pck_rawdata', 'payload_l', 'Label', 'MAC'],
      dtype='object')

In [20]:
len(df.columns) # includes label, Mac

25

### Consider features by setting threshold at 0.05 for correlation

In [24]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_padding","IP_ralert","Entropy","ICMP","ICMP6","HTTP","SSDP","DNS","MDNS","Pck_rawdata","NTP","DHCP","BOOTP","payload_l"]
name="Threshold_0.05.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'LLC', 'EAPOL', 'IP', 'TCP', 'UDP', 'TCP_w_size', 'HTTPS',
       'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size', 'Label', 'MAC'],
      dtype='object')

In [25]:
len(df.columns)

14

### Consider features by setting threshold at 0.06 for correlation

In [33]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["LLC","IP_padding","IP_ralert","Entropy","ICMP","ICMP6","HTTP","SSDP","DNS","MDNS","Pck_rawdata","NTP","DHCP","BOOTP","payload_l"]
name="Threshold_0.06.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'EAPOL', 'IP', 'TCP', 'UDP', 'TCP_w_size', 'HTTPS',
       'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size', 'Label', 'MAC'],
      dtype='object')

In [34]:
len(df.columns)

13

### Consider features by setting threshold at 0.075 for correlation

In [41]:
df=pd.read_csv("dataset/Main_hub.csv")
deleted=["IP_add_count","LLC","IP_padding","IP_ralert","Entropy","ICMP","ICMP6","HTTP","SSDP","DNS","MDNS","Pck_rawdata","NTP","DHCP","BOOTP","payload_l"]
name="Threshold_0.075.csv"
df=df.drop(columns=deleted)
df.to_csv('dataset/'+name, index=False)
df.columns

Index(['ARP', 'EAPOL', 'IP', 'TCP', 'UDP', 'TCP_w_size', 'HTTPS', 'Portcl_src',
       'Portcl_dst', 'Pck_size', 'Label', 'MAC'],
      dtype='object')

In [42]:
len(df.columns)

12