# This file extracts feature sets from pcap files.

### Input & Output

`Input Files`: All files with the pcap extension in the “./BenignTraffic/” folder (the path passed to `find_the_way`) are read.

`Output Files`: Fingerprint result file (the FP_MAIN feature set), written to *./dataset/Main1.csv*.

###  importing relevant libraries

In [1]:
from scapy.all import*
import math
import pandas as pd
import os
import numpy as np




### Discovering pcap extension files under "pcaps" folder.

In [45]:
def find_the_way(path,file_format):
    """Collect the files below *path* whose name ends with *file_format*.

    The directory tree is walked recursively with ``os.walk``.

    Args:
        path: Root directory to search.
        file_format: File-name suffix to look for, e.g. ``'.pcap'``.

    Returns:
        A list of paths (directory joined with file name), in the order
        ``os.walk`` yields them. Empty when nothing matches or *path* does
        not exist.
    """
    files_add = []
    for r, d, f in os.walk(path):
        for file in f:
            # Match the suffix rather than any substring, so names such as
            # "trace.pcap.bak" are not picked up as captures.
            if file.endswith(file_format):
                files_add.append(os.path.join(r,file))
    return files_add
files_add=find_the_way('./BenignTraffic/','.pcap')

### List of pcap files to be processed

In [46]:
files_add

['./BenignTraffic/BenignTraffic.pcap',
 './BenignTraffic/BenignTraffic1.pcap',
 './BenignTraffic/BenignTraffic2.pcap',
 './BenignTraffic/BenignTraffic3.pcap']

### Port numbers are classified in this part as:

| Port Numbers | Equivalents |
| :------ | :------ |
|No port| 0|
|Well known ports (between 0 and 1023) |1|
|Registered ports (between 1024 and 49151)  |2|
|Dynamic ports (between  49152 and 65535) |3|
# ↓ 

In [47]:
def port_class(port):
    """Map a port number to its class code.

    Returns 1 for 0-1023, 2 for 1024-49151, 3 for 49152-65535 and 0 for any
    value that falls in none of these ranges.
    """
    # (lowest port, highest port, class code), checked in ascending order.
    bands = (
        (0, 1023, 1),
        (1024, 49151, 2),
        (49152, 65535, 3),
    )
    for lowest, highest, code in bands:
        if lowest <= port <= highest:
            return code
    return 0

### The dictionary to be used for MAC address and device matching.
#### Datasets, their MAC addresses and Devices are given separately.

In [48]:
MAC_list={
# Dictionary of devices and their MAC addresses.
# Keys are the strings that the extraction loop compares against the
# packet's source address (j.src), so they are written as lower-case,
# colon-separated hexadecimal, matching the MAC column of the generated CSV.
# Five keys previously contained the letter "O" instead of the digit "0"
# or upper-case hex digits and therefore could never match.

    "1c:fe:2b:98:16:dd": "Amazon Alexa Echo Dot 1",
    "a0:d0:dc:c4:08:ff": "Amazon Alexa Echo Dot 2",
    "1c:12:b0:9b:0c:ec": "Amazon Alexa Echo Spot",
    "08:7c:39:ce:6e:2a": "Amazon Alexa Echo Studio",
    "cc:f4:11:9c:d0:00": "Google Nest Mini",
    "48:a6:b8:f9:1b:88": "Sonos One Speaker",
    "9c:8e:cd:1d:ab:9f": "AMCREST WIFi Camera",
    "3c:37:86:6f:b9:51": "Arlo Base Station",
    "40:5d:82:35:14:c8": "Arlo Q Camera",
    "c0:e7:bf:0a:79:d1": "Borun/Sichuan-Al Camera",
    "b0:c5:54:59:2e:99": "DCS8000LHA1 D-Link Mini Camera",
    "44:01:bb:ec:10:4a": "HeimVision Smart WiFi Camera",
    "34:75:63:73:f3:36": "Home Eye Camera",
    "7c:a7:b0:cd:18:32": "Luohe Cam Dog",
    "44:bb:3b:00:39:07": "Nest Indoor Camera",
    "70:ee:50:68:0e:32": "Netatmo Camera",
    "10:2c:6b:1b:43:be": "SIMCAM 15 (AMPAKTec)",
    "b8:5f:98:d0:76:e6": "Amazon Plug",
    "68:57:2d:56:ac:47": "Atomi Coffee Maker",
    "8c:85:80:6c:b6:47": "Eufy HomeBase 2",
    "50:02:91:b1:68:0c": "Globe Lamp ESP_B1680C",
    "b8:f0:09:03:9a:af":  "Gosund ESP 039AAF Socket",
    "b8:f0:09:03:29:79": "Gosund ESP_032979 Plug",
    "50:02:91:10:09:8f": "Gosund ESP 10098F Socket",
    "c4:dd:57:0c:39:94": "Gosund ESP_0C3994 Plug",
    "50:02:91:1a:ce:e1": "Gosund ESP 1ACEE1 Socket",
    "24:a1:60:14:7f:f9": "Gosund ESP_147FF9 Plug",
    "50:02:91:10:ac:d8": "Gosund ESP 10ACD8 Plug",
    "d4:a6:51:30:64:b7": "HeimVision SmartLife Radio/Lamp",
    "00:17:88:60:d6:4f": "Philips Hue Bridge",
    "b0:09:da:3e:82:6c": "Ring Base Station AC:1236",
    "50:14:79:37:80:18": "iRobot Roomba",
    "00:02:75:f6:e3:cb": "Smart Board",
    "d4:a6:51:76:06:64": "Teckin Plug 1",
    "d4:a6:51:78:97:4e": "Teckin Plug 2",
    "d4:a6:51:20:91:d1": "Yutron Plug 1",
    "d4:a6:51:21:6c:29": "Yutron Plug 2",
    "f0:b4:d2:f9:60:95": "D-Link DCHS-161 Water Sensor",
    "ac:f1:08:4e:00:82": "LG Smart TV",
    "70:ee:50:6b:a8:1a": "Netatmo Weather Station"
    }

In [49]:
len(MAC_list)

40

### Calculating the payload entropy value.


# ↓ 

In [50]:
def pre_entropy(payload):
    """Return the Shannon entropy of the items obtained by iterating *payload*."""
    # Materialise the payload as a list of its items and hand it to shannon().
    return shannon(list(payload))


def shannon(data):
    """Return the base-2 Shannon entropy of the symbols in *data*.

    *data* must be a sized iterable of hashable symbols. Each distinct
    symbol contributes ``-p * log2(p)``, where ``p`` is its relative
    frequency.
    """
    # Tally how many times every distinct symbol occurs.
    occurrences = {}
    for symbol in data:
        occurrences[symbol] = occurrences.get(symbol, 0) + 1

    total = len(data)
    base = 2
    weighted_log_sum = 0.0
    for hits in occurrences.values():
        probability = float(hits) / total
        if probability > 0:
            weighted_log_sum = weighted_log_sum + probability * math.log(probability, base)
    return -weighted_log_sum

### This section is the main backbone of our program. In this section, the following operations are performed briefly.


#### - The `files_add` variable contains the paths of the pcap files. The outer `for` loop iterates over these paths so that every file is processed.


#### - The second for loop examines individual packets in the processed pcap file. All features belonging to a packet are extracted and processed as a new line in the fingerprint file at the end of the second for loop.


#### - All properties are initially assigned a value of 0. These properties are then queried in the packet. If the properties have corresponding data, the data is processed in the variable, otherwise, the value of the variable remains as 0.

In [51]:
# Build the main fingerprint CSV: every packet whose source MAC is listed in
# MAC_list becomes one row of protocol flags, port classes, sizes and payload
# entropy, followed by the device label and the MAC itself.
# NOTE(review): this cell writes ./dataset/Main1.csv while the analysis cells
# further down read dataset/Main.csv -- confirm which file name is intended.
import time  # not used in this cell; kept in case other cells rely on it

header="ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,HTTPS,DHCP,BOOTP,SSDP,DNS,MDNS,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy,Label,MAC\n"

# Distinct destination IPs seen so far for each known source MAC. The lists
# persist across all pcap files, so IP_add_count is a running count.
dst_ip_list={}
for i in MAC_list:
    dst_ip_list[i]=[]

# Context managers guarantee that the CSV and every pcap reader are closed
# even when reading or parsing raises part-way through.
with open("./dataset/Main1.csv", "w") as ths:
    ths.write(header)

    for i in files_add:
        print(i)
        with PcapReader(i) as pkt:
            # Previously i[8:], which only strips the directory when the path
            # starts with an 8-character prefix such as "./pcaps/".
            print("\n\n"+"========"+ os.path.basename(i)+"========"+"\n" )
            print(pkt)

            for j in pkt:
                # Rows are written only for known devices, and nothing below
                # updates shared state for other senders, so skip them before
                # doing any feature extraction.
                if j.src not in MAC_list:
                    continue

                # Every feature starts at 0 and is overwritten only when the
                # packet carries the corresponding data.
                ip_add_count=0
                layer_2_arp = 0
                layer_2_llc = 0

                layer_3_eapol = 0
                layer_3_ip = 0
                layer_3_icmp = 0
                layer_3_icmp6 = 0

                layer_4_tcp = 0
                layer_4_udp = 0
                layer_4_tcp_ws=0

                layer_7_http = 0
                layer_7_https = 0
                layer_7_dhcp = 0
                layer_7_bootp = 0
                layer_7_ssdp = 0
                layer_7_dns = 0
                layer_7_mdns = 0
                layer_7_ntp = 0

                ip_padding = 0
                ip_ralert = 0

                port_class_src = 0
                port_class_dst = 0

                pck_size = 0
                pck_rawdata = 0
                entropy=0

                layer_4_payload_l=0

                # Best effort: if the packet exposes no `len` attribute the
                # size stays 0. `except Exception` (instead of a bare except)
                # lets KeyboardInterrupt/SystemExit stop a long run.
                try:
                    pck_size=j.len
                except Exception:
                    pass

                # IP presence, running count of distinct destinations for this
                # device, and port classes. Any failure (no IP layer, no
                # sport/dport attribute) leaves the remaining values at 0.
                try:
                    if j[IP]:
                        layer_3_ip = 1
                    temp=str(j[IP].dst)
                    if temp not in dst_ip_list[j.src]:
                        dst_ip_list[j.src].append(temp)
                    ip_add_count=len(dst_ip_list[j.src])

                    port_class_src = port_class(j[IP].sport)
                    port_class_dst = port_class(j[IP].dport)
                except Exception:
                    pass

                # ICMPv6 is detected by searching the textual form of the
                # packet's bound `show` method for the substring "ICMPv6".
                temp=str(j.show)
                if "ICMPv6" in temp:
                    layer_3_icmp6 = 1

                # NOTE(review): IPOption_Router_Alert is constructed from the
                # whole packet here; IP_padding and IP_ralert show NaN in the
                # correlation output of this notebook, which suggests they are
                # constant -- confirm this branch detects what is intended.
                try:
                    if j[IP].ihl >5:
                        if IPOption_Router_Alert(j):
                            pad=str(IPOption_Router_Alert(j).show)
                            if "Padding" in pad:
                                ip_padding=1
                            ip_ralert = 1
                except Exception:
                    pass

                if j.haslayer(ICMP):
                    layer_3_icmp = 1

                if j.haslayer(Raw):
                    pck_rawdata = 1

                # Application protocols over UDP are inferred from port numbers.
                if j.haslayer(UDP):
                    layer_4_udp = 1
                    if j[UDP].sport==68 or j[UDP].sport==67:
                        layer_7_dhcp = 1
                        layer_7_bootp = 1
                    if j[UDP].sport==53 or j[UDP].dport==53:
                        layer_7_dns = 1
                    if j[UDP].sport==5353 or j[UDP].dport==5353:
                        layer_7_mdns = 1
                    if j[UDP].sport==1900 or j[UDP].dport==1900:
                        layer_7_ssdp = 1
                    if j[UDP].sport==123 or j[UDP].dport==123:
                        layer_7_ntp = 1

                try:
                    if j[UDP].payload:
                        layer_4_payload_l=len(j[UDP].payload)
                except Exception:
                    pass

                # TCP window size plus HTTP/HTTPS inferred from ports 80/443.
                if j.haslayer(TCP):
                    layer_4_tcp = 1
                    layer_4_tcp_ws=j[TCP].window
                    if j[TCP].sport==80 or j[TCP].dport==80:
                        layer_7_http = 1
                    if j[TCP].sport==443 or j[TCP].dport==443:
                        layer_7_https = 1
                    try:
                        if j[TCP].payload:
                            layer_4_payload_l=len(j[TCP].payload)
                    except Exception:
                        pass

                if j.haslayer(ARP):
                    layer_2_arp = 1

                if j.haslayer(LLC):
                    layer_2_llc = 1

                if j.haslayer(EAPOL):
                    layer_3_eapol = 1

                # Entropy of the Raw layer's original bytes; stays 0 when the
                # packet has no Raw layer.
                try:
                    entropy=pre_entropy(j[Raw].original)
                except Exception:
                    pass

                label=MAC_list[j.src]
                # Column order must match `header`.
                line=[layer_2_arp, layer_2_llc, layer_3_eapol, layer_3_ip, layer_3_icmp, layer_3_icmp6, layer_4_tcp, layer_4_udp, layer_4_tcp_ws, layer_7_http, layer_7_https, layer_7_dhcp, layer_7_bootp, layer_7_ssdp, layer_7_dns, layer_7_mdns, layer_7_ntp, ip_padding, ip_add_count, ip_ralert, port_class_src, port_class_dst, pck_size, pck_rawdata,layer_4_payload_l,entropy, label,j.src]
                if label!="unknown":
                    # Join the values directly instead of string-replacing a
                    # list repr, which would also strip brackets, quotes and
                    # ", " sequences occurring inside a label.
                    ths.write(",".join(str(value) for value in line)+"\n")


./BenignTraffic/BenignTraffic.pcap



<scapy.utils.PcapReader object at 0x7f80822d1250>
./BenignTraffic/BenignTraffic1.pcap



<scapy.utils.PcapReader object at 0x7f80b22cc640>
./BenignTraffic/BenignTraffic2.pcap



<scapy.utils.PcapReader object at 0x7f808231e190>
./BenignTraffic/BenignTraffic3.pcap



<scapy.utils.PcapReader object at 0x7f808234ad30>


### Input & Output

`Input Files`: dataset/Main.csv (the main feature set)

`Output Files`: dataset/IPAssess_CIC.csv

In [6]:
# Load the main feature set into a DataFrame for the analysis cells.
dataset = 'dataset/Main.csv'
df = pd.read_csv(filepath_or_buffer=dataset)

In [3]:
df

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy,Label,MAC
0,0,0,0,1,0,0,0,1,0,0,...,1,0,2,2,204,1,176,6.842788,HeimVision Smart WiFi Camera,44:01:bb:ec:10:4a
1,0,0,0,1,0,0,1,0,3233,0,...,1,0,2,1,142,1,90,6.127910,Nest Indoor Camera,44:bb:3b:00:39:07
2,0,0,0,1,0,0,0,1,0,0,...,1,0,3,2,200,1,172,6.512079,Teckin Plug 2,d4:a6:51:78:97:4e
3,0,0,0,1,0,0,1,0,3233,0,...,1,0,2,1,142,1,90,6.216799,Nest Indoor Camera,44:bb:3b:00:39:07
4,0,0,0,1,0,0,1,0,3233,0,...,1,0,2,1,142,1,90,5.933357,Nest Indoor Camera,44:bb:3b:00:39:07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1886458,0,0,0,1,0,0,1,0,2881,0,...,7,0,3,1,142,1,90,6.133357,Nest Indoor Camera,44:bb:3b:00:39:07
1886459,0,0,0,1,0,0,0,1,0,0,...,39,0,2,2,61,0,33,0.000000,Google Nest Mini,cc:f4:11:9c:d0:00
1886460,0,0,0,1,0,0,1,0,2881,0,...,7,0,3,1,142,1,90,6.208411,Nest Indoor Camera,44:bb:3b:00:39:07
1886461,0,0,0,1,0,0,0,1,0,0,...,40,0,2,2,261,0,233,0.000000,Philips Hue Bridge,00:17:88:60:d6:4f


In [15]:
df.Label.value_counts()

Label
Nest Indoor Camera                 693910
Amazon Alexa Echo Studio           115136
LG Smart TV                        115064
Google Nest Mini                    97061
Philips Hue Bridge                  91945
Home Eye Camera                     76145
Amazon Alexa Echo Dot 1             69003
Netatmo Camera                      68681
HeimVision Smart WiFi Camera        48986
Eufy HomeBase 2                     35327
Gosund ESP_0C3994 Plug              33611
Gosund ESP 10098F Socket            33610
Gosund ESP_147FF9 Plug              33600
Gosund ESP_032979 Plug              33598
Gosund ESP 1ACEE1 Socket            33590
Gosund ESP 039AAF Socket            33587
Gosund ESP 10ACD8 Plug              33567
Netatmo Weather Station             29695
HeimVision SmartLife Radio/Lamp     28001
AMCREST WIFi Camera                 24660
Atomi Coffee Maker                  24349
Yutron Plug 2                       23192
Yutron Plug 1                       23072
Teckin Plug 2               

## Correlation matrix on Main Feature Set i.e FP_Main

In [7]:
# Pairwise correlation of all numeric features.
# drop() returns a new frame, so `df` keeps its MAC and Label columns
# (the previous `dataframe1 = df` alias deleted them from `df` as well).
dataframe1 = df.drop(columns=["MAC", "Label"])

# `matrix` stays the full correlation DataFrame: the threshold cells below
# filter its rows with conditions on matrix['IP'], which fails if `matrix`
# has been replaced by the single 'IP' column.
matrix = dataframe1.corr()

# Display each feature's correlation with the IP flag.
matrix['IP']

ARP            -0.966162
LLC            -0.030133
EAPOL                NaN
IP              1.000000
ICMP            0.037752
ICMP6          -0.044215
TCP             0.358520
UDP             0.170430
TCP_w_size      0.174435
HTTP            0.039871
HTTPS           0.283438
DHCP            0.002445
BOOTP           0.002445
SSDP            0.063542
DNS             0.040603
MDNS           -0.070626
NTP             0.026996
IP_padding           NaN
IP_add_count    0.171516
IP_ralert            NaN
Portcl_src      0.763891
Portcl_dst      0.538554
Pck_size        0.172629
Pck_rawdata     0.538381
payload_l       0.141548
Entropy         0.494683
Name: IP, dtype: float64

In [57]:
# Keep the rows of the correlation matrix whose correlation with IP has an
# absolute value of at least 0.1.
df_CIC = matrix.loc[matrix['IP'].abs() >= 0.1]
df_CIC

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.002669,,-0.966162,-0.036475,-0.003916,-0.346388,-0.201107,-0.168532,-0.038522,...,-0.026082,,-0.165712,,-0.738042,-0.52033,-0.17448,-0.520873,-0.144133,-0.478127
IP,-0.966162,-0.030133,,1.0,0.037752,-0.044215,0.35852,0.17043,0.174435,0.039871,...,0.026996,,0.171516,,0.763891,0.538554,0.172629,0.538381,0.141548,0.494683
TCP,-0.346388,-0.010803,,0.35852,-0.147656,-0.015852,1.0,-0.814119,0.486541,0.111211,...,-0.105587,,0.087962,,0.26802,-0.233652,0.16055,0.136509,0.124185,0.221978
UDP,-0.201107,-0.006272,,0.17043,-0.085727,-0.009203,-0.814119,1.0,-0.396102,-0.090539,...,0.129694,,-0.023209,,0.231255,0.607438,-0.05493,0.139393,-0.030522,0.044254
TCP_w_size,-0.168532,-0.005256,,0.174435,-0.071841,-0.007713,0.486541,-0.396102,1.0,0.143186,...,-0.051372,,0.031969,,0.12539,-0.150704,-0.009701,-0.025289,-0.029035,0.011708
HTTPS,-0.273847,-0.008541,,0.283438,-0.116734,-0.012532,0.790578,-0.643625,0.394054,-0.123287,...,-0.083475,,-0.092983,,0.261413,-0.447451,0.212639,0.303015,0.182002,0.377533
SSDP,-0.061391,-0.001915,,0.063542,-0.02617,-0.00281,-0.248524,0.305268,-0.120917,-0.027639,...,-0.018713,,0.100039,,-0.045519,0.341601,0.078256,0.117863,0.085457,0.068478
MDNS,-0.044978,-0.001403,,-0.070626,-0.019173,-0.002058,-0.18208,0.223652,-0.088589,-0.020249,...,-0.01371,,0.025222,,-0.109225,0.041978,-0.034072,-0.273798,-0.027764,-0.251328
IP_add_count,-0.165712,-0.005168,,0.171516,0.0986,-0.007584,0.087962,-0.023209,0.031969,0.21357,...,-0.033835,,1.0,,0.086146,0.237687,0.001817,-0.20466,-0.006361,-0.28032
Portcl_src,-0.738042,-0.023018,,0.763891,-0.311193,-0.033776,0.26802,0.231255,0.12539,-0.020282,...,0.020916,,0.086146,,1.0,0.493205,0.176631,0.433637,0.159163,0.443956


## Create IPAssess


In [92]:
# Write the IPAssess feature set: the main feature set minus the columns
# listed in `deleted`.
name = "IPAssess_CIC.csv"
deleted = [
    "SSDP", "DNS", "MDNS", "Pck_rawdata", "NTP", "DHCP",
    "BOOTP", "ICMP", "IP_padding", "IP_ralert", "EAPOL",
]
df = pd.read_csv("dataset/Main.csv").drop(columns=deleted)
df.to_csv("dataset/" + name, index=False)
df.columns

Index(['ARP', 'LLC', 'IP', 'ICMP6', 'TCP', 'UDP', 'TCP_w_size', 'HTTP',
       'HTTPS', 'IP_add_count', 'Portcl_src', 'Portcl_dst', 'Pck_size',
       'payload_l', 'Entropy', 'Label', 'MAC'],
      dtype='object')

In [93]:
len(df.columns)  # includes label, Mac

17

### Create Main Feature set without IP_padding, IP_ralert, EAPOL

In [59]:
# Write the main feature set without the columns listed in `deleted`.
name = "Main_CIC.csv"
deleted = ["IP_padding", "IP_ralert", "EAPOL"]
df = pd.read_csv("dataset/Main.csv").drop(columns=deleted)
df.to_csv("dataset/" + name, index=False)
df.columns

Index(['ARP', 'LLC', 'IP', 'ICMP', 'ICMP6', 'TCP', 'UDP', 'TCP_w_size', 'HTTP',
       'HTTPS', 'DHCP', 'BOOTP', 'SSDP', 'DNS', 'MDNS', 'NTP', 'IP_add_count',
       'Portcl_src', 'Portcl_dst', 'Pck_size', 'Pck_rawdata', 'payload_l',
       'Entropy', 'Label', 'MAC'],
      dtype='object')

In [60]:
len(df.columns) # includes label, Mac

25

### Threshold 0.15

In [71]:
# Keep the rows of the correlation matrix whose correlation with IP has an
# absolute value of at least 0.15.
df_CIC = matrix.loc[matrix['IP'].abs() >= 0.15]
df_CIC

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.002669,,-0.966162,-0.036475,-0.003916,-0.346388,-0.201107,-0.168532,-0.038522,...,-0.026082,,-0.165712,,-0.738042,-0.52033,-0.17448,-0.520873,-0.144133,-0.478127
IP,-0.966162,-0.030133,,1.0,0.037752,-0.044215,0.35852,0.17043,0.174435,0.039871,...,0.026996,,0.171516,,0.763891,0.538554,0.172629,0.538381,0.141548,0.494683
TCP,-0.346388,-0.010803,,0.35852,-0.147656,-0.015852,1.0,-0.814119,0.486541,0.111211,...,-0.105587,,0.087962,,0.26802,-0.233652,0.16055,0.136509,0.124185,0.221978
UDP,-0.201107,-0.006272,,0.17043,-0.085727,-0.009203,-0.814119,1.0,-0.396102,-0.090539,...,0.129694,,-0.023209,,0.231255,0.607438,-0.05493,0.139393,-0.030522,0.044254
TCP_w_size,-0.168532,-0.005256,,0.174435,-0.071841,-0.007713,0.486541,-0.396102,1.0,0.143186,...,-0.051372,,0.031969,,0.12539,-0.150704,-0.009701,-0.025289,-0.029035,0.011708
HTTPS,-0.273847,-0.008541,,0.283438,-0.116734,-0.012532,0.790578,-0.643625,0.394054,-0.123287,...,-0.083475,,-0.092983,,0.261413,-0.447451,0.212639,0.303015,0.182002,0.377533
IP_add_count,-0.165712,-0.005168,,0.171516,0.0986,-0.007584,0.087962,-0.023209,0.031969,0.21357,...,-0.033835,,1.0,,0.086146,0.237687,0.001817,-0.20466,-0.006361,-0.28032
Portcl_src,-0.738042,-0.023018,,0.763891,-0.311193,-0.033776,0.26802,0.231255,0.12539,-0.020282,...,0.020916,,0.086146,,1.0,0.493205,0.176631,0.433637,0.159163,0.443956
Portcl_dst,-0.52033,-0.016228,,0.538554,-0.212424,-0.023812,-0.233652,0.607438,-0.150704,-0.055459,...,-0.042617,,0.237687,,0.493205,1.0,0.031932,0.267316,0.035358,0.177053
Pck_size,-0.17448,-0.005291,,0.172629,-0.047906,-0.007961,0.16055,-0.05493,-0.009701,-0.03915,...,-0.034564,,0.001817,,0.176631,0.031932,1.0,0.266828,0.998885,0.423384


In [74]:
# Write the feature set for threshold 0.15: only the columns named in
# `selected_columns` are kept, in that order.
name = "Threshold_0.15.csv"
selected_columns = [
    'ARP', 'IP', 'TCP', 'UDP', 'TCP_w_size', 'HTTPS', 'IP_add_count',
    'Portcl_src', 'Portcl_dst', 'Pck_size', 'Pck_rawdata', 'Entropy',
    'Label', 'MAC',
]
df = pd.read_csv("dataset/Main.csv")[selected_columns]
df.to_csv("dataset/" + name, index=False)
df.columns

Index(['ARP', 'IP', 'TCP', 'UDP', 'TCP_w_size', 'HTTPS', 'IP_add_count',
       'Portcl_src', 'Portcl_dst', 'Pck_size', 'Pck_rawdata', 'Entropy',
       'Label', 'MAC'],
      dtype='object')

In [75]:
len(df.columns)

14

### Threshold 0.172

In [85]:
# Keep the rows of the correlation matrix whose correlation with IP has an
# absolute value of at least 0.172.
df_CIC = matrix.loc[matrix['IP'].abs() >= 0.172]
df_CIC

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.002669,,-0.966162,-0.036475,-0.003916,-0.346388,-0.201107,-0.168532,-0.038522,...,-0.026082,,-0.165712,,-0.738042,-0.52033,-0.17448,-0.520873,-0.144133,-0.478127
IP,-0.966162,-0.030133,,1.0,0.037752,-0.044215,0.35852,0.17043,0.174435,0.039871,...,0.026996,,0.171516,,0.763891,0.538554,0.172629,0.538381,0.141548,0.494683
TCP,-0.346388,-0.010803,,0.35852,-0.147656,-0.015852,1.0,-0.814119,0.486541,0.111211,...,-0.105587,,0.087962,,0.26802,-0.233652,0.16055,0.136509,0.124185,0.221978
TCP_w_size,-0.168532,-0.005256,,0.174435,-0.071841,-0.007713,0.486541,-0.396102,1.0,0.143186,...,-0.051372,,0.031969,,0.12539,-0.150704,-0.009701,-0.025289,-0.029035,0.011708
HTTPS,-0.273847,-0.008541,,0.283438,-0.116734,-0.012532,0.790578,-0.643625,0.394054,-0.123287,...,-0.083475,,-0.092983,,0.261413,-0.447451,0.212639,0.303015,0.182002,0.377533
Portcl_src,-0.738042,-0.023018,,0.763891,-0.311193,-0.033776,0.26802,0.231255,0.12539,-0.020282,...,0.020916,,0.086146,,1.0,0.493205,0.176631,0.433637,0.159163,0.443956
Portcl_dst,-0.52033,-0.016228,,0.538554,-0.212424,-0.023812,-0.233652,0.607438,-0.150704,-0.055459,...,-0.042617,,0.237687,,0.493205,1.0,0.031932,0.267316,0.035358,0.177053
Pck_size,-0.17448,-0.005291,,0.172629,-0.047906,-0.007961,0.16055,-0.05493,-0.009701,-0.03915,...,-0.034564,,0.001817,,0.176631,0.031932,1.0,0.266828,0.998885,0.423384
Pck_rawdata,-0.520873,0.005123,,0.538381,0.063018,-0.023559,0.136509,0.139393,-0.025289,-0.161699,...,-0.158773,,-0.20466,,0.433637,0.267316,0.266828,1.0,0.251456,0.917935
Entropy,-0.478127,-0.00969,,0.494683,-0.001926,-0.021752,0.221978,0.044254,0.011708,-0.164953,...,-0.145744,,-0.28032,,0.443956,0.177053,0.423384,0.917935,0.406316,1.0


In [86]:
# Write the feature set for threshold 0.172: only the columns named in
# `selected_columns` are kept, in that order.
name = "Threshold_0.172.csv"
selected_columns = [
    'ARP', 'IP', 'TCP', 'TCP_w_size', 'HTTPS', 'Portcl_src',
    'Portcl_dst', 'Pck_size', 'Pck_rawdata', 'Entropy',
    'Label', 'MAC',
]
df = pd.read_csv("dataset/Main.csv")[selected_columns]
df.to_csv("dataset/" + name, index=False)
df.columns

Index(['ARP', 'IP', 'TCP', 'TCP_w_size', 'HTTPS', 'Portcl_src', 'Portcl_dst',
       'Pck_size', 'Pck_rawdata', 'Entropy', 'Label', 'MAC'],
      dtype='object')

In [87]:
len(df.columns)

12

### Threshold 0.175

In [94]:
# Keep the rows of the correlation matrix whose correlation with IP has an
# absolute value of at least 0.175.
df_CIC = matrix.loc[matrix['IP'].abs() >= 0.175]
df_CIC

Unnamed: 0,ARP,LLC,EAPOL,IP,ICMP,ICMP6,TCP,UDP,TCP_w_size,HTTP,...,NTP,IP_padding,IP_add_count,IP_ralert,Portcl_src,Portcl_dst,Pck_size,Pck_rawdata,payload_l,Entropy
ARP,1.0,-0.002669,,-0.966162,-0.036475,-0.003916,-0.346388,-0.201107,-0.168532,-0.038522,...,-0.026082,,-0.165712,,-0.738042,-0.52033,-0.17448,-0.520873,-0.144133,-0.478127
IP,-0.966162,-0.030133,,1.0,0.037752,-0.044215,0.35852,0.17043,0.174435,0.039871,...,0.026996,,0.171516,,0.763891,0.538554,0.172629,0.538381,0.141548,0.494683
TCP,-0.346388,-0.010803,,0.35852,-0.147656,-0.015852,1.0,-0.814119,0.486541,0.111211,...,-0.105587,,0.087962,,0.26802,-0.233652,0.16055,0.136509,0.124185,0.221978
HTTPS,-0.273847,-0.008541,,0.283438,-0.116734,-0.012532,0.790578,-0.643625,0.394054,-0.123287,...,-0.083475,,-0.092983,,0.261413,-0.447451,0.212639,0.303015,0.182002,0.377533
Portcl_src,-0.738042,-0.023018,,0.763891,-0.311193,-0.033776,0.26802,0.231255,0.12539,-0.020282,...,0.020916,,0.086146,,1.0,0.493205,0.176631,0.433637,0.159163,0.443956
Portcl_dst,-0.52033,-0.016228,,0.538554,-0.212424,-0.023812,-0.233652,0.607438,-0.150704,-0.055459,...,-0.042617,,0.237687,,0.493205,1.0,0.031932,0.267316,0.035358,0.177053
Pck_rawdata,-0.520873,0.005123,,0.538381,0.063018,-0.023559,0.136509,0.139393,-0.025289,-0.161699,...,-0.158773,,-0.20466,,0.433637,0.267316,0.266828,1.0,0.251456,0.917935
Entropy,-0.478127,-0.00969,,0.494683,-0.001926,-0.021752,0.221978,0.044254,0.011708,-0.164953,...,-0.145744,,-0.28032,,0.443956,0.177053,0.423384,0.917935,0.406316,1.0


In [95]:
# Write the feature set for threshold 0.175: only the columns named in
# `selected_columns` are kept, in that order.
name = "Threshold_0.175.csv"
selected_columns = [
    'ARP', 'IP', 'TCP', 'HTTPS', 'Portcl_src',
    'Portcl_dst', 'Pck_rawdata', 'Entropy',
    'Label', 'MAC',
]
df = pd.read_csv("dataset/Main.csv")[selected_columns]
df.to_csv("dataset/" + name, index=False)
df.columns

Index(['ARP', 'IP', 'TCP', 'HTTPS', 'Portcl_src', 'Portcl_dst', 'Pck_rawdata',
       'Entropy', 'Label', 'MAC'],
      dtype='object')

In [96]:
len(df.columns)

10