### Data extraction using NFStream

### This notebook is run from WSL.

#### Environment
* experiment1-env - <code>source /home/kali/experiment1-env/bin/activate</code>

* Working directory - <code>/mnt/e/Onedrive/1-Documents/1-PhD/2-Experiment/feature_extract</code>

#### Dataset 
* UNSW - <code>/mnt/f/My Drive/UNSW_pcap-isolated</code>
* MonIoTr - <code>/mnt/d/Dataset/iot-data/us</code>
* Sentinel - <code>/mnt/e/Onedrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel</code>

In [1]:
import nfstream
# Record the NFStream version used for this run (the saved output shows 6.3.5).
print(nfstream.__version__)


6.3.5


In [2]:
import pandas as pd
import glob, os
import numpy as np
from nfstream import NFStreamer, NFPlugin
# Raise pandas' display limits to 500 columns and 500 rows.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

### Function definitions

In [3]:
def pcap_filepath(path,device_label):
    """Return the paths of all ``.pcap`` files below ``path/device_label``.

    The glob is recursive, so captures sitting directly in the device folder
    and captures in any nested sub-folder are both returned.

    Parameters
    ----------
    path : str
        Root folder of the dataset.
    device_label : str
        Name of the device sub-folder to search.

    Returns
    -------
    list of str
        Matching file paths, in whatever order ``glob`` yields them.
    """
    pattern = '/'.join((path, device_label, '**', '*.pcap'))
    return glob.glob(pattern, recursive=True)

In [4]:
# Derive the device names of a dataset from its folder layout.
def get_device_list(path):
    """Return the names of the immediate sub-directories of ``path``.

    Each sub-directory name is used as a device name; plain files in ``path``
    are ignored.

    Parameters
    ----------
    path : str
        Root folder of the dataset.

    Returns
    -------
    list of str
        Sub-directory names, in ``os.listdir`` order.
    """
    return [entry for entry in os.listdir(path) if os.path.isdir(path + '/' + entry)]

In [5]:
def add_label(df,device_name, label):
    """Tag every row of ``df`` with a device name and a class label.

    The input frame is not modified. A copy gets two extra columns,
    ``device`` and ``label``, and its index is reset to 0..n-1.

    Parameters
    ----------
    df : pandas.DataFrame
        Flow records to tag.
    device_name : str
        Value written into the ``device`` column.
    label : int
        Value written into the ``label`` column.

    Returns
    -------
    tuple
        ``(rows, columns)`` where ``rows`` is the tagged frame converted to a
        list of row lists and ``columns`` is its column index.
    """
    tagged = df.assign(device=device_name, label=label).reset_index(drop=True)
    return tagged.values.tolist(), tagged.columns

In [29]:
# Feature extraction, labelling and combination.
def data_preparation(path,device_list,label,am=1,idle_timeout=10,active_timeout=30):
    """Extract NFStream flow features for a set of devices into one frame.

    For every device in ``device_list`` all pcap files found by
    ``pcap_filepath(path, device)`` are converted to flows with ``NFStreamer``,
    tagged through ``add_label`` and appended to a single result.

    Parameters
    ----------
    path : str
        Root folder of the dataset (one sub-folder per device).
    device_list : iterable of str
        Device folder names to process.
    label : int
        Class label written to every flow (0: non-IoT, 1: IoT).
    am : int, default 1
        Passed to ``NFStreamer`` as ``accounting_mode``.
    idle_timeout, active_timeout : int, default 10 and 30
        Passed to ``NFStreamer``. The defaults are the "s3" setting; the other
        settings used in this study were, as (idle, active):
        s1 = (120, 1800), s2_1 = (60, 60), s2_2 = (120, 120),
        s2_4 = (240, 240), s2_8 = (120, 480).

    Returns
    -------
    pandas.DataFrame
        All labelled flows. An empty frame is returned when no file produced
        any flows (previously this case raised ``UnboundLocalError``).
    """
    rows = []
    # Stays None until at least one capture has produced a flow table.
    columns_name = None
    n_devices = 0

    for device in device_list:
        n_files = 0
        for filename in pcap_filepath(path, device):
            print(filename)

            df = NFStreamer(
                filename,
                statistical_analysis=True,
                accounting_mode=am,
                idle_timeout=idle_timeout,
                active_timeout=active_timeout,
            ).to_pandas()
            # Skip captures for which no flow table was produced.
            if df is None:
                continue

            # add label(0,1:non,iot) and device name
            dl, columns_name = add_label(df, device, label)

            # combine
            rows.extend(dl)
            n_files += 1

        print('combined ',n_files,' ', device,' pcap files')
        n_devices += 1
    print('all ',n_devices,' devices combined')

    if columns_name is None:
        # Nothing was extracted: there are no column names to build from.
        return pd.DataFrame()
    # change list to dataframe
    return pd.DataFrame(rows, columns=columns_name)

### Dataset 

#### 1-Moniotr

In [8]:
# MonIoTr dataset: extract flow features for every device folder under `path`.

path = "/mnt/d/Dataset/iot-data/us"

# Device names are the sub-folder names of `path`.
device_list = get_device_list(path)

# Extract features; every flow is labelled 1 (IoT).
df_iot = data_preparation(path,device_list,1)
# Save the combined flows to CSV.
df_iot.to_csv('../data/moniotr/iot_dataset_s3.csv', index=False)

# Runtimes noted by the author: 90+ minutes for setting s1,
# 65 minutes for "set3" (presumably setting s3).

/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-29_21_22_44.45s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-29_21_46_40.46s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-29_22_10_27.45s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-29_22_34_18.45s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-29_22_58_30.45s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-29_23_22_18.45s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-29_23_46_10.45s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-30_00_10_11.45s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-30_00_34_18.46s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-30_00_58_15.46s.pcap
/mnt/d/Dataset/iot-data/us/amcrest-cam-wired/android_lan_photo/2019-03-30_01_22_

#### 2-IoT Sentinel

In [20]:
# IoT Sentinel dataset: extract flow features for every device folder under `path`.

path = "/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel"

# Device names are the sub-folder names of `path`.
device_list = get_device_list(path)

# Extract features; every flow is labelled 1 (IoT).
df_iot = data_preparation(path,device_list,1)
# Save the combined flows to CSV.
df_iot.to_csv('../data/sentinel/iot_dataset.csv', index=False)

# Runtimes noted by the author per timeout setting:
#4mins #s1
#3mins #s3
#4mins #s2_1
#4mins8s #s2_2
#4min322s #s2_4  (as recorded; the seconds value looks like a typo - TODO confirm)
#4min27s #s2_8

/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/Setup-A-1-STA.pcap
/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/Setup-A-2-STA.pcap
/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/Setup-A-3-STA.pcap
/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/Setup-A-4-STA.pcap
/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/Setup-A-5-STA.pcap
/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/Setup-C-1-STA.pcap
/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/Setup-C-10-STA.pcap
/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/Setup-C-11-STA.pcap
/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/IoT_Sentinel-master/captures_IoT-Sentinel/Aria/

#### 3-UNSW

In [18]:
# UNSW dataset: extract flow features separately for non-IoT and IoT devices,
# save each set, then save the combined set.

path = "/mnt/f/My Drive/UNSW_pcap-isolated"

# Device names are the sub-folder names of `path`.
device_list = get_device_list(path)

# Devices treated as non-IoT (label 0).
non_iot_dev =['Android Phone 1','Android Phone 2','IPhone','Laptop','MacBook','Samsung Galaxy Tab','MacBook Iphone']

# Every other device folder is IoT (label 1). Filtering the list, rather than
# taking a set difference, keeps the device order identical between runs.
non_iot_names = set(non_iot_dev)
iot_dev = [device for device in device_list if device not in non_iot_names]

# Extract features and save each class on its own.
df_non_iot = data_preparation(path,non_iot_dev,0)
df_non_iot.to_csv('../data/unsw/nfstream/non_iot_dataset_s3.csv', index=False)

df_iot = data_preparation(path,iot_dev,1)
df_iot.to_csv('../data/unsw/nfstream/iot_dataset_s3.csv', index=False)

# Combine both classes. DataFrame.append was removed in pandas 2.0, so
# pd.concat is used; ignore_index=True renumbers the rows as before.
df = pd.concat([df_iot, df_non_iot], ignore_index=True)

# Save the combined frame to CSV.
df.to_csv('../data/unsw/nfstream/dataset_s3.csv', index=False)

# Runtime noted by the author: more than 8 hours.

/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/1.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/2.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/3.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/4.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/5.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/6.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/7.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/8.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/9.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/10.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/11.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/12.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/13.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/14.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/15.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/16.pcap
/mnt/f/My Drive/UNSW_pcap-isolated/Android Phone 1/17.pcap
/mnt/f

#### 4-Local non-IoT

In [21]:
# Local non-IoT capture: convert one pcapng file to flows, label them and save.
if __name__ == "__main__":

    filename = "/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/1-Experiment_1/data/noniot/non-iot.pcapng"


    # No timeouts or accounting mode are passed here, so NFStreamer's own
    # defaults apply for those settings.
    df_noniot = NFStreamer(filename,statistical_analysis=True).to_pandas()
    # Label every flow as coming from device 'PC' with class 0 (non-IoT).
    df_noniot['device'] = 'PC'
    df_noniot['label'] = 0

    # Save the labelled flows to CSV.
    df_noniot.to_csv('../data/local/non_iot_dataset.csv', index=False)

In [15]:
# Local non-IoT capture, repeated once per accounting mode.
# The accounting mode selects the layer used to report byte-related features
# (0: Link layer, 1: IP layer, 2: Transport layer, 3: Payload).
# NOTE(review): this cell reassigns the notebook-level names `path` and `i`,
# which the dataset cells also use - re-run those cells' assignments afterwards.
if __name__ == "__main__":

    filename = "/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/1-Experiment_1/data/noniot/non-iot.pcapng"
    am = [0,1,2,3]
    for i in am:
        df_noniot = NFStreamer(filename,statistical_analysis=True,accounting_mode=i).to_pandas()
        # Label every flow as coming from device 'PC' with class 0 (non-IoT).
        df_noniot['device'] = 'PC'
        df_noniot['label'] = 0

        # One output file per mode: non_iot_dataset0.csv ... non_iot_dataset3.csv
        path = '/mnt/e/OneDrive/1-Documents/1-PhD/2-Experiment/data/local/nfstream/non_iot_dataset'+str(i)+'.csv'
        print(path)
        # Save the labelled flows to CSV.
        df_noniot.to_csv(path, index=False)

In [27]:
# Live-capture experiment: read flows from a network interface instead of a file.
# The saved output of this cell is "ValueError: Unable to activate source.",
# so the capture did not start in the recorded run.
# NOTE(review): presumably the 'eth0' interface was unavailable or needed
# elevated privileges under WSL - TODO confirm.
if __name__ == "__main__":

    source = 'eth0'


    df_noniot = NFStreamer(source,statistical_analysis=True,n_meters=4,performance_report=1).to_pandas()
    # Labelling is disabled for this experiment.
    #df_noniot['device'] = 'PC'
    #df_noniot['label'] = 0

    # Saving is disabled for this experiment.
    #df_noniot.to_csv('../data/local/live_dataset.csv', index=False)



ValueError: Unable to activate source.

In [26]:
# (rows, columns) of the current df_noniot frame; the saved output is (352, 85).
df_noniot.shape

(352, 85)