In [1]:

import pandas as pd
import numpy as np
import glob, os
import pickle

In [2]:

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
# oneclass SVM classification
from sklearn.svm import OneClassSVM
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


In [6]:
#path = r'csv_full_fingerprint' 

def csv_filepath(path,device_label):
    file_path=path+'/'+device_label+'/*.csv'
    all_files = glob.glob(file_path)
    return all_files

In [5]:
# combine csv data and select 12 unique fingerprint

def combine_data(path):
    li = {}
    i = 1
    for filename in path:
        df = pd.read_csv(filename, index_col=None,header=None, delimiter = '\t')
        df = df.drop_duplicates()
        df = df[:12]
        li[i]= df
        #li.append(df)
        #print(filename)
        i+=1
    print('combined ',i-1,' csv files')
    frame = pd.concat(li, axis=0)
    return frame

In [3]:
def split_data(df):
    df.loc[:,23]=1   # mark inliers to 1, as for outlier is -1
    X = df.loc[:,:22]
    y = df.loc[:,23]
    return X,y

In [7]:
 def save_model(model,outputdir,device_label):
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)
    
    pickle_file = outputdir+'/model_'+device_label+'.sav'
    pickle.dump(model, open(pickle_file, 'wb'))
    print(device_label,' model saved'+'\n\n')
  

In [73]:
# train one class svm model for each device

inputdir = r'csv_full_fingerprint'
outputdir = 'classification_models'
device_label=os.listdir(inputdir)

i=0
while i < len(device_label):
    path = csv_filepath(inputdir,device_label[i])
   
    print('Device: '+device_label[i]+'\n\n')

    df = combine_data(path)

    X,y = split_data(df)

    # split into train/test sets
    trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.2, random_state=2, stratify=y)
    # define outlier detection model
    model = OneClassSVM(gamma='scale', nu=0.01)
    # fit on majority class
    model.fit(trainX)

    # save model
    save_model(model,outputdir,device_label[i])

    i += 1





Device: Aria


csv_full_fingerprint/Aria\file_Aria_1.csv
csv_full_fingerprint/Aria\file_Aria_10.csv
csv_full_fingerprint/Aria\file_Aria_11.csv
csv_full_fingerprint/Aria\file_Aria_12.csv
csv_full_fingerprint/Aria\file_Aria_13.csv
csv_full_fingerprint/Aria\file_Aria_14.csv
csv_full_fingerprint/Aria\file_Aria_15.csv
csv_full_fingerprint/Aria\file_Aria_16.csv
csv_full_fingerprint/Aria\file_Aria_17.csv
csv_full_fingerprint/Aria\file_Aria_18.csv
csv_full_fingerprint/Aria\file_Aria_19.csv
csv_full_fingerprint/Aria\file_Aria_2.csv
csv_full_fingerprint/Aria\file_Aria_3.csv
csv_full_fingerprint/Aria\file_Aria_4.csv
csv_full_fingerprint/Aria\file_Aria_5.csv
csv_full_fingerprint/Aria\file_Aria_6.csv
csv_full_fingerprint/Aria\file_Aria_7.csv
csv_full_fingerprint/Aria\file_Aria_8.csv
csv_full_fingerprint/Aria\file_Aria_9.csv
combined  19  csv files
Aria  model saved


Device: D-LinkCam


csv_full_fingerprint/D-LinkCam\file_D-LinkCam_20.csv
csv_full_fingerprint/D-LinkCam\file_D-LinkCam_21.csv
csv_ful

csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_146.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_147.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_148.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_149.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_150.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_151.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_152.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_153.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_154.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_155.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_156.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_157.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_158.csv
csv_full_fingerprint/D-LinkSwitch\file_D-LinkSwitch_159.csv
combined  20  csv files
D-LinkSwitch  model saved


Device: D-LinkWaterSensor


csv_full_fingerprint/D-LinkWaterSensor\file_D-LinkWaterSensor_160.csv
csv_full_f

csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_290.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_291.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_292.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_293.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_294.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_295.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_296.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_297.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_298.csv
csv_full_fingerprint/HomeMaticPlug\file_HomeMaticPlug_299.csv
combined  20  csv files
HomeMaticPlug  model saved


Device: HueBridge


csv_full_fingerprint/HueBridge\file_HueBridge_300.csv
csv_full_fingerprint/HueBridge\file_HueBridge_301.csv
csv_full_fingerprint/HueBridge\file_HueBridge_302.csv
csv_full_fingerprint/HueBridge\file_HueBridge_303.csv
csv_full_fingerprint/HueBridge\file_HueBridge_304.csv
csv_full_fingerprint/HueBridge\file_H

csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_442.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_443.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_444.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_445.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_446.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_447.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_448.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_449.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_450.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_451.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_452.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_453.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_454.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-LinkPlugHS110_455.csv
csv_full_fingerprint/TP-LinkPlugHS110\file_TP-Li

### Experiment IoT sentinel 
- load csv file (each csv file  represent 1 fingerprint)
- filter n unique rows , n=12
- combine n rows into 1 instance with (n * 23) features
- return df shape(number of fingerprint, (n * 23) features)

In [3]:
def combine_all_data(path,rows=12): # in iotsentinel number of packet is 12
    device_label=os.listdir(path)
    i=0
    j=0
    li = []
    while i < len(device_label):
        filepath = csv_filepath(path,device_label[i])
        for filename in filepath:
            df = pd.read_csv(filename, index_col=None,header=None, delimiter = '\t')
            df = df.iloc[:,:-1]      #exclude label column
            df = df.drop_duplicates() 
            df = df[:rows]           #select n uniques row
            
            if df.shape[0]<rows:     #fill with 0 if row less than n
                k = df.shape[0]
                while k < rows:   
                    temp = pd.DataFrame([[0 for i in range(len(df.columns))]], columns = df.columns)
                    df = df.append(temp)
                    k += 1
            df.reset_index(drop = True, inplace = True)
            
            #combine all rows into 1
            df=instances_combine(df,rows)
            
            #add label col
            df[df.shape[1]+1]=device_label[i]
            
            li.append(df)
            
            j+=1
            
        print('combined ',device_label[i],' csv files')
        i+=1
    print('all ',j,' files combined')
    frame = pd.concat(li, axis=0,ignore_index=True)
    return frame
        

In [4]:
#combine all rows into 1
def instances_combine(df, rows):

    x=df.iloc[0].tolist()
    i=1
    
    while i < rows:
        y=df.iloc[i].tolist()
        x.extend(y)
        #print(len(x))
        i+=1
    ddf = pd.DataFrame(data = [x])
    return ddf


In [11]:
def split_data(df):
    #df.loc[:,23]=1   # mark inliers to 1, as for outlier is -1
    last=df.shape[1]
    X = df.loc[:,:last-1]
    y = df.loc[:,last]
    return X,y

def change_label(df,device_label='Aria'):

    label_col = df.shape[1]
    df[label_col] = df[label_col].where(df[label_col]==device_label,other="Other")
    return df

In [8]:
inputdir = r'csv_full_fingerprint'
full_df = combine_all_data(inputdir,rows=12)

combined  Aria  csv files
combined  D-LinkCam  csv files
combined  D-LinkDayCam  csv files
combined  D-LinkDoorSensor  csv files
combined  D-LinkHomeHub  csv files
combined  D-LinkSensor  csv files
combined  D-LinkSiren  csv files
combined  D-LinkSwitch  csv files
combined  D-LinkWaterSensor  csv files
combined  EdimaxCam1  csv files
combined  EdimaxCam2  csv files
combined  EdimaxPlug1101W  csv files
combined  EdimaxPlug2101W  csv files
combined  EdnetCam1  csv files
combined  EdnetCam2  csv files
combined  EdnetGateway  csv files
combined  HomeMaticPlug  csv files
combined  HueBridge  csv files
combined  HueSwitch  csv files
combined  iKettle2  csv files
combined  Lightify  csv files
combined  MAXGateway  csv files
combined  SmarterCoffee  csv files
combined  TP-LinkPlugHS100  csv files
combined  TP-LinkPlugHS110  csv files
combined  WeMoInsightSwitch  csv files
combined  WeMoInsightSwitch2  csv files
combined  WeMoLink  csv files
combined  WeMoSwitch  csv files
combined  WeMoSwitch2

In [9]:
print(full_df.shape)
print(full_df[277].unique(), " devices")
#full_df.head(2)
df=full_df
print(len(df[df.shape[1]].unique()))

(550, 277)
['Aria' 'D-LinkCam' 'D-LinkDayCam' 'D-LinkDoorSensor' 'D-LinkHomeHub'
 'D-LinkSensor' 'D-LinkSiren' 'D-LinkSwitch' 'D-LinkWaterSensor'
 'EdimaxCam1' 'EdimaxCam2' 'EdimaxPlug1101W' 'EdimaxPlug2101W' 'EdnetCam1'
 'EdnetCam2' 'EdnetGateway' 'HomeMaticPlug' 'HueBridge' 'HueSwitch'
 'iKettle2' 'Lightify' 'MAXGateway' 'SmarterCoffee' 'TP-LinkPlugHS100'
 'TP-LinkPlugHS110' 'WeMoInsightSwitch' 'WeMoInsightSwitch2' 'WeMoLink'
 'WeMoSwitch' 'WeMoSwitch2' 'Withings']  devices
31


In [12]:
#training part , change others label to "other", crossvalidate, train with rf, save model 
from sklearn.model_selection import StratifiedKFold, KFold

df=full_df
df= change_label(df,'Aria')

X, y =split_data(df)
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(X, y):
    print('train -  {}   |   test -  {}'.format(np.bincount(y[train]), np.bincount(y[test])))

ValueError: invalid literal for int() with base 10: 'Aria'

In [None]:
#iot sentinel experiment
inputdir = r'csv_full_fingerprint'
outputdir = 'classification2_models'
device_label=os.listdir(inputdir)
i=0
while i < len(device_label):
    path = csv_filepath(inputdir,device_label[i])
    print('Device: '+device_label[i]+'\n\n')
    

## Performance measure 
### Test with other device

In [92]:
def sav_filepath(path):
    filepath=path+'/*.sav'
    all_files = glob.glob(filepath)
    return all_files

In [93]:
filename = 'classification_models'
print(sav_filepath(filename))

['classification_models\\model_Aria.sav', 'classification_models\\model_D-LinkCam.sav', 'classification_models\\model_D-LinkDayCam.sav', 'classification_models\\model_D-LinkDoorSensor.sav', 'classification_models\\model_D-LinkHomeHub.sav', 'classification_models\\model_D-LinkSensor.sav', 'classification_models\\model_D-LinkSiren.sav', 'classification_models\\model_D-LinkSwitch.sav', 'classification_models\\model_D-LinkWaterSensor.sav', 'classification_models\\model_EdimaxCam1.sav', 'classification_models\\model_EdimaxCam2.sav', 'classification_models\\model_EdimaxPlug1101W.sav', 'classification_models\\model_EdimaxPlug2101W.sav', 'classification_models\\model_EdnetCam1.sav', 'classification_models\\model_EdnetCam2.sav', 'classification_models\\model_EdnetGateway.sav', 'classification_models\\model_HomeMaticPlug.sav', 'classification_models\\model_HueBridge.sav', 'classification_models\\model_HueSwitch.sav', 'classification_models\\model_iKettle2.sav', 'classification_models\\model_Ligh

In [78]:
filename = 'classification_models/model_Aria.sav'
#load model
model = pickle.load(open(filename, 'rb'))
# detect outliers in the test set
yhat = model.predict(testX)

In [100]:
# performance measures
def performances(testy,yhat):
    score = f1_score(testy, yhat,pos_label=-1)
    acc= accuracy_score(testy,yhat)
    print('F1 Score: %.6f' % score)
    print('Accuracy Score: %.3f'% acc)
    print('Confusion matrix:')
    confusion_matrix(testy,yhat)

In [9]:
# mark inliers to 1 for testing
def label_inliers(label,y):
    y[y[:].isin([label])] = 1
    y =y.astype('int64')
    return y

In [163]:
import sys
print(sys.executable)

C:\Users\onewa\.conda\envs\test-env\python.exe
