In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, MiniBatchKMeans, Birch
from sklearn.neighbors import LocalOutlierFactor
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score

# Load the ISCX dataset from CSV (path is relative to the working directory).
# Label encoding of the `class_numeric` column: 0 = normal, 1 = attacker.
ISCX_CSV_PATH = 'ISCX_trabajados.csv'
df_ISCX = pd.read_csv(ISCX_CSV_PATH)

In [2]:
# Preview the first five rows to check the columns and the label encoding.
df_ISCX.head(n=5)

Unnamed: 0.1,Unnamed: 0,Src IP Addr,Src Pt,Dst IP Addr,Dst Pt,class,Same_ip_src,Same_ip_dst,Same_ip_src_port_dst,Same_ip_dst_port_src,class_numeric
0,0,192.168.5.122,5353,224.0.0.251,5353,Normal,37698,16,14,16,0
1,1,192.168.5.122,5353,224.0.0.251,5353,Normal,37698,16,14,16,0
2,2,192.168.2.113,4191,192.168.5.122,80,Normal,3026,27078,988,12,0
3,3,192.168.2.113,4191,192.168.5.122,80,Normal,3026,27078,988,12,0
4,4,192.168.2.113,4192,207.241.148.80,80,Normal,3026,154,988,2,0


In [3]:
def print_results(metodo, real, prediccion):
    """Print a six-metric classification report for one set of predictions.

    Every metric is computed from ``real`` and ``prediccion``, rounded to four
    decimals and printed on its own line, prefixed with ``metodo``. Two empty
    lines are printed after the report.

    Parameters
    ----------
    metodo : str
        Prefix for each printed line (concatenated with the metric name).
    real : array-like
        Ground-truth labels.
    prediccion : array-like
        Predicted labels. The same values are passed to every metric,
        including ``roc_auc_score``.
    """
    # (line suffix, metric function) pairs, listed in print order.
    # The suffix strings are emitted verbatim, so they are kept exactly as-is.
    metric_table = (
        (' Accuracy:\t\t', accuracy_score),
        (' Precision:\t\t', precision_score),
        (' Recall:\t\t', recall_score),
        (' F1 score:\t\t', f1_score),
        (' Mathews Corr.:\t', matthews_corrcoef),
        (' AUC:\t\t', roc_auc_score),
    )
    for suffix, metric in metric_table:
        print(metodo + suffix, round(metric(real, prediccion), 4))

    # Two empty lines separate this report from whatever is printed next.
    print('\n')

# KMeans

In [4]:
# Two-cluster KMeans on the four Same_ip_* count features, scored against the
# ground-truth labels.
#
# `algorithm='full'` is deliberately not passed: scikit-learn renamed that value
# to 'lloyd' (1.1) and later stopped accepting 'full', so passing it makes this
# cell raise on current releases. Leaving the parameter at its default keeps the
# cell runnable across versions (current releases default to Lloyd; older ones
# may select the Elkan variant instead).
#
# NOTE(review): KMeans cluster ids are arbitrary. The metrics below treat
# cluster 1 as "attacker" (class_numeric == 1) -- confirm that mapping is the
# intended one before interpreting the scores.
feature_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(df_ISCX[feature_cols])
print_results('ISCX', df_ISCX['class_numeric'], kmeans.labels_)

ISCX Accuracy:		 0.4081
ISCX Precision:		 0.1109
ISCX Recall:		 0.9992
ISCX F1 score:		 0.1997
ISCX Mathews Corr.:	 0.1997
ISCX AUC:		 0.6801




# MiniBatch KMeans

In [5]:
# Mini-batch KMeans with two clusters on the four Same_ip_* count features.
# NOTE(review): cluster ids are arbitrary; the metrics below treat cluster 1 as
# "attacker" (class_numeric == 1) -- confirm that mapping is intended.
feature_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
minibatch = MiniBatchKMeans(n_clusters=2, n_init=10, random_state=0)
minibatch.fit(df_ISCX[feature_cols])
print_results('ISCX', df_ISCX['class_numeric'], minibatch.labels_)

ISCX Accuracy:		 0.4104
ISCX Precision:		 0.1113
ISCX Recall:		 0.9992
ISCX F1 score:		 0.2003
ISCX Mathews Corr.:	 0.2008
ISCX AUC:		 0.6813




# Birch

In [None]:
# Birch clustering (two final clusters) on the four Same_ip_* count features.
# This cell has no execution count or output in the saved notebook.
# NOTE(review): cluster ids are arbitrary; the metrics below treat cluster 1 as
# "attacker" (class_numeric == 1) -- confirm that mapping is intended.
feature_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
birch_model = Birch(n_clusters=2, threshold=0.1, branching_factor=10)
birch = birch_model.fit_predict(df_ISCX[feature_cols])
print_results('ISCX', df_ISCX['class_numeric'], birch)

# MinCovDet (EllipticEnvelope)

In [6]:
# Outlier detection with EllipticEnvelope on the four Same_ip_* count features,
# flagging 20% of the rows (contamination=0.2).
feature_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
envelope = EllipticEnvelope(assume_centered=False, contamination=0.2, random_state=4)
# Recode the detector output to the dataset's label coding in one step:
# -1 (outlier) -> 1 (attacker), 1 (inlier) -> 0 (normal).
minCovDet = np.where(envelope.fit_predict(df_ISCX[feature_cols]) == -1, 1, 0)
print_results('ISCX', df_ISCX['class_numeric'], minCovDet)

ISCX Accuracy:		 0.8737
ISCX Precision:		 0.3618
ISCX Recall:		 0.9289
ISCX F1 score:		 0.5208
ISCX Mathews Corr.:	 0.5325
ISCX AUC:		 0.8991




# Isolation Forest

In [7]:
# Isolation Forest on the four Same_ip_* count features, flagging 20% of the
# rows (contamination=0.2).
#
# `behaviour='new'` is deliberately not passed: the parameter was deprecated and
# then removed from scikit-learn, so passing it raises TypeError on current
# releases. It only affected how decision_function is offset; with an explicit
# contamination the inlier/outlier labels from fit_predict are the same.
feature_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
forest = IsolationForest(n_estimators=100, contamination=0.2, random_state=0)
# Recode the detector output to the dataset's label coding in one step:
# -1 (outlier) -> 1 (attacker), 1 (inlier) -> 0 (normal). A single np.where
# avoids the order dependence of two successive in-place mask assignments.
isolation = np.where(forest.fit_predict(df_ISCX[feature_cols]) == -1, 1, 0)
print_results('ISCX', df_ISCX['class_numeric'], isolation)

ISCX Accuracy:		 0.7999
ISCX Precision:		 0.1819
ISCX Recall:		 0.4888
ISCX F1 score:		 0.2652
ISCX Mathews Corr.:	 0.2056
ISCX AUC:		 0.6567


