# DATOS YA PROCESADOS

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, MiniBatchKMeans, Birch, DBSCAN
from sklearn.neighbors import LocalOutlierFactor
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score

# Load the already-processed UNSW data from CSV (path is relative to the working directory).
df_UNSW = pd.read_csv('UNSW_trabajados.csv')
# Label encoding in the `class_numeric` column: 0 = normal, 1 = attacker.

In [2]:
# Preview the first rows of the loaded frame to check its columns.
df_UNSW.head()

Unnamed: 0.1,Unnamed: 0,Src IP Addr,Src Pt,Dst IP Addr,Dst Pt,class,Same_ip_src,Same_ip_dst,Same_ip_src_port_dst,Same_ip_dst_port_src,class_numeric
0,0,59.166.0.9,7045,149.171.126.7,25,Normal,26398,26075,1120,1,0
1,1,59.166.0.9,9685,149.171.126.2,80,Normal,26398,26990,2705,2,0
2,2,59.166.0.2,1421,149.171.126.4,53,Normal,27050,27120,5249,1,0
3,3,59.166.0.2,21553,149.171.126.2,25,Normal,27050,26990,1138,1,0
4,4,59.166.0.8,45212,149.171.126.4,53,Normal,26024,27120,5072,1,0


In [3]:
def print_results(metodo, real, prediccion):
    """Print a rounded set of classification metrics for one method.

    Each metric is computed with its scikit-learn default arguments, rounded
    to 4 decimals and printed on its own line, prefixed with ``metodo``.
    Two blank lines are printed afterwards to separate consecutive reports.

    Args:
        metodo: Text prepended to every printed line (e.g. the dataset name).
        real: Ground-truth labels.
        prediccion: Predicted labels to score against ``real``.

    Returns:
        None. The report is written to standard output only.
    """
    # (line suffix, metric function) in print order. The tab counts differ per
    # label so that the printed values line up in one column.
    metricas = (
        (' Accuracy:\t\t', accuracy_score),
        (' Precision:\t\t', precision_score),
        (' Recall:\t\t', recall_score),
        (' F1 score:\t\t', f1_score),
        # Spelling fixed: the metric is the Matthews correlation coefficient.
        (' Matthews Corr.:\t', matthews_corrcoef),
        (' AUC:\t\t', roc_auc_score),
    )
    for sufijo, metrica in metricas:
        print(metodo + sufijo, round(metrica(real, prediccion), 4))
    print()
    print()

# KMeans

In [None]:
# Two-cluster KMeans over the four `Same_ip_*` columns, scored against the known labels.
kmeans_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
# NOTE(review): newer scikit-learn releases renamed algorithm='full' to 'lloyd';
# confirm against the installed version before re-running.
kmeans = KMeans(
    n_clusters=2,
    n_init=10,
    random_state=0,
    algorithm='full',
)
kmeans.fit(df_UNSW[kmeans_cols])
# NOTE(review): cluster ids 0/1 are assigned arbitrarily by the algorithm, so they are
# not guaranteed to line up with class_numeric (0 = normal, 1 = attacker) — confirm the
# mapping before trusting these metrics.
print_results('UNSW', df_UNSW['class_numeric'], kmeans.labels_)

# MiniBatch KMeans

In [None]:
# Two-cluster MiniBatchKMeans over the four `Same_ip_*` columns, scored against the known labels.
minibatch_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
minibatch = MiniBatchKMeans(
    n_clusters=2,
    n_init=10,
    random_state=0,
)
minibatch.fit(df_UNSW[minibatch_cols])
# NOTE(review): cluster ids 0/1 are assigned arbitrarily by the algorithm, so they are
# not guaranteed to line up with class_numeric (0 = normal, 1 = attacker) — confirm the
# mapping before trusting these metrics.
print_results('UNSW', df_UNSW['class_numeric'], minibatch.labels_)

# Birch

In [None]:
## Author's note: problems with "inconsistent ..." (the original note is truncated).

# Birch with two final clusters over the four `Same_ip_*` columns; `birch` holds the
# predicted cluster label of every row, not the fitted estimator.
birch_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
birch_model = Birch(n_clusters=2, threshold=0.1, branching_factor=10)
birch = birch_model.fit_predict(df_UNSW[birch_cols])
print_results('UNSW', df_UNSW['class_numeric'], birch)

# MinCovDet (EllipticEnvelope)

In [None]:
# Elliptic-envelope outlier detection over the four `Same_ip_*` columns.
envelope_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
envelope = EllipticEnvelope(assume_centered=False, contamination=0.4, random_state=0)
envelope_raw = envelope.fit_predict(df_UNSW[envelope_cols])
# fit_predict yields +1 (inlier) / -1 (outlier); recode to the class_numeric
# convention used for scoring: inlier -> 0, outlier -> 1.
minCovDet = np.where(envelope_raw == -1, 1, 0)
print_results('UNSW', df_UNSW['class_numeric'], minCovDet)

# Isolation Forest

In [None]:
# Isolation Forest outlier detection over the four `Same_ip_*` columns.
isolation_cols = ['Same_ip_src', 'Same_ip_dst', 'Same_ip_src_port_dst', 'Same_ip_dst_port_src']
# `behaviour='new'` is deliberately not passed: the parameter was deprecated in
# scikit-learn 0.22 and removed in 0.24, where passing it raises a TypeError.
# With an explicit numeric `contamination`, the inlier/outlier predictions are
# expected to be the same with or without it on releases that still accept it.
isolation_model = IsolationForest(n_estimators=100, contamination=0.4, random_state=4)
isolation_raw = isolation_model.fit_predict(df_UNSW[isolation_cols])
# fit_predict yields +1 (inlier) / -1 (outlier); recode to the class_numeric
# convention used for scoring: inlier -> 0, outlier -> 1.
isolation = np.where(isolation_raw == -1, 1, 0)
print_results('UNSW', df_UNSW['class_numeric'], isolation)