# Análisis de resultados

In [1]:
# Importaciones
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

## 1. Utils

In [2]:
SEED = 9603

bagging_model = [
  {
    'path_name' : 'rf', 
    'model_name' : 'RandomForestClassifier',
  },
  {
    'path_name' : 'dt', 
    'model_name' : 'DecisionTreeClassifier',
  },
  {
    'path_name' : 'mlp',
    'model_name' : 'MLPClassifier',
  },
  {
    'path_name' : 'knn',
    'model_name' : 'KNeighborsClassifier'
  },
]

# Número de modelos 
prng = np.random.RandomState(seed=SEED)
max_int32 = np.iinfo(np.int32).max
SEEDS_POR_MODELO = prng.randint(0, max_int32, size=len(bagging_model))
print(SEEDS_POR_MODELO)

iter_list   = ['Iteración 1', 'Iteración 2', 'Iteración 3', 'Iteración 4', 'Iteración 5', 'Iteración 6', 'Iteración 7', 'Iteración 8', 'Iteración 9', 'Iteración 10']
metric_list = ['Exac', 'Prec', 'Sens', 'F1', 'ms', 'AUC_PS']
label_list  = ['BENIGN', 'BOT', 'DDOS', 'DOS_GOLDENEYE', 'DOS_HULK', 'DOS_SLOWHTTPTEST', 'DOS_SLOWLORIS', 'FTP_PATATOR', 'PORTSCAN', 'SSH_PATATOR']
model_list  = ['RandomForestClassifier', 'DecisionTreeClassifier', 'MLPClassifier', 'KNeighborsClassifier', 'IDSBaggingClassifier']

[793494059 498241738 377997800 912782427]


In [3]:
# Resultados ordenados por métrica
df_results = pd.read_excel('DB/testing_results.xlsx')
df_results['iter'] = pd.Categorical(df_results['iter'], categories=iter_list, ordered=True)
df_results

Unnamed: 0,iter,model,label,TP,TN,FP,FN,Exac,Prec,Sens,F1,ms,AUC_PS
0,Iteración 1,RandomForestClassifier,BENIGN,11567,5416,6,4,0.999412,0.999482,0.999654,0.999568,72645.400011,0.999999
1,Iteración 1,RandomForestClassifier,BOT,8,16984,0,1,0.999941,1.000000,0.888889,0.941176,72645.400011,1.000000
2,Iteración 1,RandomForestClassifier,DDOS,1189,15804,0,0,1.000000,1.000000,1.000000,1.000000,72645.400011,1.000000
3,Iteración 1,RandomForestClassifier,DOS_GOLDENEYE,92,16894,5,2,0.999588,0.948454,0.978723,0.963351,72645.400011,0.995983
4,Iteración 1,RandomForestClassifier,DOS_HULK,1974,15011,1,7,0.999529,0.999494,0.996466,0.997978,72645.400011,0.999750
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,Iteración 10,IDSBaggingClassifier,DOS_SLOWHTTPTEST,20,16970,0,2,0.999882,1.000000,0.909091,0.952381,427326.000063,0.984790
496,Iteración 10,IDSBaggingClassifier,DOS_SLOWLORIS,50,16941,1,0,0.999941,0.980392,1.000000,0.990099,427326.000063,1.000000
497,Iteración 10,IDSBaggingClassifier,FTP_PATATOR,50,16942,0,0,1.000000,1.000000,1.000000,1.000000,427326.000063,1.000000
498,Iteración 10,IDSBaggingClassifier,PORTSCAN,1989,15002,0,1,0.999941,1.000000,0.999497,0.999749,427326.000063,0.999568


In [4]:
# Resultados ordenados por iteración
df_results_ordered = df_results.copy()
df_results_ordered = df_results_ordered.drop(
  ['TP', 'TN', 'FP', 'FN'], axis=1
).melt(
  id_vars=['iter', 'model', 'label'], var_name='metric', value_name='value'
).pivot(
  index=['metric', 'label', 'model'], columns='iter', values='value'
).reset_index()

# df_results_ordered.columns = ['{}_{}'.format(*col) if isinstance(col, tuple) else col for col in df_results_ordered.columns]
df_results_ordered['metric'] = pd.Categorical(df_results_ordered['metric'], categories=metric_list, ordered=True)
df_results_ordered['label'] = pd.Categorical(df_results_ordered['label'], categories=label_list, ordered=True)
df_results_ordered['model'] = pd.Categorical(df_results_ordered['model'], categories=model_list, ordered=True)

df_results_ordered = df_results_ordered.sort_values(by=['metric', 'label', 'model'])
df_results_ordered

iter,metric,label,model,Iteración 1,Iteración 2,Iteración 3,Iteración 4,Iteración 5,Iteración 6,Iteración 7,Iteración 8,Iteración 9,Iteración 10
54,Exac,BENIGN,RandomForestClassifier,0.999412,0.999529,0.999470,0.999765,0.999588,0.999823,0.999294,0.999647,0.999353,0.998176
50,Exac,BENIGN,DecisionTreeClassifier,0.999058,0.998941,0.998823,0.997940,0.998588,0.999058,0.998588,0.998705,0.997234,0.998823
53,Exac,BENIGN,MLPClassifier,0.989290,0.988995,0.989113,0.988642,0.988584,0.987407,0.988642,0.990467,0.987641,0.989524
52,Exac,BENIGN,KNeighborsClassifier,0.997999,0.998411,0.997999,0.998058,0.998352,0.998293,0.998588,0.998764,0.998529,0.998117
51,Exac,BENIGN,IDSBaggingClassifier,0.999353,0.999353,0.999529,0.999588,0.999588,0.999529,0.999588,0.999470,0.999529,0.999470
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49,AUC_PS,SSH_PATATOR,RandomForestClassifier,1.000000,0.999644,1.000000,1.000000,1.000000,1.000000,1.000000,0.995876,1.000000,1.000000
45,AUC_PS,SSH_PATATOR,DecisionTreeClassifier,0.986516,0.973032,0.986842,1.000000,0.948054,1.000000,1.000000,0.960230,0.986516,1.000000
48,AUC_PS,SSH_PATATOR,MLPClassifier,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
47,AUC_PS,SSH_PATATOR,KNeighborsClassifier,0.986842,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.972656,1.000000


In [5]:
with pd.ExcelWriter('output/df_results_ordered_by_iter.xlsx', engine='openpyxl') as writer:
  for metric in metric_list : 
    for label in label_list : 
      df_results_ordered[
        (df_results_ordered['metric'] == metric) &
        (df_results_ordered['label'] == label)
      ].to_excel(writer, sheet_name=f'{metric}_{label}', index=False)

In [6]:
df_results_ordered_by_model = df_results.copy()
df_results_ordered_by_model['model'] = pd.Categorical(df_results_ordered_by_model['model'], categories=model_list, ordered=True)
df_results_ordered_by_model = df_results_ordered_by_model.drop(
  ['TP', 'TN', 'FP', 'FN'], axis=1
).melt(
  id_vars=['iter', 'model', 'label'], var_name='metric', value_name='value'
).pivot(
  index=['metric', 'label', 'iter'], columns='model', values='value'
).reset_index()

# df_results_ordered.columns = ['{}_{}'.format(*col) if isinstance(col, tuple) else col for col in df_results_ordered.columns]
df_results_ordered_by_model['iter'] = pd.Categorical(df_results_ordered_by_model['iter'], categories=iter_list, ordered=True)
df_results_ordered_by_model['metric'] = pd.Categorical(df_results_ordered_by_model['metric'], categories=metric_list, ordered=True)
df_results_ordered_by_model['label'] = pd.Categorical(df_results_ordered_by_model['label'], categories=label_list, ordered=True)

df_results_ordered_by_model = df_results_ordered_by_model.sort_values(by=['metric', 'label', 'iter'])
df_results_ordered_by_model

model,metric,label,iter,RandomForestClassifier,DecisionTreeClassifier,MLPClassifier,KNeighborsClassifier,IDSBaggingClassifier
100,Exac,BENIGN,Iteración 1,0.999412,0.999058,0.989290,0.997999,0.999353
101,Exac,BENIGN,Iteración 2,0.999529,0.998941,0.988995,0.998411,0.999353
102,Exac,BENIGN,Iteración 3,0.999470,0.998823,0.989113,0.997999,0.999529
103,Exac,BENIGN,Iteración 4,0.999765,0.997940,0.988642,0.998058,0.999588
104,Exac,BENIGN,Iteración 5,0.999588,0.998588,0.988584,0.998352,0.999588
...,...,...,...,...,...,...,...,...
95,AUC_PS,SSH_PATATOR,Iteración 6,1.000000,1.000000,1.000000,1.000000,1.000000
96,AUC_PS,SSH_PATATOR,Iteración 7,1.000000,1.000000,1.000000,1.000000,1.000000
97,AUC_PS,SSH_PATATOR,Iteración 8,0.995876,0.960230,1.000000,1.000000,1.000000
98,AUC_PS,SSH_PATATOR,Iteración 9,1.000000,0.986516,1.000000,0.972656,0.999279


In [7]:
with pd.ExcelWriter('output/df_results_ordered_by_model.xlsx', engine='openpyxl') as writer: 
  for metric in metric_list : 
    for label in label_list : 
      df_results_ordered_by_model[
        (df_results_ordered_by_model['metric'] == metric) &
        (df_results_ordered_by_model['label'] == label)
      ].to_excel(writer, sheet_name=f'{metric}_{label}', index=False)

In [8]:
df_results_ordered[
  (df_results_ordered['metric'] == 'Exac') & 
  (df_results_ordered['label'] == 'BENIGN') & 
  (df_results_ordered['model'] == 'RandomForestClassifier')
][iter_list].iloc[0].to_list()

[0.999411522391573,
 0.9995292179132584,
 0.9994703701524157,
 0.9997646089566292,
 0.9995880656741011,
 0.9998234567174719,
 0.9992938268698875,
 0.9996469134349438,
 0.99935263653484,
 0.9981756120527306]

## Test de Friedmann

In [9]:
""" Test de Friedman """
# H0 (hipótesis nula): No hay diferencias significativas entre los grupos comparados (los modelos rinden igual).
# H1 (hipótesis alternativa): Al menos un grupo difiere significativamente de los demás.
# Nivel de significancia = 0.05
# Si p <= nivel de significancia  ->  Se rechaza H0 (al menos una muestra difiere de las demás)

df_test_friedmann = pd.DataFrame(columns=["metric", "label", "gl", "stadistic", "p_value", "las muestras difieren?"])
nivel_significancia = 0.05

for metric in metric_list : 
  for label in label_list :
    lista_friedman = []
    for model in model_list : 
      mask = (
        (df_results_ordered['metric'] == metric) & 
        (df_results_ordered['label'] == label) & 
        (df_results_ordered['model'] == model)
      )
      muestra = df_results_ordered[mask][iter_list].iloc[0].to_list()
      lista_friedman.append(muestra)
    stat, p = stats.friedmanchisquare(*lista_friedman)
    df_test_friedmann.loc[len(df_test_friedmann)] = [
      metric,
      label,
      len(lista_friedman[0])-1,
      stat,
      p,
      p <= nivel_significancia 
    ]

df_test_friedmann

Unnamed: 0,metric,label,gl,stadistic,p_value,las muestras difieren?
0,Exac,BENIGN,9,35.111111,4.407507e-07,True
1,Exac,BOT,9,35.068783,4.496648e-07,True
2,Exac,DDOS,9,16.671329,0.002238784,True
3,Exac,DOS_GOLDENEYE,9,33.959596,7.595428e-07,True
4,Exac,DOS_HULK,9,29.394872,6.498504e-06,True
5,Exac,DOS_SLOWHTTPTEST,9,29.723404,5.571788e-06,True
6,Exac,DOS_SLOWLORIS,9,32.676923,1.390935e-06,True
7,Exac,FTP_PATATOR,9,5.892473,0.2073236,False
8,Exac,PORTSCAN,9,25.585799,3.83504e-05,True
9,Exac,SSH_PATATOR,9,7.459459,0.1135103,False


In [10]:
df_results_ordered

iter,metric,label,model,Iteración 1,Iteración 2,Iteración 3,Iteración 4,Iteración 5,Iteración 6,Iteración 7,Iteración 8,Iteración 9,Iteración 10
54,Exac,BENIGN,RandomForestClassifier,0.999412,0.999529,0.999470,0.999765,0.999588,0.999823,0.999294,0.999647,0.999353,0.998176
50,Exac,BENIGN,DecisionTreeClassifier,0.999058,0.998941,0.998823,0.997940,0.998588,0.999058,0.998588,0.998705,0.997234,0.998823
53,Exac,BENIGN,MLPClassifier,0.989290,0.988995,0.989113,0.988642,0.988584,0.987407,0.988642,0.990467,0.987641,0.989524
52,Exac,BENIGN,KNeighborsClassifier,0.997999,0.998411,0.997999,0.998058,0.998352,0.998293,0.998588,0.998764,0.998529,0.998117
51,Exac,BENIGN,IDSBaggingClassifier,0.999353,0.999353,0.999529,0.999588,0.999588,0.999529,0.999588,0.999470,0.999529,0.999470
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49,AUC_PS,SSH_PATATOR,RandomForestClassifier,1.000000,0.999644,1.000000,1.000000,1.000000,1.000000,1.000000,0.995876,1.000000,1.000000
45,AUC_PS,SSH_PATATOR,DecisionTreeClassifier,0.986516,0.973032,0.986842,1.000000,0.948054,1.000000,1.000000,0.960230,0.986516,1.000000
48,AUC_PS,SSH_PATATOR,MLPClassifier,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
47,AUC_PS,SSH_PATATOR,KNeighborsClassifier,0.986842,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.972656,1.000000


In [11]:
""" Test de Wilcoxon """
# H0 (hipótesis nula): La mediana de las diferencias es igual a 0
# H1 (hipótesis alternativa): La mediana de las diferencias es diferente de 0
# Nivel de significancia = 0.05
# Si p <= nivel de significancia  ->  Se rechaza H0 (al menos una muestra difiere de las demás)
df_test_wilcoxon = pd.DataFrame(columns=["metric", "label", "models", "gl", "stadistic", "p_value", "las muestras difieren?"])
nivel_significancia = 0.0125
list_models = ['RandomForestClassifier', 'DecisionTreeClassifier', 'MLPClassifier', 'KNeighborsClassifier']

for metric in metric_list : 
  for label in label_list :
    lista_wilcoxon = []
    for model in ['IDSBaggingClassifier']+list_models :
      muestra = df_results_ordered[
        (df_results_ordered['metric'] == metric) & 
        (df_results_ordered['label'] == label) & 
        (df_results_ordered['model'] == model)
      ][iter_list].iloc[0].to_list()
      if model == 'IDSBaggingClassifier' : 
        muestra_bagging_model = muestra
      else : 
        stat, p = stats.wilcoxon(muestra_bagging_model, muestra, alternative='greater')

        df_test_wilcoxon.loc[len(df_test_wilcoxon)] = [
          metric,
          label,
          f'IDSBaggingClassifier > {model}',
          4,
          stat,
          p,
          p <= nivel_significancia
        ]
df_test_wilcoxon

  z = (r_plus - mn) / se
  z = (r_plus - mn) / se
  z = (r_plus - mn) / se
  z = (r_plus - mn) / se
  z = (r_plus - mn) / se


Unnamed: 0,metric,label,models,gl,stadistic,p_value,las muestras difieren?
0,Exac,BENIGN,IDSBaggingClassifier > RandomForestClassifier,4,24.5,0.423828,False
1,Exac,BENIGN,IDSBaggingClassifier > DecisionTreeClassifier,4,55.0,0.000977,True
2,Exac,BENIGN,IDSBaggingClassifier > MLPClassifier,4,55.0,0.000977,True
3,Exac,BENIGN,IDSBaggingClassifier > KNeighborsClassifier,4,55.0,0.000977,True
4,Exac,BOT,IDSBaggingClassifier > RandomForestClassifier,4,0.0,1.000000,False
...,...,...,...,...,...,...,...
235,AUC_PS,PORTSCAN,IDSBaggingClassifier > KNeighborsClassifier,4,25.0,0.031250,False
236,AUC_PS,SSH_PATATOR,IDSBaggingClassifier > RandomForestClassifier,4,14.0,0.710938,False
237,AUC_PS,SSH_PATATOR,IDSBaggingClassifier > DecisionTreeClassifier,4,39.0,0.025391,False
238,AUC_PS,SSH_PATATOR,IDSBaggingClassifier > MLPClassifier,4,5.0,0.984375,False


In [18]:
df_test_wilcoxon.to_excel('output/df_wilcoxon_greater.xlsx', index=False)

In [13]:
with pd.ExcelWriter('output/analisis_descriptivo.xlsx', engine='openpyxl') as writer: 
  for metric in metric_list : 
    for label in label_list : 
      df_results_ordered_by_model[
        (df_results_ordered_by_model['metric'] == metric) & 
        (df_results_ordered_by_model['label'] == label) 
      ][model_list].describe().to_excel(writer, sheet_name=f'{metric}_{label}')

In [14]:

df_results_ordered_by_model[
  (df_results_ordered_by_model['metric'] == 'Exac') & 
  (df_results_ordered_by_model['label'] == 'BENIGN') 
][model_list].describe()

model,RandomForestClassifier,DecisionTreeClassifier,MLPClassifier,KNeighborsClassifier,IDSBaggingClassifier
count,10.0,10.0,10.0,10.0,10.0
mean,0.999406,0.998576,0.988831,0.998311,0.9995
std,0.000465,0.000572,0.000885,0.000266,8.9e-05
min,0.998176,0.997234,0.987407,0.997999,0.999353
25%,0.999367,0.998588,0.988598,0.998073,0.99947
50%,0.9995,0.998764,0.988819,0.998323,0.999529
75%,0.999632,0.998911,0.989246,0.998499,0.999573
max,0.999823,0.999058,0.990467,0.998764,0.999588
