# Análisis de resultados

In [4]:
# Importaciones
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from scipy import stats
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt
import seaborn as sns

## 1. Utils

In [5]:
SEED = 9603

bagging_model = [
  {
    'path_name' : 'rf', 
    'model_name' : 'RandomForestClassifier',
  },
  {
    'path_name' : 'dt', 
    'model_name' : 'DecisionTreeClassifier',
  },
  {
    'path_name' : 'mlp',
    'model_name' : 'MLPClassifier',
  },
  {
    'path_name' : 'knn',
    'model_name' : 'KNeighborsClassifier'
  },
]

# Número de modelos 
prng = np.random.RandomState(seed=SEED)
max_int32 = np.iinfo(np.int32).max
SEEDS_POR_MODELO = prng.randint(0, max_int32, size=len(bagging_model))
print(SEEDS_POR_MODELO)

iter_list   = ['Iteración 1', 'Iteración 2', 'Iteración 3', 'Iteración 4', 'Iteración 5', 'Iteración 6', 'Iteración 7', 'Iteración 8', 'Iteración 9', 'Iteración 10']
metric_list = ['Exac', 'Prec', 'Sens', 'F1', 'ms', 'AUC_PS']
label_list  = ['BENIGN', 'BOT', 'DDOS', 'DOS_GOLDENEYE', 'DOS_HULK', 'DOS_SLOWHTTPTEST', 'DOS_SLOWLORIS', 'FTP_PATATOR', 'PORTSCAN', 'SSH_PATATOR']
model_list  = ['RandomForestClassifier', 'DecisionTreeClassifier', 'MLPClassifier', 'KNeighborsClassifier', 'IDSBaggingClassifier']

[793494059 498241738 377997800 912782427]


In [6]:
# Resultados ordenados por métrica
df_results = pd.read_excel('DB/testing_results.xlsx')
df_results['iter'] = pd.Categorical(df_results['iter'], categories=iter_list, ordered=True)
df_results

Unnamed: 0,iter,model,label,TP,TN,FP,FN,Exac,Prec,Sens,F1,ms,AUC_PS
0,Iteración 1,RandomForestClassifier,BENIGN,11567,5416,6,4,0.999412,0.999482,0.999654,0.999568,71872.600005,0.999999
1,Iteración 1,RandomForestClassifier,BOT,8,16984,0,1,0.999941,1.000000,0.888889,0.941176,71872.600005,1.000000
2,Iteración 1,RandomForestClassifier,DDOS,1189,15804,0,0,1.000000,1.000000,1.000000,1.000000,71872.600005,1.000000
3,Iteración 1,RandomForestClassifier,DOS_GOLDENEYE,92,16894,5,2,0.999588,0.948454,0.978723,0.963351,71872.600005,0.995983
4,Iteración 1,RandomForestClassifier,DOS_HULK,1974,15011,1,7,0.999529,0.999494,0.996466,0.997978,71872.600005,0.999750
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,Iteración 10,IDSBaggingClassifier,DOS_SLOWHTTPTEST,20,16970,0,2,0.999882,1.000000,0.909091,0.952381,436.615200,0.984790
496,Iteración 10,IDSBaggingClassifier,DOS_SLOWLORIS,50,16941,1,0,0.999941,0.980392,1.000000,0.990099,436.615200,1.000000
497,Iteración 10,IDSBaggingClassifier,FTP_PATATOR,50,16942,0,0,1.000000,1.000000,1.000000,1.000000,436.615200,1.000000
498,Iteración 10,IDSBaggingClassifier,PORTSCAN,1989,15002,0,1,0.999941,1.000000,0.999497,0.999749,436.615200,0.999568


In [7]:
# Resultados ordenados por iteración
df_results_ordered = df_results.copy()
df_results_ordered = df_results_ordered.drop(
  ['TP', 'TN', 'FP', 'FN'], axis=1
).melt(
  id_vars=['iter', 'model', 'label'], var_name='metric', value_name='value'
).pivot(
  index=['metric', 'label', 'model'], columns='iter', values='value'
).reset_index()

# df_results_ordered.columns = ['{}_{}'.format(*col) if isinstance(col, tuple) else col for col in df_results_ordered.columns]
df_results_ordered['metric'] = pd.Categorical(df_results_ordered['metric'], categories=metric_list, ordered=True)
df_results_ordered['label'] = pd.Categorical(df_results_ordered['label'], categories=label_list, ordered=True)
df_results_ordered['model'] = pd.Categorical(df_results_ordered['model'], categories=model_list, ordered=True)

df_results_ordered = df_results_ordered.sort_values(by=['metric', 'label', 'model'])
df_results_ordered

iter,metric,label,model,Iteración 1,Iteración 2,Iteración 3,Iteración 4,Iteración 5,Iteración 6,Iteración 7,Iteración 8,Iteración 9,Iteración 10
54,Exac,BENIGN,RandomForestClassifier,0.999412,0.999529,0.999470,0.999765,0.998411,0.999823,0.999294,0.999647,0.999353,0.998176
50,Exac,BENIGN,DecisionTreeClassifier,0.999058,0.998941,0.998823,0.997940,0.998588,0.999058,0.998588,0.998705,0.997234,0.998823
53,Exac,BENIGN,MLPClassifier,0.989290,0.988995,0.989113,0.988642,0.988584,0.987407,0.988642,0.990467,0.987641,0.989524
52,Exac,BENIGN,KNeighborsClassifier,0.997999,0.998411,0.997999,0.998058,0.998352,0.998293,0.998588,0.998764,0.998529,0.998117
51,Exac,BENIGN,IDSBaggingClassifier,0.999353,0.999353,0.999529,0.999588,0.999588,0.999529,0.999588,0.999470,0.999529,0.999470
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49,AUC_PS,SSH_PATATOR,RandomForestClassifier,1.000000,0.999644,1.000000,1.000000,1.000000,1.000000,1.000000,0.995876,1.000000,1.000000
45,AUC_PS,SSH_PATATOR,DecisionTreeClassifier,0.986516,0.973032,0.986842,1.000000,0.948054,1.000000,1.000000,0.960230,0.986516,1.000000
48,AUC_PS,SSH_PATATOR,MLPClassifier,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
47,AUC_PS,SSH_PATATOR,KNeighborsClassifier,0.986842,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.972656,1.000000


In [8]:
with pd.ExcelWriter('output/df_results_ordered_by_iter.xlsx', engine='openpyxl') as writer:
  for metric in metric_list : 
    for label in label_list : 
      df_results_ordered[
        (df_results_ordered['metric'] == metric) &
        (df_results_ordered['label'] == label)
      ].to_excel(writer, sheet_name=f'{metric}_{label}', index=False)

In [9]:
df_results_ordered_by_model = df_results.copy()
df_results_ordered_by_model['model'] = pd.Categorical(df_results_ordered_by_model['model'], categories=model_list, ordered=True)
df_results_ordered_by_model = df_results_ordered_by_model.drop(
  ['TP', 'TN', 'FP', 'FN'], axis=1
).melt(
  id_vars=['iter', 'model', 'label'], var_name='metric', value_name='value'
).pivot(
  index=['metric', 'label', 'iter'], columns='model', values='value'
).reset_index()

# df_results_ordered.columns = ['{}_{}'.format(*col) if isinstance(col, tuple) else col for col in df_results_ordered.columns]
df_results_ordered_by_model['iter'] = pd.Categorical(df_results_ordered_by_model['iter'], categories=iter_list, ordered=True)
df_results_ordered_by_model['metric'] = pd.Categorical(df_results_ordered_by_model['metric'], categories=metric_list, ordered=True)
df_results_ordered_by_model['label'] = pd.Categorical(df_results_ordered_by_model['label'], categories=label_list, ordered=True)

df_results_ordered_by_model = df_results_ordered_by_model.sort_values(by=['metric', 'label', 'iter'])
df_results_ordered_by_model

model,metric,label,iter,RandomForestClassifier,DecisionTreeClassifier,MLPClassifier,KNeighborsClassifier,IDSBaggingClassifier
100,Exac,BENIGN,Iteración 1,0.999412,0.999058,0.989290,0.997999,0.999353
101,Exac,BENIGN,Iteración 2,0.999529,0.998941,0.988995,0.998411,0.999353
102,Exac,BENIGN,Iteración 3,0.999470,0.998823,0.989113,0.997999,0.999529
103,Exac,BENIGN,Iteración 4,0.999765,0.997940,0.988642,0.998058,0.999588
104,Exac,BENIGN,Iteración 5,0.998411,0.998588,0.988584,0.998352,0.999588
...,...,...,...,...,...,...,...,...
95,AUC_PS,SSH_PATATOR,Iteración 6,1.000000,1.000000,1.000000,1.000000,1.000000
96,AUC_PS,SSH_PATATOR,Iteración 7,1.000000,1.000000,1.000000,1.000000,1.000000
97,AUC_PS,SSH_PATATOR,Iteración 8,0.995876,0.960230,1.000000,1.000000,1.000000
98,AUC_PS,SSH_PATATOR,Iteración 9,1.000000,0.986516,1.000000,0.972656,0.999279


In [10]:
with pd.ExcelWriter('output/df_results_ordered_by_model.xlsx', engine='openpyxl') as writer: 
  for metric in metric_list : 
    for label in label_list : 
      df_results_ordered_by_model[
        (df_results_ordered_by_model['metric'] == metric) &
        (df_results_ordered_by_model['label'] == label)
      ].to_excel(writer, sheet_name=f'{metric}_{label}', index=False)

In [11]:
df_results_ordered[
  (df_results_ordered['metric'] == 'Exac') & 
  (df_results_ordered['label'] == 'BENIGN') & 
  (df_results_ordered['model'] == 'RandomForestClassifier')
][iter_list].iloc[0].to_list()

[0.999411522391573,
 0.9995292179132584,
 0.9994703701524157,
 0.9997646089566292,
 0.9984111104572471,
 0.9998234567174719,
 0.9992938268698875,
 0.9996469134349438,
 0.99935263653484,
 0.9981756120527306]

## Test de Friedmann

In [12]:
""" Test de Friedman """
# H0 (hipótesis nula): No hay diferencias significativas entre los grupos comparados (los modelos rinden igual).
# H1 (hipótesis alternativa): Al menos un grupo difiere significativamente de los demás.
# Nivel de significancia = 0.05
# Si p <= nivel de significancia  ->  Se rechaza H0 (al menos una muestra difiere de las demás)

df_test_friedmann = pd.DataFrame(columns=["metric", "label", "gl", "stadistic", "p_value", "las muestras difieren?"])
nivel_significancia = 0.05

for metric in metric_list : 
  for label in label_list :
    lista_friedman = []
    for model in model_list : 
      mask = (
        (df_results_ordered['metric'] == metric) & 
        (df_results_ordered['label'] == label) & 
        (df_results_ordered['model'] == model)
      )
      muestra = df_results_ordered[mask][iter_list].iloc[0].to_list()
      lista_friedman.append(muestra)
    stat, p = stats.friedmanchisquare(*lista_friedman)
    df_test_friedmann.loc[len(df_test_friedmann)] = [
      metric,
      label,
      len(lista_friedman[0])-1,
      stat,
      p,
      p <= nivel_significancia 
    ]

df_test_friedmann

Unnamed: 0,metric,label,gl,stadistic,p_value,las muestras difieren?
0,Exac,BENIGN,9,33.708543,8.551161e-07,True
1,Exac,BOT,9,35.068783,4.496648e-07,True
2,Exac,DDOS,9,16.671329,0.002238784,True
3,Exac,DOS_GOLDENEYE,9,33.676768,8.680372e-07,True
4,Exac,DOS_HULK,9,28.865979,8.323046e-06,True
5,Exac,DOS_SLOWHTTPTEST,9,29.925134,5.069236e-06,True
6,Exac,DOS_SLOWLORIS,9,32.969072,1.212024e-06,True
7,Exac,FTP_PATATOR,9,5.892473,0.2073236,False
8,Exac,PORTSCAN,9,24.581395,6.106124e-05,True
9,Exac,SSH_PATATOR,9,7.459459,0.1135103,False


In [13]:
df_results_ordered

iter,metric,label,model,Iteración 1,Iteración 2,Iteración 3,Iteración 4,Iteración 5,Iteración 6,Iteración 7,Iteración 8,Iteración 9,Iteración 10
54,Exac,BENIGN,RandomForestClassifier,0.999412,0.999529,0.999470,0.999765,0.998411,0.999823,0.999294,0.999647,0.999353,0.998176
50,Exac,BENIGN,DecisionTreeClassifier,0.999058,0.998941,0.998823,0.997940,0.998588,0.999058,0.998588,0.998705,0.997234,0.998823
53,Exac,BENIGN,MLPClassifier,0.989290,0.988995,0.989113,0.988642,0.988584,0.987407,0.988642,0.990467,0.987641,0.989524
52,Exac,BENIGN,KNeighborsClassifier,0.997999,0.998411,0.997999,0.998058,0.998352,0.998293,0.998588,0.998764,0.998529,0.998117
51,Exac,BENIGN,IDSBaggingClassifier,0.999353,0.999353,0.999529,0.999588,0.999588,0.999529,0.999588,0.999470,0.999529,0.999470
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49,AUC_PS,SSH_PATATOR,RandomForestClassifier,1.000000,0.999644,1.000000,1.000000,1.000000,1.000000,1.000000,0.995876,1.000000,1.000000
45,AUC_PS,SSH_PATATOR,DecisionTreeClassifier,0.986516,0.973032,0.986842,1.000000,0.948054,1.000000,1.000000,0.960230,0.986516,1.000000
48,AUC_PS,SSH_PATATOR,MLPClassifier,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
47,AUC_PS,SSH_PATATOR,KNeighborsClassifier,0.986842,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.972656,1.000000


## Test de Shapiro-Wilk

In [14]:
list_models = ['RandomForestClassifier', 'DecisionTreeClassifier', 'MLPClassifier', 'KNeighborsClassifier']
bagging_model_name = 'IDSBaggingClassifier'

In [34]:
""" Test de Shapiro Wilk """
# H0 (hipótesis nula): Los datos provienen de una distribución normal
# H1 (hipótesis alternativa): Los datos no provienen de una distribución normal
# Nivel de significancia = 0.05
# Si p < nivel de significancia  ->  Los datos no provienen de una distribución normal
df_shapiro_wilk = pd.DataFrame(columns=["metric", "label", "model", "stadistic", "p_value", "es fiable?"])
nivel_significancia_normalidad = 0.05

for metric in metric_list : 
  for label in label_list :
    for model in [bagging_model_name]+list_models :
      muestra = df_results_ordered[
        (df_results_ordered['metric'] == metric) & 
        (df_results_ordered['label'] == label) & 
        (df_results_ordered['model'] == model)
      ][iter_list].iloc[0].to_list()

      fiable = True
      if np.ptp(muestra) > 0 : 
        fiable = True
      else : 
        print(f'Los resultados de la prueba en {metric}, {label} y {model} no son fiables porque tienen varianza 0')
        fiable = False

      stat, p = stats.shapiro(muestra)

      df_shapiro_wilk.loc[len(df_shapiro_wilk)] = [
        metric,
        label,
        model,
        stat,
        p,
        fiable
      ]
reject, p_corrected, _, _ = multipletests(
  df_shapiro_wilk['p_value'].to_list(),
  alpha=nivel_significancia_normalidad,
  method='holm'
) 

df_shapiro_wilk['p_corrected'] = p_corrected
df_shapiro_wilk['reject'] = reject
df_shapiro_wilk[df_shapiro_wilk['reject'] & df_shapiro_wilk['es fiable?']]

Los resultados de la prueba en Prec, SSH_PATATOR y IDSBaggingClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en Prec, SSH_PATATOR y RandomForestClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en Sens, DDOS y DecisionTreeClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en Sens, FTP_PATATOR y IDSBaggingClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en AUC_PS, DDOS y IDSBaggingClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en AUC_PS, DDOS y RandomForestClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en AUC_PS, FTP_PATATOR y IDSBaggingClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en AUC_PS, FTP_PATATOR y RandomForestClassifier no son fiables porque tienen varianza 0


  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Unnamed: 0,metric,label,model,stadistic,p_value,es fiable?,p_corrected,reject
6,Exac,BOT,RandomForestClassifier,0.594183,4.714743e-05,True,0.012019,True
10,Exac,DDOS,IDSBaggingClassifier,0.594183,4.714743e-05,True,0.012019,True
11,Exac,DDOS,RandomForestClassifier,0.531644,8.563494e-06,True,0.002244,True
12,Exac,DDOS,DecisionTreeClassifier,0.365721,1.003693e-07,True,0.000030,True
35,Exac,FTP_PATATOR,IDSBaggingClassifier,0.365721,1.003693e-07,True,0.000030,True
...,...,...,...,...,...,...,...,...
290,AUC_PS,PORTSCAN,IDSBaggingClassifier,0.373213,1.224219e-07,True,0.000034,True
291,AUC_PS,PORTSCAN,RandomForestClassifier,0.572053,2.572848e-05,True,0.006638,True
295,AUC_PS,SSH_PATATOR,IDSBaggingClassifier,0.365721,1.003693e-07,True,0.000030,True
296,AUC_PS,SSH_PATATOR,RandomForestClassifier,0.407557,3.050081e-07,True,0.000085,True


In [70]:
""" Test de Shapiro Wilk """
# H0 (hipótesis nula): Los datos provienen de una distribución normal
# H1 (hipótesis alternativa): Los datos no provienen de una distribución normal
# Nivel de significancia = 0.05
# Si p < nivel de significancia  ->  Los datos no provienen de una distribución normal
df_shapiro_wilk = pd.DataFrame(columns=["metric", "label", "model", "difference", "stadistic", "p_value", "es fiable?"])
nivel_significancia_normalidad = 0.05

for metric in metric_list : 
  for label in label_list :
    mask = (df_results_ordered['metric'] == metric) & (df_results_ordered['label'] == label)
    bagging_mask = mask & (df_results_ordered['model'] == bagging_model_name)
    muestra_bagging = df_results_ordered[bagging_mask][iter_list].iloc[0]
    for model in list_models :
      model_mask = mask & (df_results_ordered['model'] == model)
      muestra = df_results_ordered[model_mask][iter_list].iloc[0]
      muestra_diferencia = muestra_bagging-muestra
      fiable = True
      if np.ptp(muestra_diferencia) > 0 : 
        fiable = True
      else : 
        print(f'Los resultados de la prueba en {metric}, {label} y {model} no son fiables porque tienen varianza 0')
        fiable = False

      stat, p = stats.shapiro(muestra_diferencia.to_list())

      df_shapiro_wilk.loc[len(df_shapiro_wilk)] = [
        metric,
        label,
        model,
        f'{bagging_model_name} - {model}',
        stat,
        p,
        fiable,
      ]
reject, p_corrected, _, _ = multipletests(
  df_shapiro_wilk['p_value'].to_list(),
  alpha=nivel_significancia_normalidad,
  method='holm'
) 

df_shapiro_wilk['p_corrected'] = p_corrected
df_shapiro_wilk['reject'] = reject
# df_shapiro_wilk
df_shapiro_wilk['test'] = np.where(
    (~df_shapiro_wilk['reject']) & (df_shapiro_wilk['es fiable?']),
    'T de Student',
    'Wilcoxon'
)
# df_shapiro_wilk
df_shapiro_wilk[['metric', 'label', 'model', 'es fiable?', 'reject', 'difference', 'stadistic', 'p_value', 'p_corrected', 'test']]

Los resultados de la prueba en Prec, FTP_PATATOR y RandomForestClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en Prec, SSH_PATATOR y RandomForestClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en Sens, BOT y DecisionTreeClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en AUC_PS, DDOS y RandomForestClassifier no son fiables porque tienen varianza 0
Los resultados de la prueba en AUC_PS, FTP_PATATOR y RandomForestClassifier no son fiables porque tienen varianza 0


  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Unnamed: 0,metric,label,model,es fiable?,reject,difference,stadistic,p_value,p_corrected,test
0,Exac,BENIGN,RandomForestClassifier,True,False,IDSBaggingClassifier - RandomForestClassifier,0.777261,7.666488e-03,1.000000,T de Student
1,Exac,BENIGN,DecisionTreeClassifier,True,False,IDSBaggingClassifier - DecisionTreeClassifier,0.856477,6.933510e-02,1.000000,T de Student
2,Exac,BENIGN,MLPClassifier,True,False,IDSBaggingClassifier - MLPClassifier,0.964717,8.380139e-01,1.000000,T de Student
3,Exac,BENIGN,KNeighborsClassifier,True,False,IDSBaggingClassifier - KNeighborsClassifier,0.939242,5.446063e-01,1.000000,T de Student
4,Exac,BOT,RandomForestClassifier,True,False,IDSBaggingClassifier - RandomForestClassifier,0.769916,6.239046e-03,0.992008,T de Student
...,...,...,...,...,...,...,...,...,...,...
235,AUC_PS,PORTSCAN,KNeighborsClassifier,True,False,IDSBaggingClassifier - KNeighborsClassifier,0.919986,3.568414e-01,1.000000,T de Student
236,AUC_PS,SSH_PATATOR,RandomForestClassifier,True,True,IDSBaggingClassifier - RandomForestClassifier,0.516595,5.695451e-06,0.001276,Wilcoxon
237,AUC_PS,SSH_PATATOR,DecisionTreeClassifier,True,False,IDSBaggingClassifier - DecisionTreeClassifier,0.837377,4.104164e-02,1.000000,T de Student
238,AUC_PS,SSH_PATATOR,MLPClassifier,True,True,IDSBaggingClassifier - MLPClassifier,0.365721,1.003693e-07,0.000024,Wilcoxon


In [None]:
""" Test T de Student unilateral """
# H0 (hipótesis nula): El metamodelo de ensamble Bagging tiene resultados iguales o menores a la instancia evaluada
# H1 (hipótesis alternativa): El metamodelo de ensamble Bagging tiene mejores resultados que la instancia evaluada
# Nivel de significancia = 0.05
# Si p < nivel de significancia  ->  Se acepta la hipótesis nula
""" Test Wilcoxon """
# H0 (hipótesis nula): El metamodelo de ensamble Bagging tiene resultados iguales o menores a la instancia evaluada
# H1 (hipótesis alternativa): El metamodelo de ensamble Bagging tiene mejores resultados que la instancia evaluada
# Nivel de significancia = 0.05
# Si p < nivel de significancia  ->  Se acepta la hipótesis nula
df_prueba_estadistica_diferencias_significativas = pd.DataFrame(columns=['metric', 'label', 'model', 'comparison', 'test', 'stat', 'p_value'])
nivel_significancia_diferencias_significativas = 0.05
for metric in metric_list : 
  for label in label_list :
    mask = (df_results_ordered['metric'] == metric) & (df_results_ordered['label'] == label)
    bagging_mask = mask & (df_results_ordered['model'] == bagging_model_name)
    muestra_bagging = df_results_ordered[bagging_mask][iter_list].iloc[0]
    for model in list_models :
      model_mask = mask & (df_results_ordered['model'] == model)
      muestra = df_results_ordered[model_mask][iter_list].iloc[0]
      test = df_shapiro_wilk[
        (df_shapiro_wilk['metric'] == metric) & 
        (df_shapiro_wilk['label'] == label) & 
        (df_shapiro_wilk['model'] == model)
      ]['test'].iloc[0]
      if test == 'T de Student' : 
        stat, p_value = stats.ttest_rel(muestra_bagging, muestra, alternative='greater')
        df_prueba_estadistica_diferencias_significativas.loc[len(df_prueba_estadistica_diferencias_significativas)] = [
          metric,
          label,
          model,
          f'{bagging_model_name} > {model}',
          test,
          stat,
          p_value
        ]
      elif test == 'Wilcoxon' : 
        stat, p_value = stats.wilcoxon(muestra_bagging, muestra, alternative='greater')
        df_prueba_estadistica_diferencias_significativas.loc[len(df_prueba_estadistica_diferencias_significativas)] = [
          metric,
          label,
          model,
          f'{bagging_model_name} > {model}',
          test,
          stat,
          p_value
        ]
reject, p_corrected, _, _ = multipletests(
  df_prueba_estadistica_diferencias_significativas['p_value'].to_list(),
  alpha=nivel_significancia_diferencias_significativas,
  method='holm'
) 
df_prueba_estadistica_diferencias_significativas['p_corrected'] = p_corrected
df_prueba_estadistica_diferencias_significativas['reject'] = reject
df_prueba_estadistica_diferencias_significativas


  z = (r_plus - mn) / se
  z = (r_plus - mn) / se
  z = (r_plus - mn) / se
  z = (r_plus - mn) / se
  z = (r_plus - mn) / se


Unnamed: 0,metric,label,model,comparison,test,stat,p_value,p_corrected,reject
0,Exac,BENIGN,RandomForestClassifier,IDSBaggingClassifier > RandomForestClassifier,T de Student,1.176712,1.347469e-01,1.000000e+00,False
1,Exac,BENIGN,DecisionTreeClassifier,IDSBaggingClassifier > DecisionTreeClassifier,T de Student,4.733337,5.344395e-04,9.673355e-02,False
2,Exac,BENIGN,MLPClassifier,IDSBaggingClassifier > MLPClassifier,T de Student,36.501759,2.153056e-11,5.145805e-09,True
3,Exac,BENIGN,KNeighborsClassifier,IDSBaggingClassifier > KNeighborsClassifier,T de Student,13.917370,1.079215e-07,2.471403e-05,True
4,Exac,BOT,RandomForestClassifier,IDSBaggingClassifier > RandomForestClassifier,T de Student,-2.449503,9.816067e-01,1.000000e+00,False
...,...,...,...,...,...,...,...,...,...
235,AUC_PS,PORTSCAN,KNeighborsClassifier,IDSBaggingClassifier > KNeighborsClassifier,T de Student,2.075635,3.387134e-02,1.000000e+00,False
236,AUC_PS,SSH_PATATOR,RandomForestClassifier,IDSBaggingClassifier > RandomForestClassifier,Wilcoxon,14.000000,7.109375e-01,1.000000e+00,False
237,AUC_PS,SSH_PATATOR,DecisionTreeClassifier,IDSBaggingClassifier > DecisionTreeClassifier,T de Student,2.726427,1.168037e-02,1.000000e+00,False
238,AUC_PS,SSH_PATATOR,MLPClassifier,IDSBaggingClassifier > MLPClassifier,Wilcoxon,5.000000,9.843750e-01,1.000000e+00,False


In [41]:
df_prueba_estadistica = pd.DataFrame(columns=["metric", "label", "model", "models", "prueba"])

for metric in metric_list : 
  for label in label_list :
    mask = (df_shapiro_wilk['metric'] == metric) & (df_shapiro_wilk['label'] == label)
    bagging_mask = mask & (df_shapiro_wilk['model'] == bagging_model_name)
    bagging_is_normal = df_shapiro_wilk[bagging_mask]['es fiable?'] & ~df_shapiro_wilk[bagging_mask]['reject']
    print('Bagging : ', bagging_is_normal)
    for model in list_models :
      model_mask = mask & (df_shapiro_wilk['model'] == model)
      model_is_normal = df_shapiro_wilk[model_mask]['es fiable?'] & ~df_shapiro_wilk[model_mask]['reject']
      print(model, ':', model_is_normal)
      df_prueba_estadistica.loc[len(df_prueba_estadistica)] = [
        metric,
        label,
        model,
        f'{bagging_model_name} != {model}',
        'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
      ]
df_prueba_estadistica[df_prueba_estadistica['prueba'] == 'T_Student']

Bagging :  0    True
dtype: bool
RandomForestClassifier : 1    True
dtype: bool
DecisionTreeClassifier : 2    True
dtype: bool
MLPClassifier : 3    True
dtype: bool
KNeighborsClassifier : 4    True
dtype: bool
Bagging :  5    True
dtype: bool
RandomForestClassifier : 6    False
dtype: bool
DecisionTreeClassifier : 7    True
dtype: bool
MLPClassifier : 8    True
dtype: bool
KNeighborsClassifier : 9    True
dtype: bool
Bagging :  10    False
dtype: bool
RandomForestClassifier : 11    False
dtype: bool
DecisionTreeClassifier : 12    False
dtype: bool
MLPClassifier : 13    True
dtype: bool
KNeighborsClassifier : 14    True
dtype: bool
Bagging :  15    True
dtype: bool
RandomForestClassifier : 16    True
dtype: bool
DecisionTreeClassifier : 17    True
dtype: bool
MLPClassifier : 18    True
dtype: bool
KNeighborsClassifier : 19    True
dtype: bool
Bagging :  20    True
dtype: bool
RandomForestClassifier : 21    True
dtype: bool
DecisionTreeClassifier : 22    True
dtype: bool
MLPClassifier : 

  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.bool() else 'Wilcoxon'
  'T_Student' if bagging_is_normal.bool() and model_is_normal.boo

Unnamed: 0,metric,label,model,models,prueba
0,Exac,BENIGN,RandomForestClassifier,IDSBaggingClassifier != RandomForestClassifier,T_Student
1,Exac,BENIGN,DecisionTreeClassifier,IDSBaggingClassifier != DecisionTreeClassifier,T_Student
2,Exac,BENIGN,MLPClassifier,IDSBaggingClassifier != MLPClassifier,T_Student
3,Exac,BENIGN,KNeighborsClassifier,IDSBaggingClassifier != KNeighborsClassifier,T_Student
5,Exac,BOT,DecisionTreeClassifier,IDSBaggingClassifier != DecisionTreeClassifier,T_Student
...,...,...,...,...,...
215,AUC_PS,DOS_GOLDENEYE,KNeighborsClassifier,IDSBaggingClassifier != KNeighborsClassifier,T_Student
220,AUC_PS,DOS_SLOWHTTPTEST,RandomForestClassifier,IDSBaggingClassifier != RandomForestClassifier,T_Student
221,AUC_PS,DOS_SLOWHTTPTEST,DecisionTreeClassifier,IDSBaggingClassifier != DecisionTreeClassifier,T_Student
222,AUC_PS,DOS_SLOWHTTPTEST,MLPClassifier,IDSBaggingClassifier != MLPClassifier,T_Student


In [None]:
""" Test de Wilcoxon """
# H0 (hipótesis nula): La mediana de las diferencias es igual a 0
# H1 (hipótesis alternativa): La mediana de las diferencias es diferente de 0
# Nivel de significancia = 0.05
# Si p <= nivel de significancia  ->  Se rechaza H0 (al menos una muestra difiere de las demás)
df_shapiro_wilk = pd.DataFrame(columns=["metric", "label", "models", "stadistic", "p_value", "las muestras difieren?"])
nivel_significancia_normalidad = 0.05

df_test_diferencias_significativas = pd.DataFrame(columns=["metric", "label", "models", "stadistic", "p_value", "las muestras difieren?"])
nivel_significancia = 0.0125
list_models = ['RandomForestClassifier', 'DecisionTreeClassifier', 'MLPClassifier', 'KNeighborsClassifier']

for metric in metric_list : 
  for label in label_list :
    lista_wilcoxon = []
    for model in ['IDSBaggingClassifier']+list_models :
      muestra = df_results_ordered[
        (df_results_ordered['metric'] == metric) & 
        (df_results_ordered['label'] == label) & 
        (df_results_ordered['model'] == model)
      ][iter_list].iloc[0].to_list()
      if model == 'IDSBaggingClassifier' : 
        muestra_bagging_model = muestra
        normal_stats, normal_p = stats.shapiro(muestra_bagging_model)
        bagging_is_normal = normal_p >  
      else : 
        stat, p = stats.wilcoxon(muestra_bagging_model, muestra, alternative='greater')

        df_test_diferencias_significativas.loc[len(df_test_diferencias_significativas)] = [
          metric,
          label,
          f'IDSBaggingClassifier > {model}',
          stat,
          p,
          p <= nivel_significancia
        ]
df_test_diferencias_significativas

SyntaxError: invalid syntax (2550611416.py, line 25)

In [None]:
df_test_wilcoxon.to_excel('output/df_wilcoxon_greater.xlsx', index=False)

In [None]:
with pd.ExcelWriter('output/analisis_descriptivo.xlsx', engine='openpyxl') as writer: 
  for metric in metric_list : 
    for label in label_list : 
      df_results_ordered_by_model[
        (df_results_ordered_by_model['metric'] == metric) & 
        (df_results_ordered_by_model['label'] == label) 
      ][model_list].describe().to_excel(writer, sheet_name=f'{metric}_{label}')

In [None]:

df_results_ordered_by_model[
  (df_results_ordered_by_model['metric'] == 'Exac') & 
  (df_results_ordered_by_model['label'] == 'BENIGN') 
][model_list].describe()

model,RandomForestClassifier,DecisionTreeClassifier,MLPClassifier,KNeighborsClassifier,IDSBaggingClassifier
count,10.0,10.0,10.0,10.0,10.0
mean,0.999406,0.998576,0.988831,0.998311,0.9995
std,0.000465,0.000572,0.000885,0.000266,8.9e-05
min,0.998176,0.997234,0.987407,0.997999,0.999353
25%,0.999367,0.998588,0.988598,0.998073,0.99947
50%,0.9995,0.998764,0.988819,0.998323,0.999529
75%,0.999632,0.998911,0.989246,0.998499,0.999573
max,0.999823,0.999058,0.990467,0.998764,0.999588
