In [3]:
import pandas as pd
import os
from IPython.display import display
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
import plotly
from plotly.subplots import make_subplots
#sns.set_style("whitegrid")
#sns.set_color_codes("dark")

# Plotly's default colour sequence.
cols = plotly.colors.DEFAULT_PLOTLY_COLORS

# Names of the metric columns that the analysis cells iterate over.
metrics = [
    "Local_Concordance",
    "Local_Fidelity",
    "Prescriptivity",
    "Conciseness",
    "Robustness",
]

In [2]:
# Directory holding one CSV per experiment run, and the aggregated output file.
CSV_DIR = "../../../csvs/"
EXPERIMENTS_CSV = "Experiments.csv"


def load_experiment_csvs(csv_dir):
    """Read every non-empty regular file found directly inside ``csv_dir``.

    Parameters
    ----------
    csv_dir : str
        Directory whose entries are parsed with ``pandas.read_csv``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per file, in sorted file-name order (``os.listdir`` order is
        arbitrary, so sorting keeps the result reproducible). The list is empty
        when the directory holds nothing readable.
    """
    frames = []
    for file_name in sorted(os.listdir(csv_dir)):
        path = os.path.join(csv_dir, file_name)
        # Skip sub-directories (e.g. .ipynb_checkpoints) and zero-byte files:
        # neither can be parsed by read_csv.
        if not os.path.isfile(path) or os.path.getsize(path) == 0:
            continue
        frames.append(pd.read_csv(path))
    return frames


if os.path.exists(CSV_DIR):
    experiment_frames = load_experiment_csvs(CSV_DIR)
    if experiment_frames:
        bigDf = pd.concat(experiment_frames)

        # Number of features is dim * dim.
        bigDf["num_features"] = bigDf["dim"] * bigDf["dim"]
        # Expose every lower-case metric column under its capitalised name as well.
        for metric_column in ("Robustness", "Conciseness", "Prescriptivity",
                              "Local_Fidelity", "Local_Concordance"):
            bigDf[metric_column] = bigDf[metric_column.lower()]

        # to_csv opens the target in write mode, so a previous Experiments.csv is
        # overwritten without having to delete it first.
        bigDf.to_csv(EXPERIMENTS_CSV)
    else:
        # pd.concat raises ValueError on an empty list; report instead of crashing.
        print(f"No non-empty CSV files found in {CSV_DIR}; {EXPERIMENTS_CSV} was not written.")

In [11]:
# Load Experiments_mean.csv and discard the columns that are not used below.
# NOTE(review): no cell visible here writes Experiments_mean.csv (only
# Experiments.csv is written) -- confirm where this file comes from.
df = pd.read_csv("Experiments_mean.csv").drop(
    columns=[
        "dim",
        "robustness",
        "conciseness",
        "prescriptivity",
        "local_fidelity",
        "local_concordance",
        "Unnamed: 0",
    ]
)
df.head()

Unnamed: 0,dataset,max_examples,xai_model,sigma,index,num_features,Robustness,Conciseness,Prescriptivity,Local_Fidelity,Local_Concordance
0,CIFAR10,100,LIME,2.0,0,36.0,0.892969,0.787615,0.808537,0.999839,0.999925
1,CIFAR10,100,LIME,2.0,1,36.0,0.896733,0.703711,0.709766,0.997634,0.994876
2,CIFAR10,100,LIME,2.0,2,36.0,0.914178,0.836191,0.318267,0.999924,0.999829
3,CIFAR10,100,LIME,2.0,3,36.0,0.873275,0.761558,0.861878,0.946153,0.932682
4,CIFAR10,100,LIME,2.0,4,36.0,0.91582,0.793477,0.536462,0.999694,0.999897


In [8]:
# Wilcoxon test for each metric and each dataset of LIME with sigma = 2.0 and 3.0

# Import the wilcoxon, shapiro and normaltest functions from scipy.stats
from scipy.stats import wilcoxon, shapiro, normaltest

# Import the matplotlib.pyplot submodule and name it plt
import matplotlib.pyplot as plt

# Null hypothesis of the normality test: the sample comes from a normal distribution
# p-value > 0.05: the null hypothesis is not rejected
# p-value < 0.05: the null hypothesis is rejected

# Null hypothesis of the Wilcoxon test: the samples come from the same distribution
# p-value > 0.05: the null hypothesis is not rejected
# p-value < 0.05: the null hypothesis is rejected

# Distinct values taken by each experimental factor, in order of first appearance.
cases_num_features, cases_sigma, cases_max_examples, cases_dataset = [
    df[factor].unique()
    for factor in ("num_features", "sigma", "max_examples", "dataset")
]

In [18]:
# First case: compare the LIME results obtained with different max_examples values.
# For every metric an NxN frame is built whose rows and columns are the
# max_examples values; cell (i, j) with i < j holds the p-value of the Wilcoxon
# test between the samples of value i and value j. All other cells stay NaN.
# NOTE(review): scipy's wilcoxon is a paired test -- the two samples are paired by
# row position and must have the same length. Confirm the groups are row-aligned.

# The group selection does not depend on the metric, so it is done only once.
lime_rows = df[df["xai_model"] == "LIME"]
dfs = [lime_rows[lime_rows["max_examples"] == me] for me in cases_max_examples]

for m in metrics:
    df_wilcoxon = pd.DataFrame(index=cases_max_examples, columns=cases_max_examples)
    for i in range(len(dfs)):
        for j in range(i + 1, len(dfs)):
            # p-value of the Wilcoxon test, rounded to 4 decimals
            df_wilcoxon.iloc[i, j] = wilcoxon(dfs[i][m], dfs[j][m]).pvalue.round(4)

    # Show the p-values as a table. go.Table takes one sequence per *column*, so
    # the frame is passed column by column; this keeps cell (i, j) in table row i,
    # under the header of value j.
    table_columns = [cases_max_examples] + [
        df_wilcoxon.iloc[:, k].values for k in range(df_wilcoxon.shape[1])
    ]
    fig = go.Figure(data=[go.Table(
        header=dict(values=["max_examples"] + list(cases_max_examples),
                    fill_color='paleturquoise',
                    align='left'),
        cells=dict(values=table_columns,
                   fill_color='lavender',
                   align='left'))
    ])
    fig.update_layout(title=f"Wilcoxon test for {m} and max_examples")
    fig.show()
    

In [28]:
# Second case: compare the LIME results obtained with different sigma values.
# For every metric an NxN frame is built whose rows and columns are the sigma
# values; cell (i, j) with i < j holds the p-value of the Wilcoxon test between
# the samples of sigma value i and sigma value j. All other cells stay NaN.
# NOTE(review): scipy's wilcoxon is a paired test -- the two samples are paired by
# row position and must have the same length. Confirm the groups are row-aligned.

# NOTE(review): the last two unique sigma values are discarded; the reason is not
# visible in this notebook -- confirm it is still intended.
cases_sigma = df["sigma"].unique()[:-2]

# The group selection does not depend on the metric, so it is done only once.
# Sigma values without LIME rows are skipped; the position of every kept group in
# cases_sigma is remembered so that its p-values are written under the right label
# even when a skipped value sits in the middle of cases_sigma.
lime_rows = df[df["xai_model"] == "LIME"]
dfs = []
dfs_positions = []
for position, s in enumerate(cases_sigma):
    df_s = lime_rows[lime_rows["sigma"] == s]
    if len(df_s) > 0:
        dfs.append(df_s)
        dfs_positions.append(position)

for m in metrics:
    df_wilcoxon = pd.DataFrame(index=cases_sigma, columns=cases_sigma)
    for i in range(len(dfs)):
        for j in range(i + 1, len(dfs)):
            # p-value of the Wilcoxon test, rounded to 8 decimals
            p_value = wilcoxon(dfs[i][m], dfs[j][m]).pvalue.round(8)
            df_wilcoxon.iloc[dfs_positions[i], dfs_positions[j]] = p_value

    # Show the p-values as a table. go.Table takes one sequence per *column*, so
    # the frame is passed column by column; this keeps cell (i, j) in table row i,
    # under the header of sigma value j.
    table_columns = [cases_sigma] + [
        df_wilcoxon.iloc[:, k].values for k in range(df_wilcoxon.shape[1])
    ]
    fig = go.Figure(data=[go.Table(
        header=dict(values=["sigma"] + list(cases_sigma),
                    fill_color='paleturquoise',
                    align='left'),
        cells=dict(values=table_columns,
                   fill_color='lavender',
                   align='left'))
    ])
    fig.update_layout(title=f"Wilcoxon test for {m} and sigma")
    fig.show()

In [27]:
# Third case: compare the LIME results obtained with different num_features values.
# For every metric an NxN frame is built whose rows and columns are the
# num_features values; cell (i, j) with i < j holds the p-value of the Wilcoxon
# test between the samples of value i and value j. All other cells stay NaN.
# NOTE(review): scipy's wilcoxon is a paired test -- the two samples are paired by
# row position and must have the same length. Confirm the groups are row-aligned.

# The group selection does not depend on the metric, so it is done only once.
lime_rows = df[df["xai_model"] == "LIME"]
dfs = [lime_rows[lime_rows["num_features"] == nf] for nf in cases_num_features]

for m in metrics:
    df_wilcoxon = pd.DataFrame(index=cases_num_features, columns=cases_num_features)
    for i in range(len(dfs)):
        for j in range(i + 1, len(dfs)):
            # p-value of the Wilcoxon test, rounded to 8 decimals
            df_wilcoxon.iloc[i, j] = wilcoxon(dfs[i][m], dfs[j][m]).pvalue.round(8)

    # Show the p-values as a table. go.Table takes one sequence per *column*, so
    # the frame is passed column by column; this keeps cell (i, j) in table row i,
    # under the header of value j.
    table_columns = [cases_num_features] + [
        df_wilcoxon.iloc[:, k].values for k in range(df_wilcoxon.shape[1])
    ]
    fig = go.Figure(data=[go.Table(
        header=dict(values=["num_features"] + list(cases_num_features),
                    fill_color='paleturquoise',
                    align='left'),
        cells=dict(values=table_columns,
                   fill_color='lavender',
                   align='left'))
    ])
    fig.update_layout(title=f"Wilcoxon test for {m} and num_features")
    fig.show()