In [20]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import plotly.express as px
import scikit_posthocs as sp

In [None]:
def posthoc_df(results_id:dict, *lists):
    """Run Dunn's post-hoc test on several samples and label the result.

    Args:
        results_id: Mapping from the row/column labels of the table returned
            by ``sp.posthoc_dunn`` to the display names to use instead
            (e.g. ``{1: 'BATCH 4', 2: 'BATCH 8'}``).
        *lists: One sequence of observations per group being compared.

    Returns:
        A ``pandas.DataFrame`` with the Bonferroni-adjusted pairwise result
        of ``sp.posthoc_dunn``, with its index and columns renamed through
        ``results_id``.
    """
    # posthoc_dunn receives the groups as a single list of sequences.
    groups = list(lists)
    pairwise = sp.posthoc_dunn(groups, p_adjust='bonferroni')
    labelled = pd.DataFrame(pairwise).rename(index=results_id, columns=results_id)
    return labelled

### Epochs

In [21]:
# Load csv_all_epochs.csv and discard the 'Unnamed: 0' column
# (presumably a row index that was saved together with the data — confirm).
df = pd.read_csv('csv_all_epochs.csv').drop(columns='Unnamed: 0')

In [22]:
# Box plot of the 'mean' column for each value of 'epochs', drawn in black.
fig = px.box(df, x='epochs', y='mean', color_discrete_sequence=['black'])
# Title (centred), axis labels and figure size are applied in one layout update.
fig.update_layout(
    title_text='Mean Distribution by Number of Epochs',
    title_x=0.5,
    xaxis_title='Number of Epochs',
    yaxis_title='Mean Dice',
    height=400,
    width=700,
)
fig.show()

In [23]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,exec,seed,mean,median,std,max,min,epochs
0,1,56,0.81067,0.825643,0.072704,0.900002,0.514061,10
1,2,4,0.787536,0.800899,0.070627,0.887283,0.480996,10
2,3,31,0.610684,0.628777,0.066874,0.674103,0.38226,10
3,4,35,0.800221,0.815061,0.076169,0.883155,0.498038,10
4,5,36,0.776273,0.792638,0.068544,0.852988,0.496292,10


In [24]:
# Show the column names available in the table.
df.columns

Index(['exec', 'seed', 'mean', 'median', 'std', 'max', 'min', 'epochs'], dtype='object')

In [25]:
# Average the selected columns over the rows that share an 'epochs' value,
# then export the resulting table to Excel. Note that `df` is overwritten
# by the aggregated table.
# NOTE(review): 'epochs' is also the grouping key, so selecting it here
# looks redundant — confirm it is intended.
df = (
    df
    .groupby('epochs')[['mean', 'median', 'std', 'max', 'min', 'epochs']]
    .mean()
)
df.to_excel('resume_by_epoch.xlsx')

### Patience

In [26]:
# Load csv_all_patiences.csv and discard the 'Unnamed: 0' column
# (presumably a row index that was saved together with the data — confirm).
df = pd.read_csv('csv_all_patiences.csv').drop(columns='Unnamed: 0')

In [27]:
# Box plot of the 'mean' column for each value of 'patience', drawn in black.
fig = px.box(df, x='patience', y='mean', color_discrete_sequence=['black'])
# Title (centred), axis labels and figure size are applied in one layout update.
fig.update_layout(
    title_text='Mean Distribution by Early Stopping Patience',
    title_x=0.5,
    xaxis_title='Early Stopping Patience Value',
    yaxis_title='Mean Dice',
    height=400,
    width=700,
)
fig.show()

In [28]:
# Show the column names available in the table.
df.columns

Index(['exec', 'seed', 'mean', 'median', 'std', 'max', 'min', 'epochs',
       'patience'],
      dtype='object')

In [29]:
# Average the selected columns over the rows that share a 'patience' value,
# then export the resulting table to Excel. Note that `df` is overwritten
# by the aggregated table.
df = (
    df
    .groupby('patience')[['mean', 'median', 'std', 'max', 'min', 'epochs']]
    .mean()
)
df.to_excel('resume_by_patience.xlsx')

### Batches

In [39]:
# Load csv_all_batches.csv and discard the 'Unnamed: 0' column
# (presumably a row index that was saved together with the data — confirm).
df = pd.read_csv('csv_all_batches.csv').drop(columns='Unnamed: 0')

In [40]:
# One sub-frame per batch size (4, 8, 16, 32); `list_dfs` is reused by the
# post-hoc cell that follows.
list_dfs = [df[df['batch'] == size] for size in [4, 8, 16, 32]]

# Kruskal-Wallis H-test on the 'mean' values of the four groups; the
# displayed result carries the p-value.
stats.kruskal(*(group['mean'].values for group in list_dfs))

KruskalResult(statistic=9.765714285714282, pvalue=0.020666320071143586)

In [51]:
# Plain lists of the 'mean' values, one per batch-size group.
listas_batches = [list(group['mean'].values) for group in list_dfs]
# Pairwise Dunn comparison; the dict renames the result's row/column
# labels 1-4 to the corresponding batch size.
posthoc_df(
    {1:'BATCH 4', 2:'BATCH 8', 3:'BATCH 16', 4:'BATCH 32'},
    *listas_batches,
)

Unnamed: 0,BATCH 4,BATCH 8,BATCH 16,BATCH 32
BATCH 4,1.0,1.0,0.652857,0.129218
BATCH 8,1.0,1.0,0.287753,0.045158
BATCH 16,0.652857,0.287753,1.0,1.0
BATCH 32,0.129218,0.045158,1.0,1.0


In [31]:
# Replace the 'batch' column with the string form of each value
# (presumably so the box plot treats batch size as discrete categories — confirm).
list_batch = list(map(str, df['batch'].values))

df['batch'] = list_batch

In [32]:
# Box plot of the 'mean' column for each value of 'batch', drawn in black.
fig = px.box(df, x='batch', y='mean', color_discrete_sequence=['black'])
# Title (centred), axis labels and figure size are applied in one layout update.
fig.update_layout(
    title_text='Mean Distribution by Batch Size',
    title_x=0.5,
    xaxis_title='Batch Size Value',
    yaxis_title='Mean Dice',
    height=400,
    width=700,
)
fig.show()

In [34]:
# Show the column names available in the table.
df.columns

Index(['exec', 'seed', 'mean', 'median', 'std', 'max', 'min', 'epochs',
       'patience', 'batch'],
      dtype='object')

In [35]:
# Average the selected columns over the rows that share a batch size, then
# export the resulting table to Excel. Note that `df` is overwritten by the
# aggregated table.
#
# An earlier cell stores 'batch' as strings for plotting. Grouping on the
# string column would sort the rows lexicographically ('16', '32', '4', '8')
# and export text labels, so the grouping key is cast back to int here: the
# rows come out in numeric order (4, 8, 16, 32) and the index stays named
# 'batch'. The cast is a no-op when the column is still numeric.
df = (
    df
    .groupby(df['batch'].astype(int))[['mean', 'median', 'std', 'max', 'min', 'epochs', 'patience']]
    .mean()
)
df.to_excel('resume_by_batch.xlsx')

### Image Size

In [54]:
# Load csv_all_imgsizes.csv and discard the 'Unnamed: 0' column
# (presumably a row index that was saved together with the data — confirm).
df = pd.read_csv('csv_all_imgsizes.csv').drop(columns='Unnamed: 0')
# Disabled filter, kept for reference — it would restrict the table to these image sizes:
#df = df[df['img_size'].isin([32,48,64,80,96])]

In [55]:
# Box plot of the 'mean' column for each value of 'img_size', drawn in black.
fig = px.box(df, x='img_size', y='mean', color_discrete_sequence=['black'])
# Title (centred), axis labels and figure size are applied in one layout update.
fig.update_layout(
    title_text='Mean Distribution by Image Size',
    title_x=0.5,
    xaxis_title='Image Size',
    yaxis_title='Mean Dice',
    height=400,
    width=700,
)
fig.show()

In [56]:
# One sub-frame per image size (32, 64, 128); `list_dfs` is reused by the
# post-hoc cell that follows.
list_dfs = [df[df['img_size'] == size] for size in [32, 64, 128]]

# Kruskal-Wallis H-test on the 'mean' values of the three groups; the
# displayed result carries the p-value.
stats.kruskal(*(group['mean'].values for group in list_dfs))

KruskalResult(statistic=9.979999999999997, pvalue=0.006805664492230557)

In [58]:
# Plain lists of the 'mean' values, one per image-size group.
listas_imgsizes = [list(group['mean'].values) for group in list_dfs]
# Pairwise Dunn comparison; the dict renames the result's row/column
# labels 1-3 to the corresponding image size.
posthoc_df(
    {1:'SIZE 32', 2:'SIZE 64', 3:'SIZE 128'},
    *listas_imgsizes,
)

Unnamed: 0,SIZE 32,SIZE 64,SIZE 128
SIZE 32,1.0,1.0,0.070955
SIZE 64,1.0,1.0,0.007084
SIZE 128,0.070955,0.007084,1.0


In [38]:
# Average the selected columns over the rows that share an 'img_size' value,
# then export the resulting table to Excel. Note that `df` is overwritten
# by the aggregated table.
df = (
    df
    .groupby('img_size')[['mean', 'median', 'std', 'max', 'min', 'epochs', 'patience', 'batch']]
    .mean()
)
df.to_excel('resume_by_imgsize.xlsx')