## Analysis of results

In [1]:
import bayesiantests as bt
import matplotlib.pyplot as plt
import numpy as np

#https://matplotlib.org/stable/gallery/lines_bars_and_markers/horizontal_barchart_distribution.html
def stacked_bar(results, category_names):
    """
    Draw a horizontal stacked bar chart with one bar per key of *results*.

    Each bar is split into one segment per category; every segment is
    annotated with its value at its centre.

    Parameters
    ----------
    results : dict
        A mapping from question labels to a list of answers per category.
        All lists must contain the same number of entries, and that number
        must match the length of *category_names*.
    category_names : list of str
        The category labels.

    Returns
    -------
    fig : matplotlib Figure
    ax : matplotlib Axes
        The figure and axes the bars were drawn on.

    Raises
    ------
    ValueError
        If *results* is empty, its rows have differing lengths, or the row
        length differs from ``len(category_names)``.
    """
    labels = list(results.keys())
    data = np.array(list(results.values()))
    # Without this check, zip() below would silently drop trailing categories
    # (or data columns) and empty input would fail with an opaque axis error.
    if data.ndim != 2 or data.shape[1] != len(category_names):
        raise ValueError(
            "results must map each label to a list with exactly "
            "len(category_names) = {} entries; got data of shape {}".format(
                len(category_names), data.shape))
    data_cum = data.cumsum(axis=1)
    category_colors = plt.colormaps['RdYlGn'](
        np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, np.sum(data, axis=1).max())

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        # Each segment starts where the previous categories' widths end.
        starts = data_cum[:, i] - widths
        rects = ax.barh(labels, widths, left=starts, height=0.5,
                        label=colname, color=color)

        # Pick a label colour that contrasts with the segment colour.
        r, g, b, _ = color
        text_color = 'white' if r * g * b < 0.5 else 'darkgrey'
        ax.bar_label(rects, label_type='center', color=text_color)
    ax.legend(ncols=len(category_names), bbox_to_anchor=(0, 1),
              loc='upper left', fontsize='small')

    return fig, ax

In [74]:
import pandas as pd
import numpy as np
# Load the result table; the CSV's unnamed first column holds the metric name
# of each row, so give it a proper header.
df = pd.read_csv('5CV_MLP(AllTypes_1Repeat_new)_102datasets.csv').rename(
    columns={'Unnamed: 0': 'Metrics'})
df

Unnamed: 0,Metrics,Dataset,MSE,BCE,WBCE,Pure_Fbeta,Any_Fbeta,WBCEFL,SPLFL,Pure_Gmean,Any_Gmean,WBCEGL,SPLGL,Pure_BAccu,Any_BAccu,WBCEBL,SPLBL
0,Acc,D,0.652,0.64,0.648,0.668,0.652,0.664,0.664,0.684,0.668,0.652,0.664,0.668,0.688,0.652,0.668
1,F1,S,0.5618,0.5453,0.6318,0.6728,0.6507,0.6584,0.6668,0.6332,0.6194,0.6262,0.6281,0.6307,0.6655,0.6207,0.6608
2,Gmean,1,0.6354,0.6264,0.6623,0.6882,0.6712,0.6849,0.686,0.6923,0.6741,0.6619,0.6793,0.6738,0.7036,0.6595,0.6892
3,B_Acc,C,0.6455,0.6337,0.691,0.7285,0.7062,0.7156,0.7229,0.7021,0.6872,0.6916,0.6913,0.7017,0.7295,0.6892,0.7187
4,Pre,S,0.5294,0.5016,0.5162,0.5281,0.5157,0.5246,0.5227,0.5499,0.5366,0.5159,0.5278,0.5304,0.5456,0.5139,0.5275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
810,Gmean,105,0.943,0.9546,0.9668,0.9348,0.9572,0.9445,0.9351,0.9536,0.9712,0.9555,0.9532,0.9523,0.9738,0.9623,0.9511
811,B_Acc,C,0.9439,0.9553,0.967,0.9359,0.9578,0.9451,0.936,0.9538,0.9712,0.9556,0.9535,0.9525,0.9738,0.9624,0.9514
812,Pre,S,0.9219,0.9316,0.8114,0.9036,0.9472,0.8876,0.8725,0.737,0.8471,0.7608,0.7393,0.7377,0.8491,0.7934,0.7269
813,Rec,V,0.9027,0.9238,0.9783,0.8903,0.9257,0.9126,0.8978,0.9752,0.9771,0.9702,0.9733,0.9721,0.982,0.974,0.974


In [75]:
# Value (0.005) handed to bt.signtest as its `rope` argument by the cells below.
r = 5e-3

In [76]:
# F1: compare the three baseline losses with the four F-beta based losses.
rope = r
baselines = ['MSE','BCE','WBCE']
ours = ['Pure_Fbeta','Any_Fbeta','WBCEFL','SPLFL']

# Keep only the F1 rows and the seven compared methods, selected by column
# name so the labels printed below can never drift from the data they describe.
df_f1 = df.loc[df['Metrics'] == 'F1', baselines + ours]
df_f1 = df_f1.reset_index(drop=True).astype(float)
print(len(df_f1))

# Average
print("==========", "Average", "==========")
for method in baselines + ours:
    print(method + ":", np.mean(df_f1[method]))

# Bayesian Sign Test
# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
print("============"*3, "Probability", "============"*3)
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_f1[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

102
MSE: 0.555031568627451
BCE: 0.5429868627450981
WBCE: 0.6561950980392157
Pure_Fbeta: 0.6624382352941176
Any_Fbeta: 0.6663637254901961
WBCEFL: 0.6663019607843137
SPLFL: 0.6597901960784315
P(MSE > Pure_Fbeta) = 0.0, P(rope) = 0.0, P(Pure_Fbeta > MSE) = 1.0
P(BCE > Pure_Fbeta) = 0.0, P(rope) = 0.0, P(Pure_Fbeta > BCE) = 1.0
P(WBCE > Pure_Fbeta) = 0.00668, P(rope) = 0.0003, P(Pure_Fbeta > WBCE) = 0.99302
P(MSE > Any_Fbeta) = 0.0, P(rope) = 0.00012, P(Any_Fbeta > MSE) = 0.99988
P(BCE > Any_Fbeta) = 0.0, P(rope) = 6e-05, P(Any_Fbeta > BCE) = 0.99994
P(WBCE > Any_Fbeta) = 0.0094, P(rope) = 0.00024, P(Any_Fbeta > WBCE) = 0.99036
P(MSE > WBCEFL) = 0.0, P(rope) = 0.00062, P(WBCEFL > MSE) = 0.99938
P(BCE > WBCEFL) = 0.0, P(rope) = 0.0, P(WBCEFL > BCE) = 1.0
P(WBCE > WBCEFL) = 0.0, P(rope) = 0.00102, P(WBCEFL > WBCE) = 0.99898
P(MSE > SPLFL) = 0.0, P(rope) = 0.0001, P(SPLFL > MSE) = 0.9999
P(BCE > SPLFL) = 0.0, P(rope) = 0.0, P(SPLFL > BCE) = 1.0
P(WBCE > SPLFL) = 0.04816, P(rope) = 0.00062, P(

Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,MSE,Pure_Fbeta,0.0,0.0,1.0
1,BCE,Pure_Fbeta,0.0,0.0,1.0
2,WBCE,Pure_Fbeta,0.007,0.0,0.993
3,MSE,Any_Fbeta,0.0,0.0,1.0
4,BCE,Any_Fbeta,0.0,0.0,1.0
5,WBCE,Any_Fbeta,0.009,0.0,0.99
6,MSE,WBCEFL,0.0,0.001,0.999
7,BCE,WBCEFL,0.0,0.0,1.0
8,WBCE,WBCEFL,0.0,0.001,0.999
9,MSE,SPLFL,0.0,0.0,1.0


In [77]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.0,&,0.0,&,1.0
1,0.0,&,0.0,&,1.0
2,0.007,&,0.0,&,0.993
3,0.0,&,0.0,&,1.0
4,0.0,&,0.0,&,1.0
5,0.009,&,0.0,&,0.99
6,0.0,&,0.001,&,0.999
7,0.0,&,0.0,&,1.0
8,0.0,&,0.001,&,0.999
9,0.0,&,0.0,&,1.0


In [78]:
# Bayesian Sign Test
# Pairwise comparison among the F-beta based losses on the F1 scores in df_f1:
# Pure_Fbeta / SPLFL take the "baseline" side, Any_Fbeta / WBCEFL the "ours" side.
print("============"*3, "Probability", "============"*3)
rope = r
baselines = ['Pure_Fbeta','SPLFL']
ours = ['Any_Fbeta','WBCEFL']

# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_f1[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

P(Pure_Fbeta > Any_Fbeta) = 0.08058, P(rope) = 0.1838, P(Any_Fbeta > Pure_Fbeta) = 0.73562
P(SPLFL > Any_Fbeta) = 0.03154, P(rope) = 0.13944, P(Any_Fbeta > SPLFL) = 0.82902
P(Pure_Fbeta > WBCEFL) = 0.06364, P(rope) = 0.63696, P(WBCEFL > Pure_Fbeta) = 0.2994
P(SPLFL > WBCEFL) = 0.06158, P(rope) = 0.29696, P(WBCEFL > SPLFL) = 0.64146


Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,Pure_Fbeta,Any_Fbeta,0.081,0.184,0.736
1,SPLFL,Any_Fbeta,0.032,0.139,0.829
2,Pure_Fbeta,WBCEFL,0.064,0.637,0.299
3,SPLFL,WBCEFL,0.062,0.297,0.641


In [79]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.081,&,0.184,&,0.736
1,0.032,&,0.139,&,0.829
2,0.064,&,0.637,&,0.299
3,0.062,&,0.297,&,0.641


In [80]:
# Bayesian Sign Test
# Head-to-head comparison of WBCEFL ("baseline" side) against Any_Fbeta
# ("ours" side) on the F1 scores in df_f1.
print("============"*3, "Probability", "============"*3)
rope = r
baselines = ['WBCEFL']
ours = ['Any_Fbeta']

# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_f1[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

P(WBCEFL > Any_Fbeta) = 0.3067, P(rope) = 0.31002, P(Any_Fbeta > WBCEFL) = 0.38328


Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,WBCEFL,Any_Fbeta,0.307,0.31,0.383


In [81]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.307,&,0.31,&,0.383


In [82]:
# Gmean: compare the three baseline losses with the four G-mean based losses.
rope = r
baselines = ['MSE','BCE','WBCE']
ours = ['Pure_Gmean','Any_Gmean','WBCEGL','SPLGL']

# Keep only the Gmean rows and the seven compared methods, selected by column
# name so the labels printed below can never drift from the data they describe.
df_gm = df.loc[df['Metrics'] == 'Gmean', baselines + ours]
df_gm = df_gm.reset_index(drop=True).astype(float)
print(len(df_gm))

# Average
print("==========", "Average", "==========")
for method in baselines + ours:
    print(method + ":", np.mean(df_gm[method]))

# Bayesian Sign Test
# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
print("============"*3, "Probability", "============"*3)
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_gm[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

102
MSE: 0.6048629411764704
BCE: 0.5887368627450981
WBCE: 0.8117588235294119
Pure_Gmean: 0.8068970588235294
Any_Gmean: 0.8121725490196077
WBCEGL: 0.8092186274509804
SPLGL: 0.808121568627451
P(MSE > Pure_Gmean) = 0.0, P(rope) = 0.0, P(Pure_Gmean > MSE) = 1.0
P(BCE > Pure_Gmean) = 0.0, P(rope) = 0.0, P(Pure_Gmean > BCE) = 1.0
P(WBCE > Pure_Gmean) = 0.02388, P(rope) = 0.97108, P(Pure_Gmean > WBCE) = 0.00504
P(MSE > Any_Gmean) = 0.0, P(rope) = 0.0, P(Any_Gmean > MSE) = 1.0
P(BCE > Any_Gmean) = 0.0, P(rope) = 0.0, P(Any_Gmean > BCE) = 1.0
P(WBCE > Any_Gmean) = 0.02686, P(rope) = 0.3936, P(Any_Gmean > WBCE) = 0.57954
P(MSE > WBCEGL) = 0.0, P(rope) = 0.0, P(WBCEGL > MSE) = 1.0
P(BCE > WBCEGL) = 0.0, P(rope) = 0.0, P(WBCEGL > BCE) = 1.0
P(WBCE > WBCEGL) = 0.0021, P(rope) = 0.99788, P(WBCEGL > WBCE) = 2e-05
P(MSE > SPLGL) = 0.0, P(rope) = 0.0, P(SPLGL > MSE) = 1.0
P(BCE > SPLGL) = 0.0, P(rope) = 0.0, P(SPLGL > BCE) = 1.0
P(WBCE > SPLGL) = 0.1689, P(rope) = 0.8109, P(SPLGL > WBCE) = 0.0202


Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,MSE,Pure_Gmean,0.0,0.0,1.0
1,BCE,Pure_Gmean,0.0,0.0,1.0
2,WBCE,Pure_Gmean,0.024,0.971,0.005
3,MSE,Any_Gmean,0.0,0.0,1.0
4,BCE,Any_Gmean,0.0,0.0,1.0
5,WBCE,Any_Gmean,0.027,0.394,0.58
6,MSE,WBCEGL,0.0,0.0,1.0
7,BCE,WBCEGL,0.0,0.0,1.0
8,WBCE,WBCEGL,0.002,0.998,0.0
9,MSE,SPLGL,0.0,0.0,1.0


In [83]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.0,&,0.0,&,1.0
1,0.0,&,0.0,&,1.0
2,0.024,&,0.971,&,0.005
3,0.0,&,0.0,&,1.0
4,0.0,&,0.0,&,1.0
5,0.027,&,0.394,&,0.58
6,0.0,&,0.0,&,1.0
7,0.0,&,0.0,&,1.0
8,0.002,&,0.998,&,0.0
9,0.0,&,0.0,&,1.0


In [84]:
# Bayesian Sign Test
# Pairwise comparison among the G-mean based losses on the Gmean scores in df_gm:
# Pure_Gmean / SPLGL take the "baseline" side, Any_Gmean / WBCEGL the "ours" side.
print("============"*3, "Probability", "============"*3)
rope = r
baselines = ['Pure_Gmean','SPLGL']
ours = ['Any_Gmean','WBCEGL']

# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_gm[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

P(Pure_Gmean > Any_Gmean) = 0.00116, P(rope) = 0.71012, P(Any_Gmean > Pure_Gmean) = 0.28872
P(SPLGL > Any_Gmean) = 0.0, P(rope) = 0.86238, P(Any_Gmean > SPLGL) = 0.13762
P(Pure_Gmean > WBCEGL) = 0.01338, P(rope) = 0.9366, P(WBCEGL > Pure_Gmean) = 0.05002
P(SPLGL > WBCEGL) = 0.00488, P(rope) = 0.9106, P(WBCEGL > SPLGL) = 0.08452


Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,Pure_Gmean,Any_Gmean,0.001,0.71,0.289
1,SPLGL,Any_Gmean,0.0,0.862,0.138
2,Pure_Gmean,WBCEGL,0.013,0.937,0.05
3,SPLGL,WBCEGL,0.005,0.911,0.085


In [85]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.001,&,0.71,&,0.289
1,0.0,&,0.862,&,0.138
2,0.013,&,0.937,&,0.05
3,0.005,&,0.911,&,0.085


In [86]:
# Bayesian Sign Test
# Head-to-head comparison of WBCEGL ("baseline" side) against Any_Gmean
# ("ours" side) on the Gmean scores in df_gm.
print("============"*3, "Probability", "============"*3)
rope = r
baselines = ['WBCEGL']
ours = ['Any_Gmean']

# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_gm[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

P(WBCEGL > Any_Gmean) = 0.0025, P(rope) = 0.67286, P(Any_Gmean > WBCEGL) = 0.32464


Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,WBCEGL,Any_Gmean,0.002,0.673,0.325


In [87]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.002,&,0.673,&,0.325


In [88]:
# B_Accuracy: compare the three baseline losses with the four balanced-accuracy
# based losses.
rope = r
baselines = ['MSE','BCE','WBCE']
ours = ['Pure_BAccu','Any_BAccu','WBCEBL','SPLBL']

# Keep only the B_Acc rows and the seven compared methods, selected by column
# name so the labels printed below can never drift from the data they describe.
df_ba = df.loc[df['Metrics'] == 'B_Acc', baselines + ours]
df_ba = df_ba.reset_index(drop=True).astype(float)
print(len(df_ba))

# Average
print("==========", "Average", "==========")
for method in baselines + ours:
    print(method + ":", np.mean(df_ba[method]))

# Bayesian Sign Test
# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
print("============"*3, "Probability", "============"*3)
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_ba[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

102
MSE: 0.7418735294117648
BCE: 0.7373264705882352
WBCE: 0.816313725490196
Pure_BAccu: 0.8150117647058824
Any_BAccu: 0.8129245098039216
WBCEBL: 0.8160196078431371
SPLBL: 0.8132480392156863
P(MSE > Pure_BAccu) = 0.0, P(rope) = 0.0, P(Pure_BAccu > MSE) = 1.0
P(BCE > Pure_BAccu) = 0.0, P(rope) = 0.0, P(Pure_BAccu > BCE) = 1.0
P(WBCE > Pure_BAccu) = 0.08118, P(rope) = 0.90334, P(Pure_BAccu > WBCE) = 0.01548
P(MSE > Any_BAccu) = 0.0, P(rope) = 0.0, P(Any_BAccu > MSE) = 1.0
P(BCE > Any_BAccu) = 0.0, P(rope) = 0.0, P(Any_BAccu > BCE) = 1.0
P(WBCE > Any_BAccu) = 0.12832, P(rope) = 0.79806, P(Any_BAccu > WBCE) = 0.07362
P(MSE > WBCEBL) = 0.0, P(rope) = 0.0, P(WBCEBL > MSE) = 1.0
P(BCE > WBCEBL) = 0.0, P(rope) = 0.0, P(WBCEBL > BCE) = 1.0
P(WBCE > WBCEBL) = 0.0001, P(rope) = 0.99984, P(WBCEBL > WBCE) = 6e-05
P(MSE > SPLBL) = 0.0, P(rope) = 0.0, P(SPLBL > MSE) = 1.0
P(BCE > SPLBL) = 0.0, P(rope) = 0.0, P(SPLBL > BCE) = 1.0
P(WBCE > SPLBL) = 0.01846, P(rope) = 0.98152, P(SPLBL > WBCE) = 2e-05


Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,MSE,Pure_BAccu,0.0,0.0,1.0
1,BCE,Pure_BAccu,0.0,0.0,1.0
2,WBCE,Pure_BAccu,0.081,0.903,0.015
3,MSE,Any_BAccu,0.0,0.0,1.0
4,BCE,Any_BAccu,0.0,0.0,1.0
5,WBCE,Any_BAccu,0.128,0.798,0.074
6,MSE,WBCEBL,0.0,0.0,1.0
7,BCE,WBCEBL,0.0,0.0,1.0
8,WBCE,WBCEBL,0.0,1.0,0.0
9,MSE,SPLBL,0.0,0.0,1.0


In [89]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.0,&,0.0,&,1.0
1,0.0,&,0.0,&,1.0
2,0.081,&,0.903,&,0.015
3,0.0,&,0.0,&,1.0
4,0.0,&,0.0,&,1.0
5,0.128,&,0.798,&,0.074
6,0.0,&,0.0,&,1.0
7,0.0,&,0.0,&,1.0
8,0.0,&,1.0,&,0.0
9,0.0,&,0.0,&,1.0


In [90]:
# Bayesian Sign Test
# Pairwise comparison among the balanced-accuracy based losses on the B_Acc
# scores in df_ba: Pure_BAccu / SPLBL take the "baseline" side,
# Any_BAccu / WBCEBL the "ours" side.
print("============"*3, "Probability", "============"*3)
rope = r
baselines = ['Pure_BAccu','SPLBL']
ours = ['Any_BAccu','WBCEBL']

# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_ba[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

P(Pure_BAccu > Any_BAccu) = 0.0635, P(rope) = 0.85212, P(Any_BAccu > Pure_BAccu) = 0.08438
P(SPLBL > Any_BAccu) = 0.12722, P(rope) = 0.266, P(Any_BAccu > SPLBL) = 0.60678
P(Pure_BAccu > WBCEBL) = 0.08148, P(rope) = 0.73658, P(WBCEBL > Pure_BAccu) = 0.18194
P(SPLBL > WBCEBL) = 0.00818, P(rope) = 0.66964, P(WBCEBL > SPLBL) = 0.32218


Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,Pure_BAccu,Any_BAccu,0.064,0.852,0.084
1,SPLBL,Any_BAccu,0.127,0.266,0.607
2,Pure_BAccu,WBCEBL,0.081,0.737,0.182
3,SPLBL,WBCEBL,0.008,0.67,0.322


In [91]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.064,&,0.852,&,0.084
1,0.127,&,0.266,&,0.607
2,0.081,&,0.737,&,0.182
3,0.008,&,0.67,&,0.322


In [92]:
# Bayesian Sign Test
# Head-to-head comparison of WBCEBL ("baseline" side) against Any_BAccu
# ("ours" side) on the B_Acc scores in df_ba.
print("============"*3, "Probability", "============"*3)
rope = r
baselines = ['WBCEBL']
ours = ['Any_BAccu']

# With verbose=True, bt.signtest prints one line per pair; its three return
# values are, per that printout, P(baseline > ours), P(rope), P(ours > baseline).
records = []
for ours_name in ours:
    for base_name in baselines:
        names = (base_name, ours_name)
        X = np.array(df_ba[[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        records.append((base_name, ours_name, left, within, right))
results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"])
results = results.round(3)
results

P(WBCEBL > Any_BAccu) = 0.08708, P(rope) = 0.8482, P(Any_BAccu > WBCEBL) = 0.06472


Unnamed: 0,Baseline,Ours,Basewin_prob,Draw_prob,Ourswin_prob
0,WBCEBL,Any_BAccu,0.087,0.848,0.065


In [93]:
# Interleave literal '&' columns between the three probabilities
# (presumably so rows can be pasted into a LaTeX table -- confirm).
results = results.assign(**{'&': '&'})
latex_columns = ['Basewin_prob', '&', 'Draw_prob', '&', 'Ourswin_prob']
results[latex_columns]

Unnamed: 0,Basewin_prob,&,Draw_prob,&.1,Ourswin_prob
0,0.087,&,0.848,&,0.065


# Imbalance Analysis

In [4]:
import pandas as pd
import numpy as np
# Reload the untouched result table for the imbalance analysis; the CSV's
# unnamed first column holds the metric name of each row.
df = pd.read_csv('5CV_MLP(AllTypes_1Repeat_new)_102datasets.csv').rename(
    columns={'Unnamed: 0': 'Metrics'})
df

Unnamed: 0,Metrics,Dataset,MSE,BCE,WBCE,Pure_Fbeta,Any_Fbeta,WBCEFL,SPLFL,Pure_Gmean,Any_Gmean,WBCEGL,SPLGL,Pure_BAccu,Any_BAccu,WBCEBL,SPLBL
0,Acc,D,0.652,0.64,0.648,0.668,0.652,0.664,0.664,0.684,0.668,0.652,0.664,0.668,0.688,0.652,0.668
1,F1,S,0.5618,0.5453,0.6318,0.6728,0.6507,0.6584,0.6668,0.6332,0.6194,0.6262,0.6281,0.6307,0.6655,0.6207,0.6608
2,Gmean,1,0.6354,0.6264,0.6623,0.6882,0.6712,0.6849,0.686,0.6923,0.6741,0.6619,0.6793,0.6738,0.7036,0.6595,0.6892
3,B_Acc,C,0.6455,0.6337,0.691,0.7285,0.7062,0.7156,0.7229,0.7021,0.6872,0.6916,0.6913,0.7017,0.7295,0.6892,0.7187
4,Pre,S,0.5294,0.5016,0.5162,0.5281,0.5157,0.5246,0.5227,0.5499,0.5366,0.5159,0.5278,0.5304,0.5456,0.5139,0.5275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
810,Gmean,105,0.943,0.9546,0.9668,0.9348,0.9572,0.9445,0.9351,0.9536,0.9712,0.9555,0.9532,0.9523,0.9738,0.9623,0.9511
811,B_Acc,C,0.9439,0.9553,0.967,0.9359,0.9578,0.9451,0.936,0.9538,0.9712,0.9556,0.9535,0.9525,0.9738,0.9624,0.9514
812,Pre,S,0.9219,0.9316,0.8114,0.9036,0.9472,0.8876,0.8725,0.737,0.8471,0.7608,0.7393,0.7377,0.8491,0.7934,0.7269
813,Rec,V,0.9027,0.9238,0.9783,0.8903,0.9257,0.9126,0.8978,0.9752,0.9771,0.9702,0.9733,0.9721,0.982,0.974,0.974


In [5]:
# Discard the "Pure_*" and "SPL*" variants; the baselines and the
# Any_* / WBCE*L columns remain.
dropped_variants = ['Pure_Fbeta', 'SPLFL', 'Pure_Gmean', 'SPLGL', 'Pure_BAccu', 'SPLBL']
df_a = df.drop(columns=dropped_variants)
df_a.iloc[:14]

Unnamed: 0,Metrics,Dataset,MSE,BCE,WBCE,Any_Fbeta,WBCEFL,Any_Gmean,WBCEGL,Any_BAccu,WBCEBL
0,Acc,D,0.652,0.64,0.648,0.652,0.664,0.668,0.652,0.688,0.652
1,F1,S,0.5618,0.5453,0.6318,0.6507,0.6584,0.6194,0.6262,0.6655,0.6207
2,Gmean,1,0.6354,0.6264,0.6623,0.6712,0.6849,0.6741,0.6619,0.7036,0.6595
3,B_Acc,C,0.6455,0.6337,0.691,0.7062,0.7156,0.6872,0.6916,0.7295,0.6892
4,Pre,S,0.5294,0.5016,0.5162,0.5157,0.5246,0.5366,0.5159,0.5456,0.5139
5,Rec,V,0.6222,0.6111,0.8444,0.9,0.9,0.7556,0.8333,0.8778,0.8222
6,Spe,,0.6687,0.6562,0.5375,0.5125,0.5313,0.6188,0.55,0.5813,0.5562
7,,Dataset,MSE,BCE,WBCE,Any_Fbeta,WBCEFL,Any_Gmean,WBCEGL,Any_BAccu,WBCEBL
8,Acc,D,0.828,0.824,0.792,0.812,0.804,0.812,0.8,0.808,0.788
9,F1,S,0.7648,0.7602,0.7413,0.7476,0.7456,0.7508,0.7414,0.7444,0.7294


In [6]:
# Keep only the F1 / Gmean / B_Acc rows: remove the other metric rows and the
# repeated header rows (whose Dataset cell is the literal string 'Dataset').
excluded_metrics = ['Acc', 'Pre', 'Rec', 'Spe']
keep_row = ~df_a.Metrics.isin(excluded_metrics) & (df_a.Dataset != 'Dataset')
df_b = df_a[keep_row].reset_index(drop=True)
df_b

Unnamed: 0,Metrics,Dataset,MSE,BCE,WBCE,Any_Fbeta,WBCEFL,Any_Gmean,WBCEGL,Any_BAccu,WBCEBL
0,F1,S,0.5618,0.5453,0.6318,0.6507,0.6584,0.6194,0.6262,0.6655,0.6207
1,Gmean,1,0.6354,0.6264,0.6623,0.6712,0.6849,0.6741,0.6619,0.7036,0.6595
2,B_Acc,C,0.6455,0.6337,0.691,0.7062,0.7156,0.6872,0.6916,0.7295,0.6892
3,F1,S,0.7648,0.7602,0.7413,0.7476,0.7456,0.7508,0.7414,0.7444,0.7294
4,Gmean,2,0.804,0.8016,0.7854,0.7906,0.7894,0.7935,0.7865,0.7877,0.7782
...,...,...,...,...,...,...,...,...,...,...,...
301,Gmean,104,0.7693,0.7652,0.8061,0.7966,0.8014,0.8015,0.8038,0.7996,0.8003
302,B_Acc,C,0.7778,0.7743,0.8065,0.7971,0.8016,0.8022,0.8041,0.8007,0.8009
303,F1,S,0.912,0.9273,0.8865,0.9362,0.8998,0.9071,0.8528,0.9102,0.8743
304,Gmean,105,0.943,0.9546,0.9668,0.9572,0.9445,0.9712,0.9555,0.9738,0.9623


In [7]:
# Dataset numbers 1..105 with 23, 82 and 84 left out.
skipped_datasets = (23, 82, 84)
datnum = [number for number in range(1, 106) if number not in skipped_datasets]
print(len(datnum), datnum)

102 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 83, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105]


In [8]:
# Repeat every dataset number three times in a row, giving one entry per
# row of df_b (each dataset contributes three metric rows there).
datanum = [number for number in datnum for _ in range(3)]
print(len(datanum), datanum)

306 [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20, 21, 21, 21, 22, 22, 22, 24, 24, 24, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 29, 29, 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 38, 38, 38, 39, 39, 39, 40, 40, 40, 41, 41, 41, 42, 42, 42, 43, 43, 43, 44, 44, 44, 45, 45, 45, 46, 46, 46, 47, 47, 47, 48, 48, 48, 49, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 58, 58, 58, 59, 59, 59, 60, 60, 60, 61, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 76, 76, 76, 77, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80, 81, 81, 81, 83, 83, 83, 85, 85, 85, 86, 86, 86, 87, 87, 87, 88, 88, 88, 89

In [9]:
# Overwrite the Dataset column with the numeric dataset id of each row.
df_b = df_b.assign(Dataset=datanum)
df_b

Unnamed: 0,Metrics,Dataset,MSE,BCE,WBCE,Any_Fbeta,WBCEFL,Any_Gmean,WBCEGL,Any_BAccu,WBCEBL
0,F1,1,0.5618,0.5453,0.6318,0.6507,0.6584,0.6194,0.6262,0.6655,0.6207
1,Gmean,1,0.6354,0.6264,0.6623,0.6712,0.6849,0.6741,0.6619,0.7036,0.6595
2,B_Acc,1,0.6455,0.6337,0.691,0.7062,0.7156,0.6872,0.6916,0.7295,0.6892
3,F1,2,0.7648,0.7602,0.7413,0.7476,0.7456,0.7508,0.7414,0.7444,0.7294
4,Gmean,2,0.804,0.8016,0.7854,0.7906,0.7894,0.7935,0.7865,0.7877,0.7782
...,...,...,...,...,...,...,...,...,...,...,...
301,Gmean,104,0.7693,0.7652,0.8061,0.7966,0.8014,0.8015,0.8038,0.7996,0.8003
302,B_Acc,104,0.7778,0.7743,0.8065,0.7971,0.8016,0.8022,0.8041,0.8007,0.8009
303,F1,105,0.912,0.9273,0.8865,0.9362,0.8998,0.9071,0.8528,0.9102,0.8743
304,Gmean,105,0.943,0.9546,0.9668,0.9572,0.9445,0.9712,0.9555,0.9738,0.9623


In [10]:
# Add one "<method abbreviation>-<baseline>" column per (method, baseline)
# pair, holding the method's score minus the baseline's score.
method_abbreviations = {
    "Any_Fbeta": "AF",
    "WBCEFL": "WF",
    "Any_Gmean": "AG",
    "WBCEGL": "WG",
    "Any_BAccu": "AB",
    "WBCEBL": "WB",
}
for method, abbreviation in method_abbreviations.items():
    for baseline in ("MSE", "BCE", "WBCE"):
        difference = df_b[method].astype(float) - df_b[baseline].astype(float)
        df_b[abbreviation + "-" + baseline] = difference
df_b

Unnamed: 0,Metrics,Dataset,MSE,BCE,WBCE,Any_Fbeta,WBCEFL,Any_Gmean,WBCEGL,Any_BAccu,...,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
0,F1,1,0.5618,0.5453,0.6318,0.6507,0.6584,0.6194,0.6262,0.6655,...,-0.0124,0.0644,0.0809,-0.0056,0.1037,0.1202,0.0337,0.0589,0.0754,-0.0111
1,Gmean,1,0.6354,0.6264,0.6623,0.6712,0.6849,0.6741,0.6619,0.7036,...,0.0118,0.0265,0.0355,-0.0004,0.0682,0.0772,0.0413,0.0241,0.0331,-0.0028
2,B_Acc,1,0.6455,0.6337,0.691,0.7062,0.7156,0.6872,0.6916,0.7295,...,-0.0038,0.0461,0.0579,0.0006,0.0840,0.0958,0.0385,0.0437,0.0555,-0.0018
3,F1,2,0.7648,0.7602,0.7413,0.7476,0.7456,0.7508,0.7414,0.7444,...,0.0095,-0.0234,-0.0188,0.0001,-0.0204,-0.0158,0.0031,-0.0354,-0.0308,-0.0119
4,Gmean,2,0.804,0.8016,0.7854,0.7906,0.7894,0.7935,0.7865,0.7877,...,0.0081,-0.0175,-0.0151,0.0011,-0.0163,-0.0139,0.0023,-0.0258,-0.0234,-0.0072
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,Gmean,104,0.7693,0.7652,0.8061,0.7966,0.8014,0.8015,0.8038,0.7996,...,-0.0046,0.0345,0.0386,-0.0023,0.0303,0.0344,-0.0065,0.0310,0.0351,-0.0058
302,B_Acc,104,0.7778,0.7743,0.8065,0.7971,0.8016,0.8022,0.8041,0.8007,...,-0.0043,0.0263,0.0298,-0.0024,0.0229,0.0264,-0.0058,0.0231,0.0266,-0.0056
303,F1,105,0.912,0.9273,0.8865,0.9362,0.8998,0.9071,0.8528,0.9102,...,0.0206,-0.0592,-0.0745,-0.0337,-0.0018,-0.0171,0.0237,-0.0377,-0.0530,-0.0122
304,Gmean,105,0.943,0.9546,0.9668,0.9572,0.9445,0.9712,0.9555,0.9738,...,0.0044,0.0125,0.0009,-0.0113,0.0308,0.0192,0.0070,0.0193,0.0077,-0.0045


In [11]:
# Drop the raw score columns so only Metrics, Dataset and the difference
# columns remain.
raw_score_columns = [
    'MSE', 'BCE', 'WBCE',
    'Any_Fbeta', 'WBCEFL',
    'Any_Gmean', 'WBCEGL',
    'Any_BAccu', 'WBCEBL',
]
df_b = df_b.drop(columns=raw_score_columns)
df_b

Unnamed: 0,Metrics,Dataset,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
0,F1,1,0.0889,0.1054,0.0189,0.0966,0.1131,0.0266,0.0576,0.0741,-0.0124,0.0644,0.0809,-0.0056,0.1037,0.1202,0.0337,0.0589,0.0754,-0.0111
1,Gmean,1,0.0358,0.0448,0.0089,0.0495,0.0585,0.0226,0.0387,0.0477,0.0118,0.0265,0.0355,-0.0004,0.0682,0.0772,0.0413,0.0241,0.0331,-0.0028
2,B_Acc,1,0.0607,0.0725,0.0152,0.0701,0.0819,0.0246,0.0417,0.0535,-0.0038,0.0461,0.0579,0.0006,0.0840,0.0958,0.0385,0.0437,0.0555,-0.0018
3,F1,2,-0.0172,-0.0126,0.0063,-0.0192,-0.0146,0.0043,-0.0140,-0.0094,0.0095,-0.0234,-0.0188,0.0001,-0.0204,-0.0158,0.0031,-0.0354,-0.0308,-0.0119
4,Gmean,2,-0.0134,-0.0110,0.0052,-0.0146,-0.0122,0.0040,-0.0105,-0.0081,0.0081,-0.0175,-0.0151,0.0011,-0.0163,-0.0139,0.0023,-0.0258,-0.0234,-0.0072
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,Gmean,104,0.0273,0.0314,-0.0095,0.0321,0.0362,-0.0047,0.0322,0.0363,-0.0046,0.0345,0.0386,-0.0023,0.0303,0.0344,-0.0065,0.0310,0.0351,-0.0058
302,B_Acc,104,0.0193,0.0228,-0.0094,0.0238,0.0273,-0.0049,0.0244,0.0279,-0.0043,0.0263,0.0298,-0.0024,0.0229,0.0264,-0.0058,0.0231,0.0266,-0.0056
303,F1,105,0.0242,0.0089,0.0497,-0.0122,-0.0275,0.0133,-0.0049,-0.0202,0.0206,-0.0592,-0.0745,-0.0337,-0.0018,-0.0171,0.0237,-0.0377,-0.0530,-0.0122
304,Gmean,105,0.0142,0.0026,-0.0096,0.0015,-0.0101,-0.0223,0.0282,0.0166,0.0044,0.0125,0.0009,-0.0113,0.0308,0.0192,0.0070,0.0193,0.0077,-0.0045


In [14]:
# Collect per-dataset summary statistics from the raw dataset files.
data = []        # dataset number, in processing order
sample = []      # number of rows
feature = []     # number of feature columns (every column except the last)
imbalance1 = []  # fraction of rows labelled 0 (majority class)
imbalance2 = []  # fraction of rows labelled 1 (minority class)
# Experiments for 105-3 Datasets
for i in range(1, 106):
    if i in (23, 82, 84):
        continue
    # NOTE: this rebinds `df`, replacing the result table loaded earlier.
    df = pd.read_csv('ds'+ str(i) +'.csv')
    print('+'*35, '{}th Dataset'.format(i), '+'*35)
    labels = df.iloc[:, -1]          # the class label is the last column
    class_counts = labels.value_counts()
    print('<Original Class>\n', class_counts)

    # Make major class as '0' and minor class as '1'
    MAJOR = class_counts[class_counts == class_counts.max()].index[0]
    rarer_classes = class_counts[class_counts != class_counts.max()]
    if rarer_classes.empty:
        raise ValueError(
            'ds{}.csv has no minority class (all classes are equally '
            'frequent), so it cannot be relabelled as 0/1'.format(i))
    minor = rarer_classes.index[0]
    # Both masks come from the ORIGINAL labels, so the two relabellings cannot
    # cascade into each other and no placeholder value is needed.
    is_major = labels == MAJOR
    is_minor = labels == minor
    df.iloc[:, -1] = labels.mask(is_major, 0).mask(is_minor, 1)

    binary_counts = df.iloc[:, -1].value_counts()
    print('<Modified Class>\n', binary_counts)
    print('<Imabalance ratio>\n', "{: .2f}:1".format(binary_counts[0]/binary_counts[1]))
    data.append(i)
    imbalance1.append(binary_counts[0]/len(df))
    imbalance2.append(binary_counts[1]/len(df))

    X = df.iloc[:, :-1]
    X = (X - X.mean())/X.std()    # Features // Standardization
    y = df.iloc[:, -1]
    # Only the shape of X is recorded; standardisation does not change it.
    sample.append(X.shape[0])
    feature.append(X.shape[1])

+++++++++++++++++++++++++++++++++++ 1th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 2    160
1     90
Name: pathology_cp_data, dtype: int64
<Modified Class>
 0    160
1     90
Name: pathology_cp_data, dtype: int64
<Imabalance ratio>
  1.78:1
+++++++++++++++++++++++++++++++++++ 2th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 2    154
1     96
Name: surgical_lesion, dtype: int64
<Modified Class>
 0    154
1     96
Name: surgical_lesion, dtype: int64
<Imabalance ratio>
  1.60:1
+++++++++++++++++++++++++++++++++++ 3th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 1    225
2     81
Name: Survival_status, dtype: int64
<Modified Class>
 0    225
1     81
Name: Survival_status, dtype: int64
<Imabalance ratio>
  2.78:1
+++++++++++++++++++++++++++++++++++ 4th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 1    210
2    100
Name: Class, dtype: int64
<Modified Class>
 0    210
1    100
Name: Class, dtype: int64
<Imabalance ratio>
 

+++++++++++++++++++++++++++++++++++ 40th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 0    608
1     37
Name: Golden.Crowned.Kinglet, dtype: int64
<Modified Class>
 0    608
1     37
Name: Golden.Crowned.Kinglet, dtype: int64
<Imabalance ratio>
  16.43:1
+++++++++++++++++++++++++++++++++++ 41th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 1    609
2     52
Name: def, dtype: int64
<Modified Class>
 0    609
1     52
Name: def, dtype: int64
<Imabalance ratio>
  11.71:1
+++++++++++++++++++++++++++++++++++ 42th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 1    444
2    239
Name: Class, dtype: int64
<Modified Class>
 0    444
1    239
Name: Class, dtype: int64
<Imabalance ratio>
  1.86:1
+++++++++++++++++++++++++++++++++++ 43th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 1    644
2     61
Name: def, dtype: int64
<Modified Class>
 0    644
1     61
Name: def, dtype: int64
<Imabalance ratio>
  10.56:1
+++++++++++++++++++++++

+++++++++++++++++++++++++++++++++++ 75th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 0    1440
1     560
Name: trees, dtype: int64
<Modified Class>
 0    1440
1     560
Name: trees, dtype: int64
<Imabalance ratio>
  2.57:1
+++++++++++++++++++++++++++++++++++ 76th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 0    1739
1     261
Name: label7, dtype: int64
<Modified Class>
 0    1739
1     261
Name: label7, dtype: int64
<Imabalance ratio>
  6.66:1
+++++++++++++++++++++++++++++++++++ 77th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 1    1519
2     482
Name: binaryClass, dtype: int64
<Modified Class>
 0    1519
1     482
Name: binaryClass, dtype: int64
<Imabalance ratio>
  3.15:1
+++++++++++++++++++++++++++++++++++ 78th Dataset +++++++++++++++++++++++++++++++++++
<Original Class>
 1    1783
2     326
Name: defects, dtype: int64
<Modified Class>
 0    1783
1     326
Name: defects, dtype: int64
<Imabalance ratio>
  5.47:1
++++++++++++++++++

In [15]:
# Collect the per-dataset lists into one summary table: dataset id, number of
# rows, number of feature columns, and the share of class 0 / class 1.
# NOTE(review): the earlier version of this cell carried commented-out lines that
# interleaved literal '&' and '\\' separator columns -- presumably for LaTeX
# table export; re-add them if that export is still needed.
df_im = pd.DataFrame(
    {
        "data#": data,
        "sample": sample,
        "feature": feature,
        "negative(0)": imbalance1,
        "positive(1)": imbalance2,
    }
)
df_im

Unnamed: 0,data#,sample,feature,negative(0),positive(1)
0,1,250,12,0.640000,0.360000
1,2,250,9,0.616000,0.384000
2,3,306,3,0.735294,0.264706
3,4,310,6,0.677419,0.322581
4,5,320,6,0.665625,0.334375
...,...,...,...,...,...
97,101,7970,39,0.931493,0.068507
98,102,8192,12,0.697632,0.302368
99,103,8192,19,0.697632,0.302368
100,104,8192,32,0.689575,0.310425


In [16]:
# Number of datasets whose class-0 share falls in each band:
# a: (0.9, inf), b: (0.8, 0.9], c: (0.7, 0.8], d: (0.6, 0.7].
negative_share = df_im["negative(0)"]
a = int((negative_share > 0.9).sum())
b = int(((negative_share > 0.8) & (negative_share <= 0.9)).sum())
c = int(((negative_share > 0.7) & (negative_share <= 0.8)).sum())
d = int(((negative_share > 0.6) & (negative_share <= 0.7)).sum())

In [17]:
# Band sizes in order: class-0 share > 0.9, (0.8, 0.9], (0.7, 0.8], (0.6, 0.7].
print(a, b, c, d)

15 37 19 31


In [18]:
# Dataset ids ("data#") grouped by the band their class-0 share falls in.
negative_share = df_im["negative(0)"]
dataset_ids = df_im["data#"]

list_90 = list(dataset_ids[negative_share > 0.9])
list_80 = list(dataset_ids[(negative_share > 0.8) & (negative_share <= 0.9)])
list_70 = list(dataset_ids[(negative_share > 0.7) & (negative_share <= 0.8)])
list_60 = list(dataset_ids[(negative_share > 0.6) & (negative_share <= 0.7)])

# One line per band: the ids followed by how many there are.
for band_ids in (list_90, list_80, list_70, list_60):
    print(band_ids, len(band_ids))

[14, 18, 20, 28, 31, 40, 41, 43, 53, 61, 63, 85, 86, 87, 101] 15
[6, 8, 15, 19, 22, 24, 25, 27, 30, 32, 33, 37, 46, 56, 58, 60, 65, 66, 69, 70, 71, 72, 73, 74, 76, 78, 80, 81, 90, 92, 94, 96, 97, 98, 99, 100, 105] 37
[3, 9, 13, 17, 29, 35, 44, 47, 49, 55, 62, 64, 67, 75, 77, 83, 88, 89, 95] 19
[1, 2, 4, 5, 7, 10, 11, 12, 16, 21, 26, 34, 36, 38, 39, 42, 45, 48, 50, 51, 52, 54, 57, 59, 68, 79, 91, 93, 102, 103, 104] 31


In [19]:
# Split df_b into one frame per band by matching its Dataset column against the
# id lists; each frame gets a fresh 0..n-1 index.
df_90 = df_b.loc[df_b["Dataset"].isin(list_90)].reset_index(drop=True)
df_80 = df_b.loc[df_b["Dataset"].isin(list_80)].reset_index(drop=True)
df_70 = df_b.loc[df_b["Dataset"].isin(list_70)].reset_index(drop=True)
df_60 = df_b.loc[df_b["Dataset"].isin(list_60)].reset_index(drop=True)

In [91]:
# Display the rows of df_b whose Dataset id is in list_60.
df_60

Unnamed: 0,Metrics,Dataset,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
0,F1,1,0.0889,0.1054,0.0189,0.0966,0.1131,0.0266,0.0576,0.0741,-0.0124,0.0644,0.0809,-0.0056,0.1037,0.1202,0.0337,0.0589,0.0754,-0.0111
1,Gmean,1,0.0358,0.0448,0.0089,0.0495,0.0585,0.0226,0.0387,0.0477,0.0118,0.0265,0.0355,-0.0004,0.0682,0.0772,0.0413,0.0241,0.0331,-0.0028
2,B_Acc,1,0.0607,0.0725,0.0152,0.0701,0.0819,0.0246,0.0417,0.0535,-0.0038,0.0461,0.0579,0.0006,0.0840,0.0958,0.0385,0.0437,0.0555,-0.0018
3,F1,2,-0.0172,-0.0126,0.0063,-0.0192,-0.0146,0.0043,-0.0140,-0.0094,0.0095,-0.0234,-0.0188,0.0001,-0.0204,-0.0158,0.0031,-0.0354,-0.0308,-0.0119
4,Gmean,2,-0.0134,-0.0110,0.0052,-0.0146,-0.0122,0.0040,-0.0105,-0.0081,0.0081,-0.0175,-0.0151,0.0011,-0.0163,-0.0139,0.0023,-0.0258,-0.0234,-0.0072
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,Gmean,103,0.0043,0.0016,-0.0074,0.0107,0.0080,-0.0010,0.0089,0.0062,-0.0028,0.0118,0.0091,0.0001,0.0087,0.0060,-0.0030,0.0126,0.0099,0.0009
89,B_Acc,103,0.0035,0.0011,-0.0067,0.0095,0.0071,-0.0007,0.0075,0.0051,-0.0027,0.0103,0.0079,0.0001,0.0072,0.0048,-0.0030,0.0111,0.0087,0.0009
90,F1,104,0.0133,0.0183,-0.0076,0.0164,0.0214,-0.0045,0.0149,0.0199,-0.0060,0.0181,0.0231,-0.0028,0.0117,0.0167,-0.0092,0.0138,0.0188,-0.0071
91,Gmean,104,0.0273,0.0314,-0.0095,0.0321,0.0362,-0.0047,0.0322,0.0363,-0.0046,0.0345,0.0386,-0.0023,0.0303,0.0344,-0.0065,0.0310,0.0351,-0.0058


In [92]:
# For each metric keep only that metric's rows and its six matching columns
# (AF-*/WF-* for F1, AG-*/WG-* for Gmean, AB-*/WB-* for B_Acc).
df_60_f1 = df_60.loc[
    df_60["Metrics"] == 'F1',
    ["AF-MSE", "AF-BCE", "AF-WBCE", "WF-MSE", "WF-BCE", "WF-WBCE"],
]
df_60_gm = df_60.loc[
    df_60["Metrics"] == 'Gmean',
    ["AG-MSE", "AG-BCE", "AG-WBCE", "WG-MSE", "WG-BCE", "WG-WBCE"],
]
df_60_ba = df_60.loc[
    df_60["Metrics"] == 'B_Acc',
    ["AB-MSE", "AB-BCE", "AB-WBCE", "WB-MSE", "WB-BCE", "WB-WBCE"],
]

In [93]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_60_f1. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_60_f1[col].mean(), 3)) + '±' + str(round(df_60_f1[col].std(), 3))]
        for col in ["AF-MSE", "AF-BCE", "AF-WBCE", "WF-MSE", "WF-BCE", "WF-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE
mean ± std,0.024±0.063,0.027±0.057,-0.006±0.026,0.032±0.057,0.035±0.054,0.002±0.017


In [94]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_60_gm. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_60_gm[col].mean(), 3)) + '±' + str(round(df_60_gm[col].std(), 3))]
        for col in ["AG-MSE", "AG-BCE", "AG-WBCE", "WG-MSE", "WG-BCE", "WG-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE
mean ± std,0.028±0.038,0.029±0.035,-0.0±0.021,0.03±0.039,0.031±0.035,0.002±0.014


In [95]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_60_ba. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_60_ba[col].mean(), 3)) + '±' + str(round(df_60_ba[col].std(), 3))]
        for col in ["AB-MSE", "AB-BCE", "AB-WBCE", "WB-MSE", "WB-BCE", "WB-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
mean ± std,0.009±0.026,0.011±0.031,-0.006±0.025,0.014±0.021,0.016±0.019,-0.001±0.011


In [96]:
# Display the rows of df_b whose Dataset id is in list_70.
df_70

Unnamed: 0,Metrics,Dataset,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
0,F1,3,0.0798,0.0697,-0.0883,0.1423,0.1322,-0.0258,0.1543,0.1442,-0.0138,0.1459,0.1358,-0.0222,0.1434,0.1333,-0.0247,0.1438,0.1337,-0.0243
1,Gmean,3,-0.3545,-0.3647,-0.5289,0.1569,0.1467,-0.0175,0.1699,0.1597,-0.0045,0.158,0.1478,-0.0164,0.1549,0.1447,-0.0195,0.1561,0.1459,-0.0183
2,B_Acc,3,-0.0548,-0.0588,-0.1437,0.0733,0.0693,-0.0156,0.0792,0.0752,-0.0097,0.0764,0.0724,-0.0125,0.0694,0.0654,-0.0195,0.0742,0.0702,-0.0147
3,F1,9,-0.0107,-0.0027,0.0138,0.0064,0.0144,0.0309,0.0073,0.0153,0.0318,0.0138,0.0218,0.0383,0.005,0.013,0.0295,0.0122,0.0202,0.0367
4,Gmean,9,0.0068,0.008,-0.0255,0.048,0.0492,0.0157,0.052,0.0532,0.0197,0.0488,0.05,0.0165,0.0529,0.0541,0.0206,0.0581,0.0593,0.0258
5,B_Acc,9,-0.0012,0.0023,-0.0039,0.0223,0.0258,0.0196,0.0236,0.0271,0.0209,0.0225,0.026,0.0198,0.0249,0.0284,0.0222,0.0269,0.0304,0.0242
6,F1,13,0.0462,0.0654,0.0087,0.0428,0.062,0.0053,0.0556,0.0748,0.0181,0.0509,0.0701,0.0134,0.0524,0.0716,0.0149,0.0562,0.0754,0.0187
7,Gmean,13,0.115,0.1278,0.0068,0.1137,0.1265,0.0055,0.1263,0.1391,0.0181,0.1216,0.1344,0.0134,0.1165,0.1293,0.0083,0.125,0.1378,0.0168
8,B_Acc,13,-0.009,0.0009,0.005,-0.0092,0.0007,0.0048,0.0033,0.0132,0.0173,-0.0015,0.0084,0.0125,-0.0018,0.0081,0.0122,0.0017,0.0116,0.0157
9,F1,17,0.025,0.1258,-0.0402,0.0008,0.1016,-0.0644,0.0915,0.1923,0.0263,0.045,0.1458,-0.0202,0.0809,0.1817,0.0157,0.0476,0.1484,-0.0176


In [97]:
# For each metric keep only that metric's rows and its six matching columns
# (AF-*/WF-* for F1, AG-*/WG-* for Gmean, AB-*/WB-* for B_Acc).
df_70_f1 = df_70.loc[
    df_70["Metrics"] == 'F1',
    ["AF-MSE", "AF-BCE", "AF-WBCE", "WF-MSE", "WF-BCE", "WF-WBCE"],
]
df_70_gm = df_70.loc[
    df_70["Metrics"] == 'Gmean',
    ["AG-MSE", "AG-BCE", "AG-WBCE", "WG-MSE", "WG-BCE", "WG-WBCE"],
]
df_70_ba = df_70.loc[
    df_70["Metrics"] == 'B_Acc',
    ["AB-MSE", "AB-BCE", "AB-WBCE", "WB-MSE", "WB-BCE", "WB-WBCE"],
]

In [98]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_70_f1. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_70_f1[col].mean(), 3)) + '±' + str(round(df_70_f1[col].std(), 3))]
        for col in ["AF-MSE", "AF-BCE", "AF-WBCE", "WF-MSE", "WF-BCE", "WF-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE
mean ± std,0.076±0.122,0.093±0.122,0.008±0.036,0.075±0.127,0.092±0.126,0.006±0.026


In [99]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_70_gm. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_70_gm[col].mean(), 3)) + '±' + str(round(df_70_gm[col].std(), 3))]
        for col in ["AG-MSE", "AG-BCE", "AG-WBCE", "WG-MSE", "WG-BCE", "WG-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE
mean ± std,0.111±0.139,0.128±0.148,0.006±0.012,0.106±0.134,0.123±0.143,0.001±0.009


In [100]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_70_ba. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_70_ba[col].mean(), 3)) + '±' + str(round(df_70_ba[col].std(), 3))]
        for col in ["AB-MSE", "AB-BCE", "AB-WBCE", "WB-MSE", "WB-BCE", "WB-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
mean ± std,0.043±0.057,0.052±0.063,0.005±0.027,0.039±0.05,0.048±0.053,0.001±0.011


In [101]:
# Display the rows of df_b whose Dataset id is in list_80.
df_80

Unnamed: 0,Metrics,Dataset,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
0,F1,6,0.4032,0.4032,0.0222,0.3564,0.3564,-0.0246,0.3685,0.3685,-0.0125,0.4214,0.4214,0.0404,0.3430,0.3430,-0.0380,0.3980,0.3980,0.0170
1,Gmean,6,0.6643,0.6643,-0.0316,0.6531,0.6531,-0.0428,0.6755,0.6755,-0.0204,0.7383,0.7383,0.0424,0.6456,0.6456,-0.0503,0.7133,0.7133,0.0174
2,B_Acc,6,0.1947,0.1912,-0.0143,0.1704,0.1669,-0.0386,0.1889,0.1854,-0.0201,0.2451,0.2416,0.0361,0.1608,0.1573,-0.0482,0.2243,0.2208,0.0153
3,F1,8,0.0723,0.0606,0.0347,0.0780,0.0663,0.0404,0.0507,0.0390,0.0131,0.0362,0.0245,-0.0014,0.0358,0.0241,-0.0018,0.0459,0.0342,0.0083
4,Gmean,8,0.1177,0.0990,-0.0109,0.1391,0.1204,0.0105,0.1343,0.1156,0.0057,0.1221,0.1034,-0.0065,0.1222,0.1035,-0.0064,0.1326,0.1139,0.0040
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,Gmean,100,0.6708,0.6708,-0.0138,0.6824,0.6824,-0.0022,0.6912,0.6912,0.0066,0.6847,0.6847,0.0001,0.6652,0.6652,-0.0194,0.6800,0.6800,-0.0046
107,B_Acc,100,0.1752,0.1752,-0.0318,0.1847,0.1847,-0.0223,0.1973,0.1973,-0.0097,0.1945,0.1945,-0.0125,0.2035,0.2035,-0.0035,0.2028,0.2028,-0.0042
108,F1,105,0.0242,0.0089,0.0497,-0.0122,-0.0275,0.0133,-0.0049,-0.0202,0.0206,-0.0592,-0.0745,-0.0337,-0.0018,-0.0171,0.0237,-0.0377,-0.0530,-0.0122
109,Gmean,105,0.0142,0.0026,-0.0096,0.0015,-0.0101,-0.0223,0.0282,0.0166,0.0044,0.0125,0.0009,-0.0113,0.0308,0.0192,0.0070,0.0193,0.0077,-0.0045


In [102]:
# For each metric keep only that metric's rows and its six matching columns
# (AF-*/WF-* for F1, AG-*/WG-* for Gmean, AB-*/WB-* for B_Acc).
df_80_f1 = df_80.loc[
    df_80["Metrics"] == 'F1',
    ["AF-MSE", "AF-BCE", "AF-WBCE", "WF-MSE", "WF-BCE", "WF-WBCE"],
]
df_80_gm = df_80.loc[
    df_80["Metrics"] == 'Gmean',
    ["AG-MSE", "AG-BCE", "AG-WBCE", "WG-MSE", "WG-BCE", "WG-WBCE"],
]
df_80_ba = df_80.loc[
    df_80["Metrics"] == 'B_Acc',
    ["AB-MSE", "AB-BCE", "AB-WBCE", "WB-MSE", "WB-BCE", "WB-WBCE"],
]

In [103]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_80_f1. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_80_f1[col].mean(), 3)) + '±' + str(round(df_80_f1[col].std(), 3))]
        for col in ["AF-MSE", "AF-BCE", "AF-WBCE", "WF-MSE", "WF-BCE", "WF-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE
mean ± std,0.137±0.157,0.152±0.17,0.01±0.058,0.132±0.16,0.147±0.173,0.005±0.064


In [104]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_80_gm. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_80_gm[col].mean(), 3)) + '±' + str(round(df_80_gm[col].std(), 3))]
        for col in ["AG-MSE", "AG-BCE", "AG-WBCE", "WG-MSE", "WG-BCE", "WG-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE
mean ± std,0.263±0.279,0.284±0.295,-0.005±0.024,0.262±0.285,0.284±0.301,-0.005±0.022


In [105]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_80_ba. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_80_ba[col].mean(), 3)) + '±' + str(round(df_80_ba[col].std(), 3))]
        for col in ["AB-MSE", "AB-BCE", "AB-WBCE", "WB-MSE", "WB-BCE", "WB-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
mean ± std,0.085±0.104,0.089±0.109,-0.006±0.036,0.091±0.097,0.095±0.103,-0.0±0.012


In [106]:
# Display the rows of df_b whose Dataset id is in list_90.
df_90

Unnamed: 0,Metrics,Dataset,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
0,F1,14,0.1861,0.1861,0.041,0.1527,0.1527,0.0076,0.1621,0.1621,0.017,0.1117,0.1117,-0.0334,0.1544,0.1544,0.0093,0.1557,0.1557,0.0106
1,Gmean,14,0.533,0.533,0.043,0.5033,0.5033,0.0133,0.5175,0.5175,0.0275,0.3911,0.3911,-0.0989,0.4967,0.4967,0.0067,0.5153,0.5153,0.0253
2,B_Acc,14,0.0545,0.0545,0.0493,0.027,0.027,0.0218,0.0295,0.0295,0.0243,-0.0484,-0.0484,-0.0536,0.0223,0.0223,0.0171,0.0219,0.0219,0.0167
3,F1,18,0.38056,0.4377,0.0994,0.34226,0.3994,0.0611,0.30456,0.3617,0.0234,0.26286,0.32,-0.0183,0.28736,0.3445,0.0062,0.22196,0.2791,-0.0592
4,Gmean,18,0.60366,0.6853,-0.0331,0.58576,0.6674,-0.051,0.65486,0.7365,0.0181,0.62806,0.7097,-0.0087,0.60806,0.6897,-0.0287,0.56736,0.649,-0.0694
5,B_Acc,18,0.2016,0.221,-0.0076,0.1895,0.2089,-0.0197,0.2324,0.2518,0.0232,0.1997,0.2191,-0.0095,0.188,0.2074,-0.0212,0.1524,0.1718,-0.0568
6,F1,20,0.2934,0.2678,0.0619,0.3164,0.2908,0.0849,0.2795,0.2539,0.048,0.2118,0.1862,-0.0197,0.2168,0.1912,-0.0147,0.225,0.1994,-0.0065
7,Gmean,20,0.4496,0.4285,-0.0898,0.5117,0.4906,-0.0277,0.5775,0.5564,0.0381,0.5165,0.4954,-0.0229,0.5392,0.5181,-0.0002,0.5284,0.5073,-0.011
8,B_Acc,20,0.1449,0.1302,-0.0333,0.1811,0.1664,0.0029,0.2161,0.2014,0.0379,0.1572,0.1425,-0.021,0.1773,0.1626,-0.0009,0.1756,0.1609,-0.0026
9,F1,28,0.3615,0.3615,0.0938,0.3119,0.3119,0.0442,0.2884,0.2884,0.0207,0.3026,0.3026,0.0349,0.3005,0.3005,0.0328,0.2857,0.2857,0.018


In [107]:
# For each metric keep only that metric's rows and its six matching columns
# (AF-*/WF-* for F1, AG-*/WG-* for Gmean, AB-*/WB-* for B_Acc).
df_90_f1 = df_90.loc[
    df_90["Metrics"] == 'F1',
    ["AF-MSE", "AF-BCE", "AF-WBCE", "WF-MSE", "WF-BCE", "WF-WBCE"],
]
df_90_gm = df_90.loc[
    df_90["Metrics"] == 'Gmean',
    ["AG-MSE", "AG-BCE", "AG-WBCE", "WG-MSE", "WG-BCE", "WG-WBCE"],
]
df_90_ba = df_90.loc[
    df_90["Metrics"] == 'B_Acc',
    ["AB-MSE", "AB-BCE", "AB-WBCE", "WB-MSE", "WB-BCE", "WB-WBCE"],
]

In [108]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_90_f1. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_90_f1[col].mean(), 3)) + '±' + str(round(df_90_f1[col].std(), 3))]
        for col in ["AF-MSE", "AF-BCE", "AF-WBCE", "WF-MSE", "WF-BCE", "WF-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AF-MSE,AF-BCE,AF-WBCE,WF-MSE,WF-BCE,WF-WBCE
mean ± std,0.273±0.148,0.291±0.138,0.047±0.066,0.27±0.151,0.288±0.148,0.044±0.047


In [109]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_90_gm. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_90_gm[col].mean(), 3)) + '±' + str(round(df_90_gm[col].std(), 3))]
        for col in ["AG-MSE", "AG-BCE", "AG-WBCE", "WG-MSE", "WG-BCE", "WG-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AG-MSE,AG-BCE,AG-WBCE,WG-MSE,WG-BCE,WG-WBCE
mean ± std,0.563±0.228,0.596±0.237,0.007±0.022,0.547±0.236,0.579±0.244,-0.01±0.031


In [110]:
# One-row table giving "mean±std" (each rounded to 3 decimals) for every column
# of df_90_ba. .mean()/.std() are the values describe() reports in its 'mean' and
# 'std' rows; calling them directly avoids positional integer lookups
# (describe()[1], describe()[2]) into a label-indexed Series.
res = pd.DataFrame(
    {
        col: [str(round(df_90_ba[col].mean(), 3)) + '±' + str(round(df_90_ba[col].std(), 3))]
        for col in ["AB-MSE", "AB-BCE", "AB-WBCE", "WB-MSE", "WB-BCE", "WB-WBCE"]
    },
    index=['mean ± std'],
)
res

Unnamed: 0,AB-MSE,AB-BCE,AB-WBCE,WB-MSE,WB-BCE,WB-WBCE
mean ± std,0.201±0.096,0.207±0.088,-0.002±0.017,0.202±0.098,0.208±0.092,-0.001±0.025
