In [None]:
import bayesiantests as bt
import matplotlib.pyplot as plt

def stacked_bar(results, category_names):
    """
    Draw a horizontal stacked bar chart with one bar per entry of *results*.

    Parameters
    ----------
    results : dict
        Maps each bar label to a list of values, one value per category.
        Every list is expected to be as long as *category_names*.
    category_names : list of str
        Legend label of each stacked segment.

    Returns
    -------
    fig, ax
        The figure and axes created by ``plt.subplots`` that hold the chart.
    """
    bar_labels = list(results)
    values = np.array([results[key] for key in bar_labels])
    right_edges = values.cumsum(axis=1)
    # One colour per category, sampled from the middle of the RdYlGn colormap.
    palette = plt.colormaps['RdYlGn'](
        np.linspace(0.15, 0.85, values.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, values.sum(axis=1).max())

    for col, (segment_name, rgba) in enumerate(zip(category_names, palette)):
        segment = values[:, col]
        # Each segment starts where the previous categories end.
        segment_start = right_edges[:, col] - segment
        bars = ax.barh(bar_labels, segment, left=segment_start, height=0.5,
                       label=segment_name, color=rgba)

        red, green, blue, _alpha = rgba
        # Light text on dark segments, grey text otherwise.
        if red * green * blue < 0.5:
            label_color = 'white'
        else:
            label_color = 'darkgrey'
        ax.bar_label(bars, label_type='center', color=label_color)

    ax.legend(ncols=len(category_names), bbox_to_anchor=(0, 1),
              loc='upper left', fontsize='small')

    return fig, ax

# 10%

In [None]:
import pandas as pd
import numpy as np
# Results of the 10% setting (first 10 percent of samples have more weight).
res = (
    pd.read_csv("102datasets_5CV(SLS_sel_group_0.1).csv")
    .dropna(axis=0)                          # discard rows with any missing value
    .rename(columns={'SLS(0.0)': 'Hard'})
)
res

In [None]:
# Keep the rows whose first column is 'Acc' or 'G_Acc', then drop that
# first column and convert everything that remains to float.
df_acc = (
    res[res.iloc[:, 0].isin(['Acc', 'G_Acc'])]
    .reset_index(drop=True)
    .iloc[:, 1:]
    .astype(float)
)
df_acc

### take1: Visualization

In [None]:
# Dataset IDs in 1..105 that are skipped when building the SLS_MAX column.
EXCLUDED_DATASETS = {23, 82, 84}

# For every dataset pick one candidate column (position 3 onward):
# the best row-1 score wins, ties are broken by the best row-0 score, and the
# first such column is taken. Both of its values are appended (row 0, row 1),
# so acc_list ends up with two entries per dataset.
acc_list = []
for dataset_id in range(1, 106):
    if dataset_id in EXCLUDED_DATASETS:
        continue
    candidates = (
        df_acc[df_acc.Dataset == dataset_id]
        .iloc[:, 3:]
        .reset_index(drop=True)
    )
    row_0, row_1 = candidates.iloc[0], candidates.iloc[1]
    # Columns tied for the highest row-1 value.
    tied = row_1.index[row_1 == row_1.max()]
    # Among those, idxmax returns the first column with the highest row-0 value.
    col = row_0[tied].idxmax()
    acc_list.extend([candidates.loc[0, col], candidates.loc[1, col]])
len(acc_list)

In [None]:
# Copy the two selected columns of df_acc so that adding 'SLS_MAX' writes to
# an independent frame instead of a slice (avoids SettingWithCopyWarning).
df_acc_res = df_acc.iloc[:, 1:3].copy()
df_acc_res['SLS_MAX'] = acc_list
df_acc_res

In [None]:
# Even-position rows (Accuracy). Copied so the new column is written to an
# independent frame rather than a slice of df_acc_res.
df_acc_res_0 = df_acc_res.iloc[::2, :].copy()    # Accuracy
# Difference between the selected SLS column and the Focal baseline.
df_acc_res_0["SLS-FL"] = df_acc_res_0["SLS_MAX"] - df_acc_res_0["Focal"]
df_acc_res_0

In [None]:
# Odd-position rows (G_Accuracy). Copied so the new column is written to an
# independent frame rather than a slice of df_acc_res.
df_acc_res_1 = df_acc_res.iloc[1::2, :].copy()    # G_Accuracy
# Difference between the selected SLS column and the Focal baseline.
df_acc_res_1["SLS-FL"] = df_acc_res_1["SLS_MAX"] - df_acc_res_1["Focal"]
df_acc_res_1

In [None]:
# Global figure styling for the plot below.
plt.rcParams.update({
    "figure.dpi": 200,
    'axes.titlesize': 20,
    'axes.linewidth': 2,
    'axes.labelsize': 20,
    'font.size': 15,
})
plt.rc('xtick', labelsize=10)
plt.xticks(rotation=90)

plt.title('SLS-FL (10%)')
plt.xlabel('Datasets')
plt.ylabel('Δ Acc.')

xaxis = range(1,103)

# Dashed zero line: points above it mean SLS_MAX scored higher than Focal.
plt.axhline(y=0, xmin=0.01, xmax=0.99,
            color='darkgray', linestyle='--', linewidth=3)

# One curve per table: "SLS-FL" difference of every dataset.
for frame, legend_label in ((df_acc_res_0, 'Overall'),
                            (df_acc_res_1, 'First 10%')):
    plt.plot(xaxis, frame["SLS-FL"], linewidth=2.0, label=legend_label)

plt.ylim(-0.15, 0.25)
plt.legend(ncol=4, fontsize=11, loc='lower center')
plt.show()

### take2

In [None]:
# Display the even-position ("Accuracy") rows of df_acc_res; this frame is the
# input of the sign test in this section.
df_acc_res_0

In [None]:
# Value handed to bt.signtest as its `rope` argument in the next cell.
r = 0.001

In [None]:
# Bayesian sign test: baseline 'Focal' against our 'SLS_MAX' column.

rope = r
baselines = ['Focal']
ours = ['SLS_MAX']
dfs = [df_acc_res_0]

comp, basewin, draw, ourswin = [], [], [], []
z = 0
for ours_idx, ours_name in enumerate(ours):
    for base_name in baselines:
        names = (base_name, ours_name)
        comp.append(names)
        # Two-column array: baseline scores first, ours second.
        X = np.array(dfs[ours_idx][[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        # The three returned values are recorded as the baseline-win, draw
        # and ours-win probabilities respectively.
        basewin.append(left)
        draw.append(within)
        ourswin.append(right)

results = pd.DataFrame(comp, columns=["Baseline", "Ours"]).assign(
    Basewin_prob=basewin,
    Draw_prob=draw,
    Ourswin_prob=ourswin,
)
results

### Take3

In [None]:
# Display the odd-position ("G_Accuracy") rows of df_acc_res; this frame is the
# input of the sign test in this section.
df_acc_res_1

In [None]:
# Value handed to bt.signtest as its `rope` argument in the next cell.
r = 0.001

In [None]:
# Bayesian sign test: baseline 'Focal' against our 'SLS_MAX' column.

rope = r
baselines = ['Focal']
ours = ['SLS_MAX']
dfs = [df_acc_res_1]

comp, basewin, draw, ourswin = [], [], [], []
z = 0
for ours_idx, ours_name in enumerate(ours):
    for base_name in baselines:
        names = (base_name, ours_name)
        comp.append(names)
        # Two-column array: baseline scores first, ours second.
        X = np.array(dfs[ours_idx][[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        # The three returned values are recorded as the baseline-win, draw
        # and ours-win probabilities respectively.
        basewin.append(left)
        draw.append(within)
        ourswin.append(right)

results = pd.DataFrame(comp, columns=["Baseline", "Ours"]).assign(
    Basewin_prob=basewin,
    Draw_prob=draw,
    Ourswin_prob=ourswin,
)
results

# 20%

In [None]:
# Results of the 20% setting (first 20 percent of samples have more weight).
res = (
    pd.read_csv("102datasets_5CV(SLS_sel_group_0.2).csv")
    .dropna(axis=0)                          # discard rows with any missing value
    .rename(columns={'SLS(0.0)': 'Hard'})
)
# Take1: keep the rows whose first column is 'Acc' or 'G_Acc', then drop that
# first column and convert everything that remains to float.
df_acc = (
    res[res.iloc[:, 0].isin(['Acc', 'G_Acc'])]
    .reset_index(drop=True)
    .iloc[:, 1:]
    .astype(float)
)
df_acc

### take1: Visualization

In [None]:
# Dataset IDs in 1..105 that are skipped when building the SLS_MAX column.
EXCLUDED_DATASETS = {23, 82, 84}

# For every dataset pick one candidate column (position 3 onward):
# the best row-1 score wins, ties are broken by the best row-0 score, and the
# first such column is taken. Both of its values are appended (row 0, row 1).
acc_list = []
for dataset_id in range(1, 106):
    if dataset_id in EXCLUDED_DATASETS:
        continue
    candidates = (
        df_acc[df_acc.Dataset == dataset_id]
        .iloc[:, 3:]
        .reset_index(drop=True)
    )
    row_0, row_1 = candidates.iloc[0], candidates.iloc[1]
    # Columns tied for the highest row-1 value.
    tied = row_1.index[row_1 == row_1.max()]
    # Among those, idxmax returns the first column with the highest row-0 value.
    col = row_0[tied].idxmax()
    acc_list.extend([candidates.loc[0, col], candidates.loc[1, col]])

# .copy() makes each frame independent of the one it was sliced from, so the
# column assignments below do not raise SettingWithCopyWarning.
df_acc_res = df_acc.iloc[:, 1:3].copy()
df_acc_res['SLS_MAX'] = acc_list
df_acc_res_0 = df_acc_res.iloc[::2, :].copy()    # Accuracy
df_acc_res_0["SLS-FL"] = df_acc_res_0["SLS_MAX"] - df_acc_res_0["Focal"]
df_acc_res_1 = df_acc_res.iloc[1::2, :].copy()    # G_Accuracy
df_acc_res_1["SLS-FL"] = df_acc_res_1["SLS_MAX"] - df_acc_res_1["Focal"]

# Global figure styling for the plot below.
plt.rcParams["figure.dpi"] = 200
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['font.size'] = 15
plt.rc('xtick', labelsize=10)
plt.xticks(rotation=90)

plt.title('SLS-FL (20%)')
plt.xlabel('Datasets')
plt.ylabel('Δ Acc')

# One x position per plotted row (1-based), derived from the data instead of
# a hard-coded count.
xaxis = range(1, len(df_acc_res_0) + 1)

# Dashed zero line: points above it mean SLS_MAX scored higher than Focal.
plt.axhline(0, 0.01, 0.99, color='darkgray', linestyle='--', linewidth=3)

plt.plot(xaxis, df_acc_res_0["SLS-FL"], linewidth=2.0, label='Overall')
plt.plot(xaxis, df_acc_res_1["SLS-FL"], linewidth=2.0, label='First 20%')

plt.ylim(-0.15, 0.25)
plt.legend(ncol=4, fontsize=11, loc='lower center')
plt.show()

### take2

In [None]:
# Bayesian sign test on df_acc_res_0 (the "Accuracy" rows):
# baseline 'Focal' against our 'SLS_MAX' column.
r = 0.001      # value passed to bt.signtest as `rope`

rope = r
baselines = ['Focal']
ours = ['SLS_MAX']
dfs = [df_acc_res_0]

records = []
for ours_idx, ours_name in enumerate(ours):
    for base_name in baselines:
        names = (base_name, ours_name)
        # Two-column array: baseline scores first, ours second.
        X = np.array(dfs[ours_idx][[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        # The three returned values are recorded as the baseline-win, draw
        # and ours-win probabilities respectively.
        records.append({
            "Baseline": base_name,
            "Ours": ours_name,
            "Basewin_prob": left,
            "Draw_prob": within,
            "Ourswin_prob": right,
        })

results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"],
)
results

### Take3

In [None]:
# Bayesian sign test on df_acc_res_1 (the "G_Accuracy" rows):
# baseline 'Focal' against our 'SLS_MAX' column.
r = 0.001      # value passed to bt.signtest as `rope`

rope = r
baselines = ['Focal']
ours = ['SLS_MAX']
dfs = [df_acc_res_1]

records = []
for ours_idx, ours_name in enumerate(ours):
    for base_name in baselines:
        names = (base_name, ours_name)
        # Two-column array: baseline scores first, ours second.
        X = np.array(dfs[ours_idx][[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        # The three returned values are recorded as the baseline-win, draw
        # and ours-win probabilities respectively.
        records.append({
            "Baseline": base_name,
            "Ours": ours_name,
            "Basewin_prob": left,
            "Draw_prob": within,
            "Ourswin_prob": right,
        })

results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"],
)
results

# 30%

In [None]:
# Results of the 30% setting (first 30 percent of samples have more weight).
res = (
    pd.read_csv("102datasets_5CV(SLS_sel_group_0.3).csv")
    .dropna(axis=0)                          # discard rows with any missing value
    .rename(columns={'SLS(0.0)': 'Hard'})
)
# Take1: keep the rows whose first column is 'Acc' or 'G_Acc', then drop that
# first column and convert everything that remains to float.
df_acc = (
    res[res.iloc[:, 0].isin(['Acc', 'G_Acc'])]
    .reset_index(drop=True)
    .iloc[:, 1:]
    .astype(float)
)
df_acc

### take1: Visualization

In [None]:
# Dataset IDs in 1..105 that are skipped when building the SLS_MAX column.
EXCLUDED_DATASETS = {23, 82, 84}

# For every dataset pick one candidate column (position 3 onward):
# the best row-1 score wins, ties are broken by the best row-0 score, and the
# first such column is taken. Both of its values are appended (row 0, row 1).
acc_list = []
for dataset_id in range(1, 106):
    if dataset_id in EXCLUDED_DATASETS:
        continue
    candidates = (
        df_acc[df_acc.Dataset == dataset_id]
        .iloc[:, 3:]
        .reset_index(drop=True)
    )
    row_0, row_1 = candidates.iloc[0], candidates.iloc[1]
    # Columns tied for the highest row-1 value.
    tied = row_1.index[row_1 == row_1.max()]
    # Among those, idxmax returns the first column with the highest row-0 value.
    col = row_0[tied].idxmax()
    acc_list.extend([candidates.loc[0, col], candidates.loc[1, col]])

# .copy() makes each frame independent of the one it was sliced from, so the
# column assignments below do not raise SettingWithCopyWarning.
df_acc_res = df_acc.iloc[:, 1:3].copy()
df_acc_res['SLS_MAX'] = acc_list
df_acc_res_0 = df_acc_res.iloc[::2, :].copy()    # Accuracy
df_acc_res_0["SLS-FL"] = df_acc_res_0["SLS_MAX"] - df_acc_res_0["Focal"]
df_acc_res_1 = df_acc_res.iloc[1::2, :].copy()    # G_Accuracy
df_acc_res_1["SLS-FL"] = df_acc_res_1["SLS_MAX"] - df_acc_res_1["Focal"]

# Global figure styling for the plot below.
plt.rcParams["figure.dpi"] = 200
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['font.size'] = 15
plt.rc('xtick', labelsize=10)
plt.xticks(rotation=90)

# This cell plots the 0.3 results, so the title must say 30%.
plt.title('SLS-FL (30%)')
plt.xlabel('Datasets')
plt.ylabel('Δ Acc')

# One x position per plotted row (1-based), derived from the data instead of
# a hard-coded count.
xaxis = range(1, len(df_acc_res_0) + 1)

# Dashed zero line: points above it mean SLS_MAX scored higher than Focal.
plt.axhline(0, 0.01, 0.99, color='darkgray', linestyle='--', linewidth=3)

plt.plot(xaxis, df_acc_res_0["SLS-FL"], linewidth=2.0, label='Overall')
plt.plot(xaxis, df_acc_res_1["SLS-FL"], linewidth=2.0, label='First 30%')

plt.ylim(-0.15, 0.25)
plt.legend(ncol=4, fontsize=11, loc='lower center')
plt.show()

### take2

In [None]:
# Bayesian sign test on df_acc_res_0 (the "Accuracy" rows):
# baseline 'Focal' against our 'SLS_MAX' column.
r = 0.001      # value passed to bt.signtest as `rope`

rope = r
baselines = ['Focal']
ours = ['SLS_MAX']
dfs = [df_acc_res_0]

records = []
for ours_idx, ours_name in enumerate(ours):
    for base_name in baselines:
        names = (base_name, ours_name)
        # Two-column array: baseline scores first, ours second.
        X = np.array(dfs[ours_idx][[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        # The three returned values are recorded as the baseline-win, draw
        # and ours-win probabilities respectively.
        records.append({
            "Baseline": base_name,
            "Ours": ours_name,
            "Basewin_prob": left,
            "Draw_prob": within,
            "Ourswin_prob": right,
        })

results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"],
)
results

### Take3

In [None]:
# Bayesian sign test on df_acc_res_1 (the "G_Accuracy" rows):
# baseline 'Focal' against our 'SLS_MAX' column.
r = 0.001      # value passed to bt.signtest as `rope`

rope = r
baselines = ['Focal']
ours = ['SLS_MAX']
dfs = [df_acc_res_1]

records = []
for ours_idx, ours_name in enumerate(ours):
    for base_name in baselines:
        names = (base_name, ours_name)
        # Two-column array: baseline scores first, ours second.
        X = np.array(dfs[ours_idx][[base_name, ours_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        # The three returned values are recorded as the baseline-win, draw
        # and ours-win probabilities respectively.
        records.append({
            "Baseline": base_name,
            "Ours": ours_name,
            "Basewin_prob": left,
            "Draw_prob": within,
            "Ourswin_prob": right,
        })

results = pd.DataFrame(
    records,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"],
)
results