## 2. 102 Data

In [None]:
import bayesiantests as bt
import matplotlib.pyplot as plt

def stacked_bar(results, category_names):
    """
    Draw a horizontal stacked bar chart, one bar per key of *results*.

    Parameters
    ----------
    results : dict
        Maps each bar label to a list of values, one value per category.
        Every list is expected to have the same length, equal to the
        length of *category_names*.
    category_names : list of str
        Names of the categories; used as legend entries.

    Returns
    -------
    fig, ax
        The figure and axes created by ``plt.subplots``.
    """
    bar_labels = list(results.keys())
    values = np.array(list(results.values()))
    cumulative = values.cumsum(axis=1)
    n_categories = values.shape[1]
    # One colour per category, sampled evenly from the RdYlGn colormap.
    palette = plt.colormaps['RdYlGn'](np.linspace(0.15, 0.85, n_categories))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    # The x-axis ends at the largest row total.
    ax.set_xlim(0, values.sum(axis=1).max())

    for col, (category, rgba) in enumerate(zip(category_names, palette)):
        segment = values[:, col]
        # Each segment starts where the previous categories' total ends.
        left_edge = cumulative[:, col] - segment
        bars = ax.barh(bar_labels, segment, left=left_edge, height=0.5,
                       label=category, color=rgba)

        # White text when the product of the RGB channels is below 0.5,
        # dark grey otherwise.
        red, green, blue, _alpha = rgba
        if red * green * blue < 0.5:
            text_color = 'white'
        else:
            text_color = 'darkgrey'
        ax.bar_label(bars, label_type='center', color=text_color)

    ax.legend(ncols=len(category_names), bbox_to_anchor=(0, 1),
              loc='upper left', fontsize='small')

    return fig, ax

# C=0.1

In [None]:
import pandas as pd
import numpy as np
# opt#1: read the C=0.1 results, drop rows containing missing values, and
# rename the 'SLS(0.0)' column to 'Hard'.
res = (
    pd.read_csv("102datasets_5CV(SLS_opt#1_c0.1).csv")
    .dropna(axis=0)
    .rename(columns={'SLS(0.0)': 'Hard'})
)
res

In [None]:
# Keep only the rows whose first column equals 'Acc', renumber them from 0,
# then drop that first column and cast everything that remains to float.
is_acc_row = res.iloc[:, 0] == 'Acc'
df_acc = (
    res[is_acc_row]
    .reset_index(drop=True)
    .iloc[:, 1:]
    .astype(float)
)
df_acc

In [None]:
# Columns 1-2 of df_acc are kept as-is (presumably the 'Focal' and 'Hard'
# columns compared in the sign test -- confirm against the CSV header).
# .copy() makes this an independent frame, so adding a column below does not
# write into a slice of df_acc (avoids pandas' SettingWithCopyWarning).
df_acc_best = df_acc.iloc[:, 1:3].copy()

# Every column from position 3 onwards; the best (maximum) value per row
# across these columns becomes the 'SLS_MAX' score.
df_sls = df_acc.iloc[:, 3:]
max_sls_list = df_sls.max(axis=1).tolist()
df_acc_best['SLS_MAX'] = max_sls_list
df_acc_best

In [None]:
# Value passed as the `rope` argument of bt.signtest in the Bayesian sign
# test cells of this notebook.
r = 0.001

In [None]:
# Bayesian Sign Test
#
# Run bt.signtest for every (baseline, ours) pair of columns and collect the
# three values it returns into one row per comparison.

rope = r
baselines = ['Focal', 'Hard']
ours = ['SLS_MAX']
dfs = [df_acc_best]  # dfs[k] is the frame holding the scores for ours[k]

rows = []
for our_name, scores in zip(ours, dfs):
    for base_name in baselines:
        names = (base_name, our_name)
        # Two-column array: baseline scores first, our scores second.
        X = np.array(scores[[base_name, our_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        rows.append((base_name, our_name, left, within, right))

# The three signtest outputs are recorded, in order, as the baseline-win,
# draw and ours-win probabilities.
results = pd.DataFrame(
    rows,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"],
)
results

# C=0.2

In [None]:
# Read the C=0.2 results, drop rows containing missing values, and rename the
# 'SLS(0.0)' column to 'Hard'.
res = pd.read_csv("102datasets_5CV(SLS_opt#1_c0.2).csv")
res = res.dropna(axis=0)
res = res.rename(columns={'SLS(0.0)': 'Hard'})

# Keep only the rows whose first column equals 'Acc', then drop that column
# and cast the rest to float.
df_acc = res[res.iloc[:, 0] == 'Acc']
df_acc = df_acc.reset_index(drop=True)
df_acc = df_acc.iloc[:, 1:].astype(float)

# Mean and standard deviation of every column of df_acc after the first one.
# The columns are taken from the frame itself rather than assuming there are
# exactly 12 of them.
col_name = res.columns[2:]
method_scores = df_acc.iloc[:, 1:]
final = pd.DataFrame(
    {
        "mean": method_scores.mean().to_numpy(),
        # ddof=0 is the population standard deviation, matching np.std's
        # default; pandas would otherwise use ddof=1.
        "std": method_scores.std(ddof=0).to_numpy(),
    },
    index=col_name,
)
final.sort_values("mean", ascending=False)

In [None]:
# Columns 1-2 of df_acc are kept as-is (presumably the 'Focal' and 'Hard'
# columns compared in the sign test -- confirm against the CSV header).
# .copy() makes this an independent frame, so adding a column below does not
# write into a slice of df_acc (avoids pandas' SettingWithCopyWarning).
df_acc_best = df_acc.iloc[:, 1:3].copy()

# Every column from position 3 onwards; the best (maximum) value per row
# across these columns becomes the 'SLS_MAX' score.
df_sls = df_acc.iloc[:, 3:]
max_sls_list = df_sls.max(axis=1).tolist()
df_acc_best['SLS_MAX'] = max_sls_list
df_acc_best

In [None]:
# Bayesian Sign Test
#
# Run bt.signtest for every (baseline, ours) pair of columns and collect the
# three values it returns into one row per comparison.

rope = r
baselines = ['Focal', 'Hard']
ours = ['SLS_MAX']
dfs = [df_acc_best]  # dfs[k] is the frame holding the scores for ours[k]

rows = []
for our_name, scores in zip(ours, dfs):
    for base_name in baselines:
        names = (base_name, our_name)
        # Two-column array: baseline scores first, our scores second.
        X = np.array(scores[[base_name, our_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        rows.append((base_name, our_name, left, within, right))

# The three signtest outputs are recorded, in order, as the baseline-win,
# draw and ours-win probabilities.
results = pd.DataFrame(
    rows,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"],
)
results

# C=0.3

In [None]:
# Read the C=0.3 results, drop rows containing missing values, and rename the
# 'SLS(0.0)' column to 'Hard'.
res = pd.read_csv("102datasets_5CV(SLS_opt#1_c0.3).csv")
res = res.dropna(axis=0)
res = res.rename(columns={'SLS(0.0)': 'Hard'})

# Keep only the rows whose first column equals 'Acc', then drop that column
# and cast the rest to float.
df_acc = res[res.iloc[:, 0] == 'Acc']
df_acc = df_acc.reset_index(drop=True)
df_acc = df_acc.iloc[:, 1:].astype(float)

# Mean and standard deviation of every column of df_acc after the first one.
# The columns are taken from the frame itself rather than assuming there are
# exactly 12 of them.
col_name = res.columns[2:]
method_scores = df_acc.iloc[:, 1:]
final = pd.DataFrame(
    {
        "mean": method_scores.mean().to_numpy(),
        # ddof=0 is the population standard deviation, matching np.std's
        # default; pandas would otherwise use ddof=1.
        "std": method_scores.std(ddof=0).to_numpy(),
    },
    index=col_name,
)
final.sort_values("mean", ascending=False)

In [None]:
# Columns 1-2 of df_acc are kept as-is (presumably the 'Focal' and 'Hard'
# columns compared in the sign test -- confirm against the CSV header).
# .copy() makes this an independent frame, so adding a column below does not
# write into a slice of df_acc (avoids pandas' SettingWithCopyWarning).
df_acc_best = df_acc.iloc[:, 1:3].copy()

# Every column from position 3 onwards; the best (maximum) value per row
# across these columns becomes the 'SLS_MAX' score.
df_sls = df_acc.iloc[:, 3:]
max_sls_list = df_sls.max(axis=1).tolist()
df_acc_best['SLS_MAX'] = max_sls_list
df_acc_best

In [None]:
# Bayesian Sign Test
#
# Run bt.signtest for every (baseline, ours) pair of columns and collect the
# three values it returns into one row per comparison.

rope = r
baselines = ['Focal', 'Hard']
ours = ['SLS_MAX']
dfs = [df_acc_best]  # dfs[k] is the frame holding the scores for ours[k]

rows = []
for our_name, scores in zip(ours, dfs):
    for base_name in baselines:
        names = (base_name, our_name)
        # Two-column array: baseline scores first, our scores second.
        X = np.array(scores[[base_name, our_name]])
        left, within, right = bt.signtest(X, rope=rope, verbose=True, names=names)
        rows.append((base_name, our_name, left, within, right))

# The three signtest outputs are recorded, in order, as the baseline-win,
# draw and ours-win probabilities.
results = pd.DataFrame(
    rows,
    columns=["Baseline", "Ours", "Basewin_prob", "Draw_prob", "Ourswin_prob"],
)
results