In [2]:
import pandas as pd
import numpy as np

seeds = {
    "uniform-cifar10": [0, 2],
    "uniform-cifar20": [0, 2],
    "clcifar10": [0, 2, 10],
    "clcifar20": [0, 2, 10],
    "clcifar10-iid": [0, 2, 10],
    "clcifar20-iid": [0, 2, 10],
}

algos = ["fwd-u", "fwd-r", "ure-ga-u", "ure-ga-r", "scl-nl", "scl-exp", "l-w", "l-uw", "pc-sigmoid"]

columns = ['selected by URE', 'URE', 'selected by SCEL', 'SCEL', 'selected by val_acc', 'valid_acc', 'best_ure', 'best_scel', 'best_val_acc']


## 1. Synthetic Uniform CL v.s. CLCIFAR

In [6]:
# "selected by val_acc", "best_val_acc" for each algorithm
exp1_result = {}

for dataset in ["uniform-cifar10", "clcifar10", "uniform-cifar20", "clcifar20"]:
    df = []
    for seed in seeds[dataset]:
        df.append(pd.read_csv(f"{seed}/{dataset}.csv"))

    def reformat(row):
        # row['acc'] = f"{row['mean'].round(2)}\scriptsize($\pm${row['std'].round(2)})"
        row['acc'] = str(row['mean'].round(2)) + "\scriptsize{$\pm$" + str(row['std'].round(2)) + '}'
        row = row.drop(['mean', 'std'])
        return row
    
    dataset_df = pd.DataFrame()
    for algo in algos:
        result = []
        for i in range(len(seeds[dataset])):
            result.append(df[i].loc[df[i]['algo'] == algo])
        if not result:
            dataset_df[algo] = "-"
            continue
        if algo == 'pc-sigmoid':
            print(dataset, len(result))
        res = pd.DataFrame()
        res['mean'] = pd.concat(result, axis=0).mean(axis=0, numeric_only=True)
        res['std'] = pd.concat(result, axis=0).std(axis=0, numeric_only=True)

        res = res.apply(reformat, axis=1).transpose()
        dataset_df[algo] = res.loc['acc']
    
    for row in ["selected by val_acc", "best_val_acc"]:
        acc_list = dataset_df.loc[row].to_list()
        max_idx = np.array(list(map(lambda x: 0 if x == '-' else float(x.split('\\')[0]), acc_list))).argmax()
        # print(np.array(map(lambda x: 0 if x == '-' else float(x.split('\\')[0]), acc_list)))
        max_idx = algos[max_idx]
        dataset_df.at[row, max_idx] = "\\textbf{" + dataset_df.loc[row, max_idx] + "}"

    dataset_df = dataset_df.transpose()
    dataset_df.drop(['selected by URE', 'URE', 'selected by SCEL', 'SCEL', 'valid_acc', 'best_ure', 'best_scel'], axis=1, inplace=True)
    dataset_df.rename(columns={"selected by val_acc": "valid_acc", "best_val_acc": "valid_acc (ES)"}, inplace=True)

    exp1_result[dataset] = dataset_df
    
exp1_result = pd.concat(exp1_result, axis=1)
# exp1_result
for idx, row in exp1_result.iterrows():
    row = row.to_list()
    print(f"{idx}     & {row[0]}     & {row[1]}         & {row[2]} & {row[3]}     & {row[4]}     & {row[5]}         & {row[6]}  & {row[7]}      \\\\")


uniform-cifar10 2
clcifar10 3
uniform-cifar20 2
clcifar20 3
fwd-u     & 48.44\scriptsize{$\pm$0.97}     & 49.33\scriptsize{$\pm$0.07}         & 34.09\scriptsize{$\pm$1.16} & 36.83\scriptsize{$\pm$1.17}     & 17.4\scriptsize{$\pm$4.12}     & 17.97\scriptsize{$\pm$2.69}         & 7.47\scriptsize{$\pm$0.37}  & 8.27\scriptsize{$\pm$0.77}      \\
fwd-r     & \textbf{nan\scriptsize{$\pm$nan}}     & \textbf{nan\scriptsize{$\pm$nan}}         & 28.88\scriptsize{$\pm$0.65} & \textbf{38.9\scriptsize{$\pm$1.57}}     & \textbf{nan\scriptsize{$\pm$nan}}     & \textbf{nan\scriptsize{$\pm$nan}}         & \textbf{16.14\scriptsize{$\pm$1.11}}  & \textbf{20.31\scriptsize{$\pm$0.25}}      \\
ure-ga-u     & 39.55\scriptsize{$\pm$0.06}     & 39.67\scriptsize{$\pm$0.11}         & \textbf{34.59\scriptsize{$\pm$0.76}} & 36.39\scriptsize{$\pm$0.67}     & 13.52\scriptsize{$\pm$2.76}     & 14.08\scriptsize{$\pm$1.97}         & 7.59\scriptsize{$\pm$0.36}  & 10.06\scriptsize{$\pm$0.72}      \\
ure-ga-r     & nan\sc

1. fwd-r suffers from overfitting on both clcifar10 and clcifar20. It is not the case when T is uniform.
2. ure-ga-u turn out to be a robust choice in clcifar20.

# 1.5 Overfit Behaviors

In [None]:
# "selected by val_acc", "best_val_acc" for each algorithm
exp1_5_result = {}
df = pd.read_csv("overfit/result.csv")

for dataset in ["clcifar10", "clcifar20"]:

    def reformat(row):
        row['acc'] = f"{row['mean'].round(2)}\scriptsize({row['std'].round(2)})"
        row['acc'] = row['acc'][:row['acc'].index('(')] + '{' + row['acc'][row['acc'].index('('):] + '}'
        row = row.drop(['mean', 'std'])
        return row
    
    dataset_df = {}
    dataset_df_es = {}
    for algo in algos:

        algo_df = df.loc[(df['dataset_name'] == dataset) & (df['algo'] == algo)]
        # dataset_df[algo] = algo_df.sort_values('valid_acc', ascending=False).iloc[0]
        dataset_df[algo] = algo_df.sort_values('valid_acc', ascending=False).iloc[0]['test_acc'] * 100
        dataset_df_es[algo] = algo_df.sort_values('best_epoch-valid_acc.valid_acc', ascending=False).iloc[0]['best_epoch-valid_acc.test_acc'] * 100
    
    # dataset_df = dataset_df.transpose()
    # dataset_df_es = dataset_df_es.transpose()
    # dataset_df.drop(['selected by URE', 'URE', 'selected by SCEL', 'SCEL', 'valid_acc', 'best_ure', 'best_scel'], axis=1, inplace=True)
    # dataset_df.rename(columns={"selected by val_acc": "valid_acc", "best_val_acc": "valid_acc (ES)"}, inplace=True)

    dataset_df = pd.DataFrame([dataset_df, dataset_df_es], index=["valid_acc", "valid_acc(ES)"]).transpose()

    exp1_5_result[dataset] = dataset_df
    
exp1_5_result = pd.concat(exp1_5_result, axis=1)
exp1_5_result
for idx, row in exp1_5_result.iterrows():
    row = row.to_list()
    print(f"{idx} & {round(row[0], 2)} & {round(row[1], 2)} & {round(row[2], 2)} & {round(row[3], 2)} \\\\")


## 2. Validation Objectives

In [None]:
# "selected by val_acc", "best_val_acc" for each algorithm
exp2_result = {}

for dataset in ["uniform-cifar10", "clcifar10", "uniform-cifar20", "clcifar20"]:
    df = []
    for seed in seeds[dataset]:
        df.append(pd.read_csv(f"{seed}/{dataset}.csv"))

    def reformat(row):
        row['acc'] = row['mean'].round(2)
        row = row.drop(['mean', 'std'])
        return row
    
    def convert_nan(row):
        if np.isnan(row['fwd-r']):
            row['fwd-r'] = '-'
        if np.isnan(row['ure-ga-r']):
            row['ure-ga-r'] = '-'
    
    dataset_df = pd.DataFrame()
    for algo in algos:
        result = []
        for i in range(len(seeds[dataset])):
            if i != 0:
                result.append(df[i].loc[df[i]['algo'] == algo])
            elif algo != 'pc-sigmoid':
                result.append(df[i].loc[df[i]['algo'] == algo])
        if not result:
            dataset_df[algo] = "-"
            continue
        res = pd.DataFrame()
        res['mean'] = pd.concat(result, axis=0).mean(axis=0, numeric_only=True)
        res['std'] = pd.concat(result, axis=0).std(axis=0, numeric_only=True)
        res = res.apply(reformat, axis=1).transpose()

        dataset_df[algo] = res.loc['acc']
    
    
    dataset_df = dataset_df.transpose()
    dataset_df.drop(['URE', 'SCEL', 'valid_acc', 'best_val_acc', 'valid_acc', "best_ure", "best_scel"], axis=1, inplace=True)
    dataset_df = dataset_df.reindex(columns=["selected by URE", "selected by SCEL", "selected by val_acc"])
    dataset_df = dataset_df.rename(columns={"selected by URE": "URE", "selected by SCEL":"SCEL", "selected by val_acc":"valid acc"})
    gap = []
    for _, row in dataset_df.iterrows():
        gap.append(row['valid acc'] - max(row['URE'], row['SCEL']))
    dataset_df['gap'] = gap
    dataset_df.apply(convert_nan)

    exp2_result[dataset] = dataset_df
    
exp2_result = pd.concat(exp2_result, axis=1)
exp2_result

## 3. Feature Independence

In [21]:
exp3_result = {d: {} for d in ["clcifar10", "clcifar10-iid", "clcifar20", "clcifar20-iid"]}

for dataset in ["clcifar10", "clcifar10-iid", "clcifar20", "clcifar20-iid"]:
    df = []
    for seed in seeds[dataset]:
        df.append(pd.read_csv(f"{seed}/{dataset}.csv"))

    def reformat(row):
        # row['acc'] = f"{row['mean'].round(2)}\scriptsize({row['std'].round(2)})"
        row['acc'] = str(row['mean'].round(2)) + "\scriptsize{$\pm$" + str(row['std'].round(2)) + '}'
        row = row.drop(['mean', 'std'])
        return row
    
    dataset_df = pd.DataFrame()
    for algo in algos:
        result = []
        for i in range(len(seeds[dataset])):
            # if df[i].loc[df[i]['algo'] == algo].shape[0] == 0:
            #     break
            if i != 0:
                result.append(df[i].loc[df[i]['algo'] == algo])
            elif algo != 'pc-sigmoid':
                result.append(df[i].loc[df[i]['algo'] == algo])
        if not result:
            dataset_df[algo] = "-"
            continue
        # print(dataset, algo, [r['selected by val_acc'].item() for r in result])
        exp3_result[dataset][algo] = [r['selected by val_acc'].item() for r in result]
        continue
        res = pd.DataFrame()
        res['mean'] = pd.concat(result, axis=0).mean(axis=0, numeric_only=True)
        res['std'] = pd.concat(result, axis=0).std(axis=0, numeric_only=True)
        res = res.apply(reformat, axis=1).transpose()

        dataset_df[algo] = res.loc['acc']
    
    
    # dataset_df = dataset_df.transpose()
    # dataset_df.drop(['selected by URE', 'URE', 'selected by SCEL', 'SCEL', 'valid_acc', 'best_ure', 'best_scel', "best_val_acc"], axis=1, inplace=True)
    # dataset_df.rename(columns={"selected by val_acc": "valid_acc"}, inplace=True)
    
    # exp3_result[dataset] = dataset_df
    
# exp3_result = pd.concat(exp3_result, axis=1)
exp3_result

for algo in algos:
    diff = [exp3_result['clcifar10-iid'][algo][i]-exp3_result['clcifar10'][algo][i] for i in range(len(exp3_result['clcifar10'][algo]))]
    print(algo, round(np.mean(diff), 2), round(np.std(diff), 2))

# for idx, row in exp3_result.iterrows():
#     row = row.to_list()
#     result_str = f"{idx}\t&\t{row[0]}\t&\t{row[1]}\t&\t{row[2]}\t&\t{row[3]}\\\\"
#     # result_str = f"{idx}" + '\t& '
#     # if float(row[0].split('\\')[0]) > float(row[1].split('\\')[0]):
#     #     result_str += "\\textbf{" + row[0] + "}" + '\t& ' + row[1] + '\t& '
#     # else:
#     #     result_str += row[0] + '\t& ' + "\\textbf{" + row[1] + "}"+  '\t& '

#     # if float(row[2].split('\\')[0]) > float(row[3].split('\\')[0]):
#     #     result_str += "\\textbf{" + row[2] + "}" + '\t& ' + row[3] + '\\\\'
#     # else:
#     #     result_str += row[2] + '\t& ' + "\\textbf{" + row[3] + "}"+  '\\\\'
#     print(result_str)



fwd-u -1.1 2.17
fwd-r -0.36 1.15
ure-ga-u -3.03 1.25
ure-ga-r 0.74 0.35
scl-nl -0.67 1.81
scl-exp -1.97 1.16
l-w -2.5 0.56
l-uw -3.53 1.36
pc-sigmoid -2.03 2.05


## 4. Different Data Cleaning Rate

In [None]:
import pandas as pd
import numpy as np
from pprint import pprint
import matplotlib.pyplot as plt

df = pd.read_csv("data_cleaning/data_cleaning-result.csv")

df['data_cleaning_rate'].value_counts()

for dataset in ['clcifar10-noiseless', 'clcifar20-noiseless']:
    print(dataset)
    for algo in ['fwd-u', 'fwd-r', 'ure-ga-u', 'ure-ga-r']:
        result = []
        zero_df = pd.read_csv(f"0/{dataset.split('-')[0]}.csv")
        result.append(round(float(zero_df.loc[(zero_df['algo'] == algo)]['selected by val_acc']), 2))
        for rate in [0.25, 0.5, 0.75, 1.0]:
            sub_df = df.loc[(df['data_cleaning_rate'] == rate) & (df['algo'] == algo) & (df['dataset_name'] == dataset)]
            result.append(round(sub_df.sort_values('valid_acc', ascending=False).iloc[0]['test_acc'] * 100, 2))
        print(algo, result)
        result = []
        result.append(round(float(zero_df.loc[(zero_df['algo'] == algo)]['best_val_acc']), 2))
        for rate in [0.25, 0.5, 0.75, 1.0]:
            sub_df = df.loc[(df['data_cleaning_rate'] == rate) & (df['algo'] == algo) & (df['dataset_name'] == dataset)]
            result.append(round(sub_df.sort_values('best_epoch-valid_acc.valid_acc', ascending=False).iloc[0]['best_epoch-valid_acc.test_acc'] * 100, 2))
        print(algo, result)
    

In [None]:
import matplotlib.pyplot as plt

# Define the data
fwd_u = [35.43, 39.35, 47.78, 54.5, 64.72]
fwd_u_es = [36.97, 39.17, 48.2, 55.51, 64.72]
fwd_r = [28.14, 36.23, 37.73, 47.86, 56.47]
fwd_r_es = [40.61, 42.88, 47.81, 51.39, 63.03]

uniform_clcifar10_fwd = 48.44

# Create the plot
fig, ax = plt.subplots()
ax.plot(fwd_u, linestyle='solid', marker='o', color='blue', label='fwd-u')
ax.plot(fwd_u_es, linestyle='--', marker='^', color='blue', label='fwd-u(ES)')
ax.plot(fwd_r, linestyle='solid', marker='o', color='red', label='fwd-r')
ax.plot(fwd_r_es, linestyle='--', marker='^', color='red', label='fwd-r(ES)')
# ax.axhline(y=uniform_clcifar10_fwd, color='tomato', label='uniform CL')

xticks = [0, 0.25, 0.5, 0.75, 1.0]
# ax.fill_between(xticks, fwd_u, fwd_u_es, where=fwd_u_es>=fwd_u, interpolate=True, color='gray', alpha=0.3)
# ax.fill_between(xticks, y1, y2, where=y2<y1, interpolate=True, color='green', alpha=0.3)

# Add labels and legend
ax.set_xlabel('Noise Cleaning Rate')

ax.set_xticks(range(len(fwd_u)))
ax.set_xticklabels(xticks)

yticks = list(range(0, 71, 10))
ax.set_yticks(yticks)

ax.set_ylabel('Accuracy')
ax.legend()

plt.savefig("data_cleaning/noise-cleaning-fwd-clcifar10.png")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Define the data
fwd_u = [7.05, 8.64, 9.46, 10.41, 10.9]
fwd_u_es = [7.38, 8.65, 10.31, 10.95, 10.6]
fwd_r = [14.96, 16.64, 18.02, 18.27, 19.24]
fwd_r_es = [20.23, 21.26, 21.96, 23.77, 24.9]

uniform_clcifar20_fwd = 17.4

# Create the plot
fig, ax = plt.subplots()
ax.plot(fwd_u, linestyle='solid', marker='o', color='blue', label='fwd-u')
ax.plot(fwd_u_es, linestyle='--', marker='^', color='blue', label='fwd-u(ES)')
ax.plot(fwd_r, linestyle='solid', marker='o', color='red', label='fwd-r')
ax.plot(fwd_r_es, linestyle='--', marker='^', color='red', label='fwd-r(ES)')
# ax.axhline(y=uniform_clcifar20_fwd, color='tomato', label='uniform CL')

# Add labels and legend
ax.set_xlabel('Noise Cleaning Rate')

xticks = [0, 0.25, 0.5, 0.75, 1.0]
ax.set_xticks(range(len(fwd_u)))
ax.set_xticklabels(xticks)

yticks = list(range(0, 71, 10))
ax.set_yticks(yticks)

ax.set_ylabel('Accuracy')
ax.legend()

plt.savefig("data_cleaning/noise-cleaning-fwd-clcifar20.png")

plt.show()

## 5. Learning with Multiple CL

In [None]:
import pandas as pd
from pprint import pprint
df = pd.read_csv("mcl/mcl_result.csv")
df.loc[(df['algo'] == 'fwd-u') & (df['num_cl'] == 2)]['seed'].value_counts()

In [None]:
df = pd.read_csv("mcl/mcl_result.csv")
result = pd.DataFrame()

for dataset in ['clcifar10-mcl', 'clcifar20-mcl']:
    sub_result = pd.DataFrame()
    for ncl in [2, 3]:
        res_df = {}


        for algo in ["fwd-u", "fwd-r", "ure-ga-u", "ure-ga-r", "scl-nl", "scl-exp", "l-w", "l-uw", "pc-sigmoid"]:
            df1 = df.loc[(df['num_cl'] == ncl) & (df['dataset_name'] == dataset) & (df['algo'] == algo)]
            last_acc = round(df1.sort_values('valid_acc', ascending=False).iloc[0]['test_acc']*100, 2)
            early_acc = round(df1.sort_values('best_epoch-valid_acc.valid_acc', ascending=False).iloc[0]['best_epoch-valid_acc.test_acc']*100, 2)
            res_df[algo] = f"{last_acc}\scriptsize" + '{' + f"({early_acc})" + '} &'
        
        sub_result[ncl] = res_df
    print(dataset)
    pprint(sub_result)

## Calculate the win/loss rate of URE, SCEL

In [None]:
datasets = ['clcifar20-iid', 'clcifar20', 'clcifar10-iid', 'clcifar10']

win_rate = {}
    
                

In [None]:
dist = {'scel':[], 'ure':[], 'tie':[]}

def scel_win(acc):
    dist['scel'].append(acc)

def ure_win(acc):
    dist['ure'].append(acc)

def tie_win(acc):
    dist['tie'].append(acc)

results = {}
for d in ['clcifar20-iid', 'clcifar20', 'clcifar10-iid', 'clcifar10']:
    count_ure, tie, count_scel = 0, 0, 0
    es_count_ure, es_tie, es_count_scel = 0, 0, 0
    scel_err_dis = []
    ure_err_dis = []
    es_scel_err_dis = []
    es_ure_err_dis = []

    scel_success = 0
    ure_success = 0
    es_scel_success = 0
    es_ure_success = 0
    for seed in [0, 2, 10]:
        df = pd.read_csv(f"{seed}/wandb_result-{seed}.csv")

        for algo in ["fwd-u", "fwd-r", "ure-ga-u", "ure-ga-r", "scl-nl", "scl-exp", "l-w", "l-uw"]:

            possible_res = df.loc[(df['algo'] == algo) & (df['dataset_name'] == d)]

            # last_scel_acc =         possible_res.sort_values('scel').iloc[0]['test_acc']
            sub_df = possible_res.sort_values('scel')
            sub_df = sub_df.loc[~sub_df['test_acc'].isna()]
            last_scel_acc = sub_df.iloc[0]['test_acc']

            early_scel_acc =        possible_res.sort_values('best_epoch-scel.scel').iloc[0]['best_epoch-scel.test_acc']
            last_ure_acc =          possible_res.sort_values('ure').iloc[0]['test_acc']
            early_ure_acc =         possible_res.sort_values('best_epoch-ure.ure').iloc[0]['best_epoch-ure.test_acc']
            last_true_label_acc =   possible_res.sort_values('valid_acc', ascending=False).iloc[0]['test_acc']
            early_true_label_acc =  possible_res.sort_values('best_epoch-valid_acc.valid_acc', ascending=False).iloc[0]['best_epoch-valid_acc.test_acc']

            if d == "clcifar20":
                print(last_scel_acc)

            ure_err_dis.append(last_true_label_acc - last_ure_acc)
            scel_err_dis.append(last_true_label_acc - last_scel_acc)
            if last_scel_acc == last_true_label_acc:
                scel_success += 1
            if last_ure_acc == last_true_label_acc:
                ure_success += 1
            
            es_ure_err_dis.append(early_true_label_acc - early_ure_acc)
            es_scel_err_dis.append(early_true_label_acc - early_scel_acc)
            if early_scel_acc == early_true_label_acc:
                es_scel_success += 1
            if early_ure_acc == early_true_label_acc:
                es_ure_success += 1
                
    print(d, scel_success/24, ure_success/24, es_scel_success/24, es_ure_success/24)
    
    scel_result = pd.DataFrame([np.mean(scel_err_dis), np.mean(es_scel_err_dis)], index=['last', 'ES'])
    ure_result = pd.DataFrame([np.mean(ure_err_dis), np.mean(es_ure_err_dis)], index=['last', 'ES'])

    results[d] = pd.concat([scel_result, ure_result]).transpose()

results = pd.concat(results, axis=0, ignore_index=False)
results

# import matplotlib.pyplot as plt

# plt.scatter([e[0] for e in ure_err_dis], [e[1] for e in ure_err_dis], color='red')
# plt.scatter([e[0] for e in scel_err_dis], [e[1] for e in scel_err_dis], color='green')
# plt.show()

# fig, axs = plt.subplots(1, 3, figsize=(10, 3))

# axs[0].hist(dist['scel'], bins=30, alpha=0.5, color='green')
# axs[0].set_title("SCEL win")

# axs[1].hist(dist['tie'], bins=30, alpha=0.5, color='blue')
# axs[1].set_title("tie")

# axs[2].hist(dist['ure'], bins=30, alpha=0.5, color='red')
# axs[2].set_title("URE win")


# plt.show()
                

## Distance to optimal accuracy
### Last epoch

In [None]:
datasets = ['clcifar20-noiseless', 'clcifar20-iid', 'clcifar20-aggregate',
           'clcifar20', 'clcifar10-noiseless', 'clcifar10-iid',
           'clcifar10-aggregate', 'clcifar10']

win_rate = []

res = {}
for d in datasets:
    for seed in [2, 10]:
        df = pd.read_csv(f"{seed}/wandb_result-{seed}.csv")

        dis = []
        for algo in ["fwd-u", "fwd-r", "ure-ga-u", "ure-ga-r", "scl-nl", "scl-exp", "l-w", "l-uw"]:

            possible_res = df.loc[(df['algo'] == algo) & (df['dataset_name'] == d)]
            dis.append(possible_res.sort_values('valid_acc', ascending=False).iloc[0]['test_acc'] - possible_res.sort_values('scel').iloc[0]['test_acc'])
            
        res[d] = round(np.mean(dis) * 100, 2)
win_rate.append(res)

res = {}
for d in datasets:
    for seed in [2, 10]:
        df = pd.read_csv(f"{seed}/wandb_result-{seed}.csv")

        dis = []
        for algo in ["fwd-u", "fwd-r", "ure-ga-u", "ure-ga-r", "scl-nl", "scl-exp", "l-w", "l-uw"]:

            possible_res = df.loc[(df['algo'] == algo) & (df['dataset_name'] == d)]
            dis.append(possible_res.sort_values('valid_acc', ascending=False).iloc[0]['test_acc'] - possible_res.sort_values('ure').iloc[0]['test_acc'])
            
        res[d] = round(np.mean(dis) * 100, 2)
win_rate.append(res)
        
win_rate = pd.DataFrame(win_rate, index=['|scel - test_acc|', '|ure - test_acc|'])
win_rate


Conclusion: 
1. The difference between URE and SCEL is small ( ~ 1%)
2. The distance to valid_acc is dataset dependent.

### Early stopping

In [None]:
datasets = ['clcifar20-noiseless', 'clcifar20-iid', 'clcifar20-aggregate',
           'clcifar20', 'clcifar10-noiseless', 'clcifar10-iid',
           'clcifar10-aggregate', 'clcifar10']

win_rate = []

res = {}
for d in datasets:
    for seed in [2, 10]:
        df = pd.read_csv(f"{seed}/wandb_result-{seed}.csv")

        dis = []
        for algo in ["fwd-u", "fwd-r", "ure-ga-u", "ure-ga-r", "scl-nl", "scl-exp", "l-w", "l-uw"]:

            possible_res = df.loc[(df['algo'] == algo) & (df['dataset_name'] == d)]
            dis.append(possible_res.sort_values('best_epoch-valid_acc.valid_acc', ascending=False).iloc[0]['best_epoch-valid_acc.test_acc'] - possible_res.sort_values('best_epoch-scel.scel').iloc[0]['best_epoch-scel.test_acc'])
            
        res[d] = round(np.mean(dis) * 100, 2)
win_rate.append(res)

res = {}
for d in datasets:
    for seed in [2, 10]:
        df = pd.read_csv(f"{seed}/wandb_result-{seed}.csv")

        dis = []
        for algo in ["fwd-u", "fwd-r", "ure-ga-u", "ure-ga-r", "scl-nl", "scl-exp", "l-w", "l-uw"]:

            possible_res = df.loc[(df['algo'] == algo) & (df['dataset_name'] == d)]
            dis.append(possible_res.sort_values('best_epoch-valid_acc.valid_acc', ascending=False).iloc[0]['best_epoch-valid_acc.test_acc'] - possible_res.sort_values('best_epoch-ure.ure').iloc[0]['best_epoch-ure.test_acc'])
            
        res[d] = round(np.mean(dis) * 100, 2)
win_rate.append(res)
        
win_rate = pd.DataFrame(win_rate, index=['|scel - test_acc|', '|ure - test_acc|'])
win_rate
