In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Global matplotlib font defaults (family and size) applied to every figure below.
plt.rc('font',family='Times New Roman', size=25)

In [None]:
# Directory holding the result files. Paths are built by plain string
# concatenation (results_dir + filename), so the trailing slash is required.
results_dir = 'results/'

In [None]:
# Collect the result CSVs found in results_dir.
# - Match on the extension rather than a substring, so names such as
#   'foo.csv.bak' are not picked up.
# - Skip anything whose name contains 'test.csv'.
# - Sort the names: os.listdir returns entries in arbitrary order, which would
#   otherwise make the row order of the combined frame machine-dependent.
files = sorted(
    f for f in os.listdir(results_dir)
    if f.endswith('.csv') and 'test.csv' not in f
)
files

In [None]:
# Load every result file into a single frame, tagging each row with the two
# fields encoded in the file name (split on '_'): the first token becomes
# 'data', the second token minus its first character and its 4-character
# suffix becomes 'c' (e.g. 'c5.csv' -> '5'). 'c' is kept as a string.
frames = []
for f in files:
    df_tmp = pd.read_csv(results_dir+f, index_col=False)
    s_tmp = f.split('_')
    df_tmp['data'] = s_tmp[0]
    df_tmp['c'] = s_tmp[1][1:-4]
    frames.append(df_tmp)

# Concatenate once instead of growing the frame inside the loop: repeated
# concat is quadratic and concatenating onto an empty DataFrame triggers a
# FutureWarning on recent pandas. ignore_index=True yields the same fresh
# 0..n-1 index that reset_index(drop=True) produced before.
df = pd.concat(frames, ignore_index=True)

# 'Unnamed: 0' is presumably the index column written when the CSVs were
# saved -- TODO confirm. The drop is left strict so a schema change fails loudly.
df = df.drop(['Unnamed: 0'], axis=1)
df = df.rename({'fn_rates': 'fn_rate'}, axis=1)

# 1 where the row's state is 'RWR', 0 otherwise (vectorised; replaces a
# row-by-row .loc loop). int64 matches the dtype the Python-int list produced.
df['rwr'] = (df['state'] == 'RWR').astype('int64')

df

In [None]:
# Summary over rows with caipi_iter <= 100.
# Step 1: collapse each (exp_iter, data, c) run to one row -- lowest error
#         rates, highest explanation scores, and the number of 'rwr' rows.
# Step 2: report mean and std of those per-run values for each (data, c).
columns_not_summarised = ['caipi_iter', 'state', 'labeled_size', 'unlabeled_size',
                          'precision', 'recall', 'accuracy']
within_first_100 = df['caipi_iter'] <= 100

df_tmp = df.loc[within_first_100].drop(columns=columns_not_summarised)

per_run_aggregations = {
    'fp_rate': 'min',
    'fn_rate': 'min',
    'corr_ces_pos': 'max',
    'corr_ces_neg': 'max',
    'rwr': 'sum',
}
df_tmp_group = df_tmp.groupby(['exp_iter', 'data', 'c']).agg(per_run_aggregations)

across_runs = df_tmp_group.groupby(['data', 'c']).agg(['mean', 'std']).round(4)
print(across_runs.to_string())

In [None]:
# Snapshot at caipi_iter == 100: mean and std of every remaining column,
# grouped by (data, c), rounded to 4 decimals and printed in full.
columns_not_reported = ['exp_iter', 'rwr', 'caipi_iter', 'state', 'labeled_size',
                        'unlabeled_size', 'precision', 'recall', 'accuracy']
at_iteration_100 = df['caipi_iter'] == 100

df_tmp = df.loc[at_iteration_100].drop(columns=columns_not_reported)

snapshot_stats = df_tmp.groupby(['data', 'c']).agg(['mean', 'std']).round(4)
print(snapshot_stats.to_string())

In [None]:
# Benchmark results: mean and std of the remaining columns per experiment.
df_benchmark = pd.read_excel(results_dir+'benchmark.xlsx')

# Keep only the part of each experiment name before the first underscore so
# that rows sharing that prefix fall into the same group. Vectorised string
# op: unlike the previous .loc[i, ...] loop with a positional counter, it does
# not rely on the frame having a default 0..n-1 index.
exp_names = df_benchmark['experiment'].str.split('_').str[0]
df_benchmark['experiment'] = exp_names

df_benchmark = df_benchmark.drop(['precision', 'recall', 'accuracy'], axis=1)

df_benchmark.groupby(['experiment']).agg(['mean', 'std'])

In [None]:
# Per-iteration learning curves (mean / std over runs) for three configurations.
# All variable names below are kept because the plotting cell reads them.

_CURVE_METRICS = ('fp_rate', 'fn_rate', 'corr_ces_pos', 'corr_ces_neg')


def _select_and_group(frame, data, c):
    """Return (rows, per-iteration stats) for one (data, c) configuration.

    rows    -- the rows of `frame` whose 'data' and 'c' columns equal the given
               values, with the 'data' column dropped.
    grouped -- mean and std of the remaining columns for each 'caipi_iter'.

    The mask is built from `frame` itself rather than from a different frame
    that merely shares its index. The string-valued 'c' column is dropped
    before aggregating: taking the mean of a string column raises TypeError on
    pandas >= 2.0, and none of the curves use it.
    """
    rows = frame[(frame['data'] == data) & (frame['c'] == c)].drop('data', axis=1)
    grouped = rows.drop('c', axis=1).groupby(['caipi_iter']).agg(['mean', 'std'])
    return rows, grouped


def _mean_std_arrays(grouped):
    """Flatten grouped stats to arrays ordered (mean, std) for each metric in _CURVE_METRICS."""
    return tuple(np.asarray(grouped[metric][stat])
                 for metric in _CURVE_METRICS
                 for stat in ('mean', 'std'))


df_tmp = df.drop(['exp_iter', 'state', 'recall', 'accuracy', 'labeled_size', 'unlabeled_size', 'rwr'], axis=1)

# data == 'credit', c == '0'
df_credit_c0, df_credit_c0_group = _select_and_group(df_tmp, 'credit', '0')
(df_credit_c0_fp_mean, df_credit_c0_fp_std,
 df_credit_c0_fn_mean, df_credit_c0_fn_std,
 df_credit_c0_correxplpos_mean, df_credit_c0_correxplpos_std,
 df_credit_c0_correxplneg_mean, df_credit_c0_correxplneg_std) = _mean_std_arrays(df_credit_c0_group)

# data == 'credit', c == '5'
df_credit_c5, df_credit_c5_group = _select_and_group(df_tmp, 'credit', '5')
(df_credit_c5_fp_mean, df_credit_c5_fp_std,
 df_credit_c5_fn_mean, df_credit_c5_fn_std,
 df_credit_c5_correxplpos_mean, df_credit_c5_correxplpos_std,
 df_credit_c5_correxplneg_mean, df_credit_c5_correxplneg_std) = _mean_std_arrays(df_credit_c5_group)

# data == 'creditfilter10', c == '5'
df_creditfilter_c5, df_creditfilter_c5_group = _select_and_group(df_tmp, 'creditfilter10', '5')
(df_creditfilter_c5_fp_mean, df_creditfilter_c5_fp_std,
 df_creditfilter_c5_fn_mean, df_creditfilter_c5_fn_std,
 df_creditfilter_c5_correxplpos_mean, df_creditfilter_c5_correxplpos_std,
 df_creditfilter_c5_correxplneg_mean, df_creditfilter_c5_correxplneg_std) = _mean_std_arrays(df_creditfilter_c5_group)

In [None]:
# 2x2 figure: one panel per metric, three mean curves per panel with a
# +/- 1 std band, plus a dashed horizontal baseline.
fig, ax = plt.subplots(2, 2, figsize=(20, 20), constrained_layout=True)

# Positional x axis (0 .. n-1), sized from the first curve; all curves are
# plotted against it and therefore must have the same length.
x = np.arange(len(df_credit_c0_fp_mean))

# (line colour, band colour, legend label) for each configuration, in draw order.
# The third label used to read 'filter=3.5' in three panels and 'filter=1.0'
# in the fourth. Only the fourth panel has a legend, so 'filter=1.0' is what
# the figure has always shown; the label is unified to that value.
# NOTE(review): confirm 1.0 is the correct filter value for 'creditfilter10'.
series_styles = [
    ('blue', 'lightblue', 'c=0'),
    ('red', 'pink', 'c=5'),
    ('green', 'lightgreen', 'filter=1.0'),
]

# (axes, y label, baseline level, [(mean, std) per configuration]).
# NOTE(review): the baseline levels are hard-coded; presumably taken from the
# benchmark results -- confirm their source.
panels = [
    (ax[0, 0], 'false positive rate', 0.15324,
     [(df_credit_c0_fp_mean, df_credit_c0_fp_std),
      (df_credit_c5_fp_mean, df_credit_c5_fp_std),
      (df_creditfilter_c5_fp_mean, df_creditfilter_c5_fp_std)]),
    (ax[0, 1], 'false negative rate', 0.068034,
     [(df_credit_c0_fn_mean, df_credit_c0_fn_std),
      (df_credit_c5_fn_mean, df_credit_c5_fn_std),
      (df_creditfilter_c5_fn_mean, df_creditfilter_c5_fn_std)]),
    (ax[1, 0], 'correct positive explanation rate', 0.57938,
     [(df_credit_c0_correxplpos_mean, df_credit_c0_correxplpos_std),
      (df_credit_c5_correxplpos_mean, df_credit_c5_correxplpos_std),
      (df_creditfilter_c5_correxplpos_mean, df_creditfilter_c5_correxplpos_std)]),
    (ax[1, 1], 'correct negative explanation rate', 0.29930,
     [(df_credit_c0_correxplneg_mean, df_credit_c0_correxplneg_std),
      (df_credit_c5_correxplneg_mean, df_credit_c5_correxplneg_std),
      (df_creditfilter_c5_correxplneg_mean, df_creditfilter_c5_correxplneg_std)]),
]

for panel_ax, y_label, baseline, curves in panels:
    for (mean, std), (line_color, band_color, label) in zip(curves, series_styles):
        panel_ax.plot(x, mean, color=line_color, label=label)
        panel_ax.fill_between(x, mean - std, mean + std, color=band_color, alpha=0.5)

    # NOTE(review): the baseline spans 1..101 while x starts at 0 -- confirm
    # the intended iteration range. Extents are kept as in the original figure.
    panel_ax.hlines(baseline, 1, 101, color='gray', linestyle='dashed', label='baseline')

    panel_ax.set_ylim(0, 0.75)
    panel_ax.set_xlabel('iteration')
    panel_ax.set_ylabel(y_label)

# A single legend, on the bottom-right panel, serves the whole figure.
ax[1, 1].legend(loc='lower right')

plt.show()