# CUB Read and Export Results


To get the data, please visit `Download.md` in `data/` folder

In [1]:
from datetime import datetime
import seaborn as sns
sns.set()

In [2]:
import glob
import pickle
import pandas as pd
import os
import math
import numpy as np

In [3]:
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [4]:
DEBUG=True

In [5]:
# Build the ground truth dictionary 

# Map method # -> method name
mapping = {'No-XAI':1 , 'KNNs': 2, 'EMD-NNs': 3, 'EMD-Corrs': 3, 'CHM-NNs': 4, 'CHM-Corrs': 4}
exp = 'CUB'

DEBUG = False
if DEBUG:
    exp = 'CUB'

with open('../../data/raw_data/Task1-CUB-ALL.pickle', 'rb') as f:
  TASK1_Data = pickle.load(f)

# Load Data

In [6]:
np.mean([x['CHM-Output'] for x in TASK1_Data.values()])

0.8327580255436658

## GT Labels

In [7]:
ground_truth = {}

ground_truth['No-XAI']     =  {v['query-path']:True if v['ResNet-Output'] else False for k, v in TASK1_Data.items()}
ground_truth['KNNs']       =  {v['query-path']:True if v['KNN-Output'] else False    for k, v in TASK1_Data.items()}
ground_truth['EMD-NNs']    =  {v['query-path']:True if v['EMD-Output'] else False    for k, v in TASK1_Data.items()}
ground_truth['EMD-Corrs']  =  {v['query-path']:True if v['EMD-Output'] else False    for k, v in TASK1_Data.items()}
ground_truth['CHM-NNs']    =  {v['query-path']:True if v['CHM-Output'] else False   for k, v in TASK1_Data.items()}
ground_truth['CHM-Corrs']  =  {v['query-path']:True if v['CHM-Output'] else False   for k, v in TASK1_Data.items()}

In [8]:
confidence_data = {}

confidence_data['No-XAI']     =  {v['query-path']:v['ResNet-Confidence']/100 for k, v in TASK1_Data.items()}
confidence_data['KNNs']       =  {v['query-path']:v['KNN-confidence']/20    for k, v in TASK1_Data.items()}
confidence_data['EMD-NNs']    =  {v['query-path']:v['EMD-confidence']/20    for k, v in TASK1_Data.items()}
confidence_data['EMD-Corrs']  =  {v['query-path']:v['EMD-confidence']/20    for k, v in TASK1_Data.items()}
confidence_data['CHM-NNs']    =  {v['query-path']:v['CHM-confidence']/20   for k, v in TASK1_Data.items()}
confidence_data['CHM-Corrs']  =  {v['query-path']:v['CHM-confidence']/20   for k, v in TASK1_Data.items()}

In [9]:
classifier_predictions = {}

classifier_predictions['No-XAI']     =  {v['query-path']:v['ResNet-Prediction'] for k, v in TASK1_Data.items()}
classifier_predictions['KNNs']       =  {v['query-path']:v['KNN-predictions']    for k, v in TASK1_Data.items()}

classifier_predictions['EMD-NNs']    =  {v['query-path']:v['EMD-predictions']    for k, v in TASK1_Data.items()}
classifier_predictions['EMD-Corrs']  =  {v['query-path']:v['EMD-predictions']    for k, v in TASK1_Data.items()}

classifier_predictions['CHM-NNs']    =  {v['query-path']:v['CHM-predictions']   for k, v in TASK1_Data.items()}
classifier_predictions['CHM-Corrs']  =  {v['query-path']:v['CHM-predictions']   for k, v in TASK1_Data.items()}

In [10]:
GT_WNIDS     = {os.path.basename(v['query-path']):v['gt_wnid']for k,v in TASK1_Data.items()}
RESNEt_WNIDS = {os.path.basename(v['query-path']):v['ResNet-Prediction']for k,v in TASK1_Data.items()}
KNNs_WNIDS   = {os.path.basename(v['query-path']):v['KNN-predictions']for k,v in TASK1_Data.items()}
EMD_WNIDS    = {os.path.basename(v['query-path']):v['EMD-predictions']for k,v in TASK1_Data.items()}
CHM_WNIDS    = {os.path.basename(v['query-path']):v['CHM-predictions']for k,v in TASK1_Data.items()}

In [11]:
# 1. Read the information from csv files to dictionary. Each user ID is an entry

# Check if a number is float?
def is_float(s):
    try:
        float(s)
        return True
    except ValueError:
        return False

# Task aliases assigned by Gorilla
exp_hierarchy = ['randomiser-tjl7', {'No-XAI':'counterbalance-lxdj' ,
                                     'KNNs': 'counterbalance-yqqs', 
                                     'EMD-NNs': 'counterbalance-jiws', 'EMD-Corrs': 'counterbalance-eqty', 
                                     'CHM-NNs': 'counterbalance-xsf1', 'CHM-Corrs': 'counterbalance-4ym2'}]

methods = mapping.keys()
files = []

list_1 = glob.glob('../../data/raw_data/CUB/*.csv')

for file in list_1:
    if '.csv' in file:
        files.append(file)
        
threshold = 1

# Store information of bad users to a dict

final_result = dict()
trial_cnt = dict()

# Initialize the dictionary for users' responses (answer)
final_result[exp] = dict()
trial_cnt[exp] = dict()
for method in methods:
    final_result[exp][method] = dict()
    trial_cnt[exp][method] = dict()

# Correct answer dictionary in validation 
val_correct_dict = dict()
# Incorrect answer dictionary in validation 
val_incorrect_trials_dict = dict()

test_correct_trials_dict = dict()
test_incorrect_trials_dict = dict()

# Numbers of users for methods
user_cnt_dict = dict()

# Reaction time dictionary 
users_avg_reaction_time_dict = dict()
users_stdev_reaction_time_dict = dict()
below_stdev_reaction_time_dict = dict()

# Counter-balances for methods
counter_balances_dict = dict()

# Numbers of good users for methods
good_user_cnt_dict = dict()
good_user_cnt = 0
bad_user_cnt = 0
users_reaction_times = []
user_dict = dict()


for file in files:
    # We skip the Questionaire Node
    if 'questionnaire-9g37' in file:
        continue
    
    # We skip the Complete and Reject Node
    if '7pqn' in file or '77vw' in file:
        continue
    
    reaction_time_correct_cnt_dict = dict()
    users_reaction_time = []
    users_val_incorrect_trials = []
    counter_balances = []
    user_cnt = 0
    trail_date_tile = []
    df = pd.read_csv(file)
    # 
    for index, row in df.iterrows():
        # Start the spreadsheet for a user
        if row['Event Index'] != 'END OF FILE' and int(row['Event Index']) == 1:
            trail_date_tile
            user_cnt += 1
            reaction_time = 0
            reaction_times = []

            val_correct = 0
            val_incorrect = 0
            val_incorrect_trials = []
            val_trial_cnt = 0
            
            test_reaction_time = 0
            val_reaction_time = 0

            test_trial_answers = {}

            task_name = row['Task Name']
            
            task = exp
            
            # Extract the method name. E.g, KNNs
            
            method = row['randomiser-tjl7']
            counter_balance = int(row[exp_hierarchy[1][method]])
            if DEBUG:
                public_id = int(row['Participant Private ID'])
            else:
                public_id = row['Participant Public ID']
            
            if task not in user_dict:
                user_dict[task] = dict()
                    
            if method not in user_dict[task]:
                user_dict[task][method] = dict()
            if public_id not in user_dict[task][method]:
                user_dict[task][method][public_id] = dict()
                user_dict[task][method][public_id]['Trials'] = dict()
                user_dict[task][method][public_id]['Counter balance'] = counter_balance

        
        trial_time = row['Reaction Time']
            
        # Check users' responses in validation -- Screen 3 is where users make decision -- 
        # TODO: re-check this as we re-layout the test UI (maybe the screen 3 is not where users make decision)
        if row['display'] == 'Validation' and row['Screen Name'] == 'Screen 3':
            val_trial_cnt += 1
            if row['Correct'] == 1:
                val_correct += 1
            elif row['Incorrect'] == 1:
                val_incorrect += 1
                val_incorrect_trials.append(val_trial_cnt)
            else:
                raise ValueError("Wrong value!")
                
            user_dict[task][method][public_id]['Validation Correct'] = val_correct
            user_dict[task][method][public_id]['Validation Incorrect'] = val_incorrect
            user_dict[task][method][public_id]['Incorrect Validation Trials'] = val_incorrect_trials
            
                
        # Check users' responses in test
        elif row['display'] == 'Trial' and row['Screen Name'] == 'Screen 3':
            file_name = row['file_name' + str(counter_balance)]
            test_trial_answers[file_name] = row['Response']
            
            user_dict[task][method][public_id]['Trials'][file_name] = row['Response']
        
        # Time from instructions -> the end of Validation
        if row['display'] != 'Trial':
            if (isinstance(row['Reaction Time'], str) and is_float(row['Reaction Time'])) or (isinstance(row['Reaction Time'], float) and not math.isnan(row['Reaction Time'])):
                val_reaction_time += float(row['Reaction Time'])
        
        # End the spreadsheet for a user
        if row['Trial Number'] == 'END TASK':
            reaction_time = float(row['Reaction Time'])
            
            # Validation and Test are combined
            user_dict[task][method][public_id]['Validation Reaction Time'] = val_reaction_time
            user_dict[task][method][public_id]['Test Reaction Time'] = reaction_time - test_reaction_time
            
            dt_object = datetime.fromtimestamp(row['UTC Timestamp']/1000)
            user_dict[task][method][public_id]['DATE'] = dt_object


  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)
  df = pd.read_csv(file)


# Dataframe

In [12]:
Method_name_map = {'No-XAI': 'Method1', 'KNNs': 'Method2', 'EMD-NNs':'Method3', 'EMD-Corrs':'Method3', 'CHM-Corrs':'Method4', 'CHM-NNs':'Method4'}

In [13]:
def check_performance(user_inputs, method_name):
  correctness = []
  for k, v in user_inputs.items():
    
    gt_label = ground_truth[method_name][k]

    if gt_label==True and v == 'Yes':
      correctness.append(1)
    elif gt_label==False and v == 'No':
      correctness.append(1)
    else:
      correctness.append(0)
  
  return np.asarray(correctness)

In [14]:
user_information = {}
bad_user = 0

for k in user_dict['CUB'].keys():
  for u in user_dict['CUB'][k].keys():
    ukey = f"{u}{k}"
    user_information[ukey] = user_dict['CUB'][k][u]
    user_information[ukey]['Correctness'] = check_performance(user_information[ukey]['Trials'], k)
    user_information[ukey]['Accuracy'] = np.mean(user_information[ukey]['Correctness']) 
    user_information[ukey]['Corrects'] = np.count_nonzero(user_information[ukey]['Correctness'] == 1) 
    user_information[ukey]['Method'] = k

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [15]:
len(user_information)

363

In [16]:
df = pd.DataFrame.from_dict(user_information).T
df = df.convert_dtypes() 

In [17]:
df = df.dropna()

In [18]:
np.all([datetime.utcfromtimestamp(x.astype(datetime)/1e9) > datetime(2022, 5, 5) for x in df['DATE'].values])

True

In [19]:
df.groupby('Method')['Accuracy'].agg(['mean', 'std']).reset_index().sort_values(by='Method', ascending=False)

Unnamed: 0,Method,mean,std
5,No-XAI,0.652459,0.076608
4,KNNs,0.647458,0.071439
3,EMD-NNs,0.638889,0.072399
2,EMD-Corrs,0.670556,0.079805
1,CHM-NNs,0.654645,0.083229
0,CHM-Corrs,0.689617,0.098421


In [20]:
df.explode(['Trials', 'Correctness']).reset_index(drop=False).drop(['Incorrect Validation Trials', 'Validation Incorrect', 'Validation Correct'], 1).columns

  df.explode(['Trials', 'Correctness']).reset_index(drop=False).drop(['Incorrect Validation Trials', 'Validation Incorrect', 'Validation Correct'], 1).columns


Index(['index', 'Trials', 'Counter balance', 'Validation Reaction Time',
       'Test Reaction Time', 'DATE', 'Correctness', 'Accuracy', 'Corrects',
       'Method'],
      dtype='object')

In [21]:
df2 = df.explode(['Trials', 'Correctness']).reset_index(drop=False).drop(['Incorrect Validation Trials', 'Validation Incorrect', 'Validation Correct'], 1)
df2.columns = ['UID', 'Query', 'Counter balance', 'Validation Reaction Time', 'Test Reaction Time', 'Date', 'Correctness', 'Accuracy', 'Total Corrects', 'Method']
wnids = [GT_WNIDS[x] for x in  df2.Query]
df2['GroundTruth'] = wnids
df2 = df2.convert_dtypes()
df2.head()

  df2 = df.explode(['Trials', 'Correctness']).reset_index(drop=False).drop(['Incorrect Validation Trials', 'Validation Incorrect', 'Validation Correct'], 1)


Unnamed: 0,UID,Query,Counter balance,Validation Reaction Time,Test Reaction Time,Date,Correctness,Accuracy,Total Corrects,Method,GroundTruth
0,5791dae225cb71000128ad5bNo-XAI,Hooded_Oriole_0105_90875.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,096.Hooded_Oriole
1,5791dae225cb71000128ad5bNo-XAI,Chuck_Will_Widow_0051_796991.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,022.Chuck_will_Widow
2,5791dae225cb71000128ad5bNo-XAI,Vesper_Sparrow_0065_125446.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,131.Vesper_Sparrow
3,5791dae225cb71000128ad5bNo-XAI,Western_Wood_Pewee_0009_98115.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,102.Western_Wood_Pewee
4,5791dae225cb71000128ad5bNo-XAI,Warbling_Vireo_0126_158696.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,155.Warbling_Vireo


# Classifier Correctness, Confidence Score, Difficulty Level

In [22]:
ClassifierCorrectness = []

for i, MQ in df2[['Method', 'Query']].iterrows():
  if ground_truth[MQ['Method']][MQ['Query']]:
    ClassifierCorrectness.append(1)
  elif not ground_truth[MQ['Method']][MQ['Query']]:
    ClassifierCorrectness.append(0)
  else:
    raise

In [23]:
ground_truth[MQ['Method']][MQ['Query']]

True

In [24]:
ConfidenceRow = []

for i, MQ in df2[['Method', 'Query']].iterrows():
  ConfidenceRow.append(confidence_data[MQ['Method']][MQ['Query']])

In [25]:
DifficultyLevel = []

for i, MQ in df2[['Method', 'Query']].iterrows():
  conf = confidence_data[MQ['Method']][MQ['Query']]
  
  if ground_truth[MQ['Method']][MQ['Query']]: # CORRECT AI 
    if conf>=0 and conf <0.35:
      DifficultyLevel.append('Hard')
    elif conf>=0.35 and conf <0.75:
      DifficultyLevel.append('Medium')
    elif conf>=0.75 and conf <=1:
      DifficultyLevel.append('Easy')
    else:
      DifficultyLevel.append('UnCat')
    
  elif not ground_truth[MQ['Method']][MQ['Query']]: # WRONG AI 
    
    if conf>=0 and conf <0.35:
      DifficultyLevel.append('Easy')
    elif conf>=0.35 and conf <0.75:
      DifficultyLevel.append('Medium')
    elif conf>=0.75 and conf <=1:
      DifficultyLevel.append('Hard')
    else:
      DifficultyLevel.append('UnCat')
    
  else:
    raise

In [26]:
df2['ClassifierCorrectness'] = ClassifierCorrectness
df2['ConfidenceScore'] = ConfidenceRow
df2['DifficultyLevel'] = DifficultyLevel

In [27]:
df2.head(10)

Unnamed: 0,UID,Query,Counter balance,Validation Reaction Time,Test Reaction Time,Date,Correctness,Accuracy,Total Corrects,Method,GroundTruth,ClassifierCorrectness,ConfidenceScore,DifficultyLevel
0,5791dae225cb71000128ad5bNo-XAI,Hooded_Oriole_0105_90875.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,096.Hooded_Oriole,0,0.88,Hard
1,5791dae225cb71000128ad5bNo-XAI,Chuck_Will_Widow_0051_796991.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,022.Chuck_will_Widow,1,0.72,Medium
2,5791dae225cb71000128ad5bNo-XAI,Vesper_Sparrow_0065_125446.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,131.Vesper_Sparrow,0,0.63,Medium
3,5791dae225cb71000128ad5bNo-XAI,Western_Wood_Pewee_0009_98115.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,102.Western_Wood_Pewee,0,0.78,Hard
4,5791dae225cb71000128ad5bNo-XAI,Warbling_Vireo_0126_158696.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,155.Warbling_Vireo,0,0.97,Hard
5,5791dae225cb71000128ad5bNo-XAI,Indigo_Bunting_0061_13259.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,014.Indigo_Bunting,0,0.6,Medium
6,5791dae225cb71000128ad5bNo-XAI,Anna_Hummingbird_0130_56122.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,067.Anna_Hummingbird,0,0.89,Hard
7,5791dae225cb71000128ad5bNo-XAI,Common_Raven_0060_102013.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,107.Common_Raven,1,0.58,Medium
8,5791dae225cb71000128ad5bNo-XAI,Nighthawk_0021_82562.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,092.Nighthawk,1,0.84,Easy
9,5791dae225cb71000128ad5bNo-XAI,Barn_Swallow_0086_132477.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,136.Barn_Swallow,0,0.81,Hard


# Add GT and All Method's prediction

In [28]:
TASK1_Data[0]

{'CHM-Output': True,
 'KNN-Output': True,
 'query-path': 'Black_Footed_Albatross_0001_796111.jpg',
 'CHM-predictions': '001.Black_footed_Albatross',
 'CHM-confidence': 11,
 'KNN-predictions': '001.Black_footed_Albatross',
 'KNN-confidence': 15,
 'ID': 0,
 'gt_wnid': '001.Black_footed_Albatross',
 'EMD-Output': True,
 'EMD-predictions': '001.Black_footed_Albatross',
 'EMD-confidence': 12,
 'ResNet-Prediction': '001.Black_footed_Albatross',
 'ResNet-Confidence': 58,
 'ResNet-Output': True}

In [29]:
RESNET_Preds = {v['query-path']:v['ResNet-Prediction'] for k,v in TASK1_Data.items()} 
KNN_Preds    = {v['query-path']:v['KNN-predictions']   for k,v in TASK1_Data.items()} 
CHM_Preds    = {v['query-path']:v['CHM-predictions']   for k,v in TASK1_Data.items()} 
EMD_Preds    = {v['query-path']:v['EMD-predictions']   for k,v in TASK1_Data.items()} 

In [30]:
RESNET_CONF = {v['query-path']:v['ResNet-Confidence']/100  for k,v in TASK1_Data.items()} 
KNN_CONF    = {v['query-path']:v['KNN-confidence'] /20    for k,v in TASK1_Data.items()} 
EMD_CONF    = {v['query-path']:v['EMD-confidence'] /20    for k,v in TASK1_Data.items()} 
CHM_CONF    = {v['query-path']:v['CHM-confidence'] /20    for k,v in TASK1_Data.items()} 

In [31]:
methd_1_prediction = [RESNET_Preds[x] for x in df2['Query']]
methd_2_prediction = [KNN_Preds[x] for x in df2['Query']]
methd_3_prediction = [EMD_Preds[x] for x in df2['Query']]
methd_4_prediction = [CHM_Preds[x] for x in df2['Query']]

methd_1_conf = [RESNET_CONF[x] for x in df2['Query']]
methd_2_conf = [KNN_CONF[x]/20.0 for x in df2['Query']]
methd_3_conf = [EMD_CONF[x]/20.0 for x in df2['Query']]
methd_4_conf = [CHM_CONF[x]/20.0 for x in df2['Query']]

In [32]:
df2['ResNet-Prediction']= methd_1_prediction 
df2['KNN-Prediction']   = methd_2_prediction 
df2['EMD-Prediction']   = methd_3_prediction 
df2['CHM-Prediction']   = methd_4_prediction 

In [33]:
df2['ResNet-Conf']= methd_1_conf
df2['KNN-Conf']   = methd_2_conf
df2['EMD-Conf']   = methd_3_conf
df2['CHM-Conf']   = methd_4_conf

In [34]:
df2.head(5)

Unnamed: 0,UID,Query,Counter balance,Validation Reaction Time,Test Reaction Time,Date,Correctness,Accuracy,Total Corrects,Method,GroundTruth,ClassifierCorrectness,ConfidenceScore,DifficultyLevel,ResNet-Prediction,KNN-Prediction,EMD-Prediction,CHM-Prediction,ResNet-Conf,KNN-Conf,EMD-Conf,CHM-Conf
0,5791dae225cb71000128ad5bNo-XAI,Hooded_Oriole_0105_90875.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,096.Hooded_Oriole,0,0.88,Hard,175.Pine_Warbler,175.Pine_Warbler,175.Pine_Warbler,175.Pine_Warbler,0.88,0.0125,0.0175,0.0125
1,5791dae225cb71000128ad5bNo-XAI,Chuck_Will_Widow_0051_796991.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,022.Chuck_will_Widow,1,0.72,Medium,022.Chuck_will_Widow,105.Whip_poor_Will,105.Whip_poor_Will,105.Whip_poor_Will,0.72,0.0275,0.0225,0.03
2,5791dae225cb71000128ad5bNo-XAI,Vesper_Sparrow_0065_125446.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,131.Vesper_Sparrow,0,0.63,Medium,115.Brewer_Sparrow,131.Vesper_Sparrow,131.Vesper_Sparrow,131.Vesper_Sparrow,0.63,0.0225,0.0275,0.03
3,5791dae225cb71000128ad5bNo-XAI,Western_Wood_Pewee_0009_98115.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,0,0.566667,17,No-XAI,102.Western_Wood_Pewee,0,0.78,Hard,039.Least_Flycatcher,102.Western_Wood_Pewee,040.Olive_sided_Flycatcher,102.Western_Wood_Pewee,0.78,0.03,0.02,0.0225
4,5791dae225cb71000128ad5bNo-XAI,Warbling_Vireo_0126_158696.jpg,6,412124.0,411022,2022-05-05 14:14:35.473,1,0.566667,17,No-XAI,155.Warbling_Vireo,0,0.97,Hard,153.Philadelphia_Vireo,153.Philadelphia_Vireo,155.Warbling_Vireo,155.Warbling_Vireo,0.97,0.025,0.025,0.025


### Rename Methods to Standrad ones

In [35]:
PAPER_METHOD_NAMES = {'No-XAI': 'ResNet-50', 
                      'KNNs': 'kNN',
                      'EMD-NNs':'EMD-NN', 
                      'EMD-Corrs':'EMD-Corr',
                      'CHM-NNs':'CHM-NN',
                      'CHM-Corrs':'CHM-Corr'}

In [36]:
df2.Method = df2.Method.map(PAPER_METHOD_NAMES)

In [37]:
df2.tail(20)

Unnamed: 0,UID,Query,Counter balance,Validation Reaction Time,Test Reaction Time,Date,Correctness,Accuracy,Total Corrects,Method,GroundTruth,ClassifierCorrectness,ConfidenceScore,DifficultyLevel,ResNet-Prediction,KNN-Prediction,EMD-Prediction,CHM-Prediction,ResNet-Conf,KNN-Conf,EMD-Conf,CHM-Conf
10840,609ad2b55f8a40c8848c5744EMD-NNs,White_Necked_Raven_0049_102713.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,1,0.533333,16,EMD-NN,108.White_necked_Raven,1,0.45,Medium,108.White_necked_Raven,107.Common_Raven,108.White_necked_Raven,029.American_Crow,0.81,0.02,0.0225,0.0175
10841,609ad2b55f8a40c8848c5744EMD-NNs,Common_Tern_0106_149345.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,0,0.533333,16,EMD-NN,144.Common_Tern,0,0.4,Medium,141.Artic_Tern,144.Common_Tern,141.Artic_Tern,141.Artic_Tern,0.42,0.02,0.02,0.02
10842,609ad2b55f8a40c8848c5744EMD-NNs,Least_Flycatcher_0017_30460.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,0,0.533333,16,EMD-NN,039.Least_Flycatcher,0,0.9,Hard,091.Mockingbird,091.Mockingbird,091.Mockingbird,091.Mockingbird,0.99,0.0475,0.045,0.0425
10843,609ad2b55f8a40c8848c5744EMD-NNs,Blue_Winged_Warbler_0063_161810.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,1,0.533333,16,EMD-NN,161.Blue_winged_Warbler,1,0.6,Medium,161.Blue_winged_Warbler,161.Blue_winged_Warbler,161.Blue_winged_Warbler,161.Blue_winged_Warbler,0.99,0.04,0.03,0.0325
10844,609ad2b55f8a40c8848c5744EMD-NNs,Hooded_Oriole_0095_90337.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,0,0.533333,16,EMD-NN,096.Hooded_Oriole,0,0.45,Medium,098.Scott_Oriole,096.Hooded_Oriole,095.Baltimore_Oriole,096.Hooded_Oriole,0.44,0.0225,0.0225,0.0175
10845,609ad2b55f8a40c8848c5744EMD-NNs,Yellow_Breasted_Chat_0088_21686.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,1,0.533333,16,EMD-NN,020.Yellow_breasted_Chat,1,0.85,Easy,157.Yellow_throated_Vireo,020.Yellow_breasted_Chat,020.Yellow_breasted_Chat,020.Yellow_breasted_Chat,0.81,0.04,0.0425,0.045
10846,609ad2b55f8a40c8848c5744EMD-NNs,Bobolink_0026_11057.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,1,0.533333,16,EMD-NN,013.Bobolink,0,0.5,Medium,013.Bobolink,097.Orchard_Oriole,097.Orchard_Oriole,097.Orchard_Oriole,0.9,0.02,0.025,0.015
10847,609ad2b55f8a40c8848c5744EMD-NNs,Sooty_Albatross_0004_796366.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,1,0.533333,16,EMD-NN,003.Sooty_Albatross,1,0.55,Medium,003.Sooty_Albatross,001.Black_footed_Albatross,003.Sooty_Albatross,001.Black_footed_Albatross,0.91,0.0275,0.0275,0.025
10848,609ad2b55f8a40c8848c5744EMD-NNs,Carolina_Wren_0069_186230.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,1,0.533333,16,EMD-NN,195.Carolina_Wren,1,0.65,Medium,195.Carolina_Wren,195.Carolina_Wren,195.Carolina_Wren,195.Carolina_Wren,0.86,0.025,0.0325,0.0175
10849,609ad2b55f8a40c8848c5744EMD-NNs,Bobolink_0035_11117.jpg,8,424578.4,423268,2022-05-06 10:30:32.930,1,0.533333,16,EMD-NN,013.Bobolink,1,1.0,Easy,013.Bobolink,013.Bobolink,013.Bobolink,013.Bobolink,0.99,0.05,0.05,0.045


# Export to CSV 

In [38]:
df2.to_csv('CUB-Human-Study-Summary.csv', index=False)