In [None]:
from glob import glob
import os
import json
import pandas as pd
import numpy as np

In [None]:
# Experiment directory name; also used as the prefix of the label-map file name.
exp_name = "t2b"
# Response workbooks to score, keyed by file name. Every value starts as None
# and is replaced by the loaded DataFrame when the workbooks are read.
# To score a single workbook, list only that file, e.g. ['benchmark - RP.xlsx'].
responses = dict.fromkeys(['benchmark - SM.xlsx', 'benchmark - RP.xlsx'])

In [None]:
# Read every response workbook from <exp_name>/benchmark/. The first row is the
# header and the first column is the index; rows containing any missing value
# are discarded.
parts = None
for workbook in tuple(responses):
    sheet = pd.read_excel(
        os.path.join(exp_name, 'benchmark', workbook),
        header=0,
        index_col=0,
    )
    responses[workbook] = sheet.dropna()
    # The column labels of the first workbook loaded define `parts`.
    if parts is None:
        parts = responses[workbook].columns

In [None]:
# Add answers to responses.
#
# For every part column of every response sheet, read
# <exp_name>/benchmark/<part>/answers.json, index it by `test_idx`, and store
# its `label_override` values next to the responses as "<part>-answers".
# Assignment aligns on the index, so rows of the sheet with no matching
# `test_idx` get NaN.
answers_suffix = "-answers"
for r in responses.values():
    # Only walk the original part columns. Without this filter, re-running the
    # cell would treat the derived "<part>-answers" columns as parts and try to
    # open a non-existent "<part>-answers/answers.json".
    part_columns = [c for c in r.columns if not str(c).endswith(answers_suffix)]
    for c in part_columns:
        answers_path = os.path.join(exp_name, 'benchmark', c, 'answers.json')
        answers = pd.read_json(answers_path, orient='records').set_index('test_idx')
        r[f"{c}{answers_suffix}"] = answers["label_override"]

In [None]:
# Read the label map from <exp_name>/<exp_name>-label-map.json.
label_map_path = os.path.join(exp_name, f"{exp_name}-label-map.json")
with open(label_map_path, 'r') as f:
    label_map = json.loads(f.read())

# Inverse lookup (value -> key). If two keys share a value, the later key wins.
reverse_label_map = dict(zip(label_map.values(), label_map.keys()))

print(label_map)

In [None]:
# Allocate one zero-filled stack of confusion matrices per response sheet,
# shaped (number of parts, number of labels, number of labels).
n = len(label_map)
con_mat = dict((sheet_name, np.zeros((len(parts), n, n))) for sheet_name in responses)

In [None]:
# Score responses.
#
# For each response sheet and each part, every (guess, truth) pair is mapped
# through `label_map` and counted in con_mat[sheet][part, truth, guess].
# Per-part accuracy is correct/total over the rows that could be mapped; the
# printed figure is the mean of those accuracies as a percentage.
for name, r in responses.items():
    # Reset the counts so re-running this cell does not double-count.
    con_mat[name][...] = 0

    top_1 = 0
    scored_parts = 0
    for pid, p in enumerate(parts):
        joined = pd.concat([r[p], r[f"{p}-answers"]], axis=1)

        total = 0
        correct = 0

        for i, (guess, truth) in enumerate(joined.itertuples(index=False, name=None)):
            try:
                # str() keeps non-string cells (e.g. numbers) on the KeyError
                # path below instead of raising AttributeError on .lower().
                guess_idx = label_map[str(guess).lower()]
                truth_idx = label_map[truth]
            except KeyError:
                print(f"Key error on {name} {p} {i} '{guess}'")
                continue

            con_mat[name][pid, truth_idx, guess_idx] += 1
            total += 1
            if guess_idx == truth_idx:
                correct += 1

        if total == 0:
            # No row could be mapped to labels: there is no accuracy to
            # average, so leave this part out rather than dividing by zero.
            print(f"No scorable responses for {name} {p}; skipping")
            continue

        top_1 += correct / total
        scored_parts += 1

    if scored_parts:
        print(name, round(100 * top_1 / scored_parts, 1))
    else:
        print(name, "no scorable parts")
        
        

In [None]:
# Row-normalise every per-part confusion matrix (each row, indexed by the true
# label, is divided by its own sum) and print, per response sheet, the average
# of the normalised matrices across parts as percentages.
n = len(label_map)
con_mat_summary = {k: np.zeros((len(parts), n, n)) for k in responses.keys()}

for name, cm in con_mat.items():
    print(name)

    # cm has shape (parts, n, n); summing the last axis gives each row's total.
    row_sums = cm.sum(axis=2, keepdims=True)

    # A true label with no scored rows in a part has a zero row sum. That row
    # is left as NaN ("no data") without emitting a divide warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        normalised = cm / row_sums
    con_mat_summary[name][...] = normalised

    # Average over parts while ignoring NaN rows, so one part with no examples
    # of a label does not turn that label's whole averaged row into NaN.
    observed = ~np.isnan(normalised)
    part_counts = observed.sum(axis=0)
    totals = np.where(observed, normalised, 0.0).sum(axis=0)
    mean_cm = np.full(totals.shape, np.nan)
    np.divide(totals, part_counts, out=mean_cm, where=part_counts > 0)

    print(np.round(mean_cm * 100, 1))

In [None]:
# Re-print the label map: its values are the row/column indices used in the
# confusion matrices, so this lets the matrices be read back as label names.
print(label_map)