# Parse /results/

In [None]:
# Only supports result files produced by the NPSR algorithm.

import os
import numpy as np
import matplotlib.pyplot as plt

RESULTS_DIR = 'results'
RESULT_SUFFIX = '_result.txt'
SUPPORTED_ALGO = 'NPSR'


def metric_after(line, tag):
    """Return the float that follows the last occurrence of ``tag`` in ``line``.

    The number is taken to end at the next single space (or at end of line).

    Raises:
        ValueError: if the text after ``tag`` is not a valid float.
    """
    return float(line.split(tag)[-1].split(' ')[0])


def parse_d_line(line):
    """Parse one per-``d`` result line into ``(d, metrics)``.

    The line must contain ``d <int>`` followed by three metric groups in the
    form ``Soft F1:<x>  AUC:<x>  Hard F1:<x>  AUC:<x>  Inf F1:<x>  AUC:<x>``
    (groups and their AUC values are separated by two spaces).

    Returns:
        A tuple ``(d, metrics)`` where ``metrics`` maps 'Soft F1', 'Soft AUC',
        'Hard F1', 'Hard AUC', 'Inf F1' and 'Inf AUC' to floats.
    """
    d = int(line.split('d ')[1].split(' ')[0])
    groups = line.split('Soft ')[1].split('  Hard ')
    groups = [groups[0]] + groups[1].split('  Inf ')
    metrics = {}
    for idx, mode in enumerate(('Soft', 'Hard', 'Inf')):
        values = [float(x) for x in groups[idx].replace('F1:', '').split('  AUC:')]
        metrics[f'{mode} F1'] = values[0]
        metrics[f'{mode} AUC'] = values[1]
    return d, metrics


def parse_result_lines(lines, datetime):
    """Parse the newline-stripped lines of one result file into a result entry.

    Layout: line 0 is the dataset config, line 1 the model config, then one
    equally sized chunk per epoch: an ``epoch=<k>`` header, the M_pt line, the
    M_seq line, and one line per ``d`` value.

    The number of epochs is taken from the ``epoch=`` headers actually present
    rather than from the configured ``epochs=`` value, so a run that stopped
    early (fewer recorded epochs than configured) still parses, and a file
    without any epoch yields an empty ``'epoch'`` list.

    Args:
        lines: file content, one string per line, without trailing newlines.
        datetime: run identifier stored under the ``'datetime'`` key.

    Returns:
        dict with keys 'datetime', 'dset_config', 'model_config' and 'epoch'
        (a list with one dict of F1/AUC values per recorded epoch).

    Raises:
        ValueError: if the config lines are missing or an epoch chunk is
            malformed.
    """
    if len(lines) < 2:
        raise ValueError(f'result file {datetime!r} is missing its two config lines')

    entry = {
        'datetime': datetime,
        'dset_config': lines[0],
        'model_config': lines[1],
        'epoch': [],
    }

    body = lines[2:]
    n_recorded = sum(line.startswith('epoch=') for line in body)
    if n_recorded == 0:
        return entry

    # Every epoch writes the same number of lines; floor division also
    # tolerates a short trailing remainder, as the original chunking did.
    lines_per_epoch = len(body) // n_recorded
    for epoch in range(n_recorded):
        chunk = body[epoch * lines_per_epoch:(epoch + 1) * lines_per_epoch]
        if chunk[0] != f'epoch={epoch}':
            raise ValueError(
                f'result file {datetime!r}: expected header "epoch={epoch}", got {chunk[0]!r}'
            )
        if len(chunk) < 3:
            raise ValueError(f'result file {datetime!r}: epoch {epoch} is incomplete')

        # only record F1 and AUC
        res_ep = {
            'M_pt_F1': metric_after(chunk[1], 'F1:'),
            'M_pt_AUC': metric_after(chunk[1], 'AUC:'),
            'M_seq_F1': metric_after(chunk[2], 'F1:'),
            'M_seq_AUC': metric_after(chunk[2], 'AUC:'),
            'd': {},
        }
        for line in chunk[3:]:
            d, metrics = parse_d_line(line)
            res_ep['d'][d] = metrics
        entry['epoch'].append(res_ep)

    return entry


# Scan results/<dataset>/<algorithm>/*_result.txt and collect every parsed run
# in res[dataset][algorithm] (a list of result entries).
res = {}
if os.path.isdir(RESULTS_DIR):
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    dset_names = sorted(os.listdir(RESULTS_DIR))
    print('Datasets in /results/ -', dset_names)
else:
    dset_names = []
    print(f'No {RESULTS_DIR}/ directory found - nothing to parse.')

for dset in dset_names:
    dset_dir = os.path.join(RESULTS_DIR, dset)
    if not os.path.isdir(dset_dir):
        continue  # stray file (e.g. .DS_Store), not a dataset directory
    res[dset] = {}
    for algo in sorted(os.listdir(dset_dir)):
        algo_dir = os.path.join(dset_dir, algo)
        if not os.path.isdir(algo_dir):
            continue
        if algo != SUPPORTED_ALGO:
            # The parser below only understands the NPSR result layout.
            print(f'Dataset: {dset}, Algorithm: {algo} - unsupported algorithm. Skipping directory.')
            continue
        res[dset][algo] = []
        print(f'Dataset: {dset}, Algorithm: {algo}')

        # List the directory once; the suffix test ignores backups such as '*_result.txt~'.
        result_files = sorted(f for f in os.listdir(algo_dir) if f.endswith(RESULT_SUFFIX))
        if not result_files:
            print('  Found no result files. Skipping directory.')
            continue
        print(f'  Found {len(result_files)} result files.')

        for fname in result_files:
            with open(os.path.join(algo_dir, fname), 'r') as file:
                lines = [line.rstrip('\n') for line in file]
            res[dset][algo].append(parse_result_lines(lines, fname[:-len(RESULT_SUFFIX)]))

In [None]:
# Plot per-epoch metric curves for every dataset/algorithm pair that has
# exactly one parsed result file; pairs with zero or several runs are skipped.
for dset, runs_by_algo in res.items():
    for algo, runs in runs_by_algo.items():
        if len(runs) != 1:
            continue

        entry = runs[0]
        history = entry['epoch']
        epoch_axis = np.arange(len(history))

        print(dset, algo, entry['datetime'])

        print(f'  {entry["dset_config"]}')
        print(f'  {entry["model_config"]}')

        # First figure: metrics of the two models (M_pt and M_seq).
        plt.figure()
        # select which model + metrics to plot (AUC series are left out here)
        model_series = ['M_pt_F1', 'M_pt_AUC', 'M_seq_F1', 'M_seq_AUC']
        for series in model_series:
            if 'AUC' not in series:
                curve = [ep[series] for ep in history]
                plt.plot(epoch_axis, curve, label=series)
        plt.legend()
        plt.xlabel('epoch')
        plt.ylabel('metric value')
        plt.title(f'{dset} - {algo} - {entry["datetime"]}')

        # One further figure per d value; the d values are taken from the
        # first recorded epoch.
        for d in history[0]['d']:
            plt.figure()
            # select which [Soft, Hard, Inf] + metrics to plot (AUC left out)
            d_series = ['Soft F1', 'Soft AUC', 'Hard F1', 'Hard AUC', 'Inf F1', 'Inf AUC']
            for series in d_series:
                if 'AUC' not in series:
                    curve = [ep['d'][d][series] for ep in history]
                    plt.plot(epoch_axis, curve, label=series)
            plt.legend()
            plt.xlabel('epoch')
            plt.ylabel('metric value')
            plt.title(f'{dset} - {algo} - {entry["datetime"]} - d={d}')