In [19]:
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
from matplotlib import colors as mcolors
import torch
import random
import pickle
from pathlib import Path
import json
import sklearn.metrics

from argparse import Namespace
from os.path import exists


# --------------- Inputs --------------------------------

dataset = 'inaturalist21' # dataset name (inaturalist21, cifar10, cifar100, emnist, 8dset)
run_name = '120322_163621' # run ID of results folder 


# --------------------------------------------------------



results_dir = '../src/novelty_dfm_CL/Results_DFM_CL/%s/'%dataset

random.seed(1)

experiment_path = results_dir +run_name+'/'



# Gather performance metrics


In [20]:

def get_config_value(j, key):
    if key in j:
        return j[key]
    else:
        return None

def compute_f1(precision, recall):
    
    fscore = (2*(precision*recall))/(precision+recall)
    
    return fscore



def compute_auroc_aupr(results_curves, summary_stats, test=False):
    
    if test:
        summary_stats['auroc_test']={'avg':round(np.mean(results_curves[:, 1]),4), 'last':round(results_curves[-1, 1],4)}
        summary_stats['aupr_test']={'avg':round(np.mean(results_curves[:, 2]),4), 'last':round(results_curves[-1, 2],4)}

    else:
        summary_stats['auroc']={'avg':round(np.mean(results_curves[:, 1]),4), 'last':round(results_curves[-1, 1],4)}
        summary_stats['aupr']={'avg':round(np.mean(results_curves[:, 2]),4), 'last':round(results_curves[-1, 2],4)}

    return summary_stats


def summarize_metric_task_incdfm(results, summary_stats):
        
    tasks = np.unique(results.T[0,...])
    # print('tasks', tasks)
    
    # Last - metrics 
    summary_stats['precision']={'last':round(results[-1,-2],4)}
    summary_stats['recall']={'last':round(results[-1,-1],4)}
    summary_stats['f1']={'last':round(compute_f1(summary_stats['precision']['last'], summary_stats['recall']['last']),4)}

    ## Get last value of each task to later compute averages 
    p_avg_tasks=[]
    r_avg_tasks=[]
    for t in tasks:
        inds_task = np.where(results_iters[:,0]==t)[0]
        p_avg_tasks.append(results_iters[inds_task[-1],-2])
        r_avg_tasks.append(results_iters[inds_task[-1],-1])
        
    # print('p_avg_tasks', p_avg_tasks)
    # print('r_avg_tasks', r_avg_tasks)
       
    
    summary_stats['precision']['avg']=round(np.mean(p_avg_tasks),4)
    summary_stats['recall']['avg']=round(np.mean(r_avg_tasks),4)
    summary_stats['f1']['avg']=round(compute_f1(summary_stats['precision']['avg'], summary_stats['recall']['avg']),4)

    return summary_stats
    
    
    
def parse_results(results_iters, ood_name, summary_stats):

    ## P, R and f1
    if ood_name=='incdfm':
        summary_stats = summarize_metric_task_incdfm(results_iters, summary_stats)
    else:
        summary_stats['precision']={'avg':round(np.mean(results_iters[:, -2]),4), 'last':round(results_iters[-1, -2],4)}
        summary_stats['recall']={'avg':round(np.mean(results_iters[:, -1]),4), 'last':round(results_iters[-1, -1],4)}
        summary_stats['f1']={'avg':round(compute_f1(summary_stats['precision']['avg'], summary_stats['recall']['avg']),4), \
            'last':round(compute_f1(summary_stats['precision']['last'], summary_stats['recall']['last']),4)}

    ## Auroc, Aupr, Auroc_test, Aupr_test
    

    return summary_stats


def compute_acc_avg(accs, summary_stats):
    tasks = np.unique(accs.T[0,...])
    epochs = np.unique(accs.T[1,...])
    
    num_epochs = epochs.shape[0]
    
    accs_last = accs.T[-1,...][-num_epochs:]
    
    summary_stats['acc_last_clf']=np.max(accs_last)
    
    return summary_stats
    
    




In [21]:
summary_stats={}
    
    
with open(Path(experiment_path) / Path("config_model.json"), "r") as config_file:
    j = json.load(config_file)
# correct version mismatch for naming
ood_name = get_config_value(j, "novelty_detector_name")
ood_th = get_config_value(j, "threshold_type")
if (ood_name=='dfm') and (ood_th=='iter'):
    ood_name = 'incdfm'


try:
    results_iters = np.genfromtxt(Path(experiment_path) / Path("novelty_accuracies_iter.txt"), delimiter=' ', dtype=float, skip_header=1)
    if results_iters.ndim == 1:
        print('No novelty_accuracies_iter file', experiment_path)
    summary_stats = parse_results(results_iters, ood_name, summary_stats)
except:
    print('No novelty_accuracies_iter file', experiment_path)
        
        

# 2) ------- get Auroc/Aupr for holdout_old x train_new
try:
    results_curves = np.genfromtxt(Path(experiment_path) / Path("novelty_eval.txt"), delimiter=' ', dtype=float, skip_header=1)
    if results_curves.ndim == 1:
        print('No novelty_eval file', experiment_path)
        summary_stats = compute_auroc_aupr(results_curves, summary_stats, test=False)
except:
    ## Will probably be multitask runs 
    ## separate those from traditional CL runs in a separate folder 
    print('No novelty_eval file', experiment_path)

    

# 2) ------- get Auroc_test/Aupr_test for test_old x test_new
try:
    results_curves = np.genfromtxt(Path(experiment_path) / Path("novelty_eval_test.txt"), delimiter=' ', dtype=float, skip_header=1)
    if results_curves.ndim == 1:
        print('No novelty_eval_test file', experiment_path)
    summary_stats = compute_auroc_aupr(results_curves, summary_stats, test=True)    
except:
    print('No novelty_eval_test file', experiment_path)
    
    
try:
    results_acc_clf = np.genfromtxt(Path(experiment_path) / Path("acc_avg.txt"), delimiter=' ', dtype=float)
    summary_stats = compute_acc_avg(results_acc_clf, summary_stats)    
except:
    print('No acc_avg file', experiment_path)


print(summary_stats)

No acc_avg file /lab/arios/ProjIntel/incDFM/src/novelty_dfm_CL/Results_DFM_CL/inaturalist21/120322_163621/
{'precision': {'avg': 0.6031, 'last': 0.6113}, 'recall': {'avg': 0.7366, 'last': 0.6564}, 'f1': {'avg': 0.6632, 'last': 0.633}, 'auroc_test': {'avg': 0.6038, 'last': 0.5545}, 'aupr_test': {'avg': 0.6131, 'last': 0.565}}
