In [3]:
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
from matplotlib import colors as mcolors
import torch
import random
import pickle

import sklearn.metrics

from argparse import Namespace



def compute_stats_per_task(t, results):
    """Compute novelty-detection precision, recall and F-scores for one task.

    Args:
        t: Key (or index) of the task; used to index ``results.gt_novelty``
            and ``results.preds``.
        results: Object exposing ``gt_novelty`` and ``preds`` attributes, each
            indexable by ``t``. ``gt_novelty[t]`` holds the ground-truth labels
            (the value 1 marks a novel sample) and ``preds[t]`` the per-sample
            predictions; any non-zero prediction counts as "predicted novel".
            Both are presumably 1-D arrays of equal length -- TODO confirm
            against the code that writes the results pickle.

    Returns:
        Tuple ``(precision_novelty, recall_novelty, f_half, f_1, f_2)``.
        Precision and recall are 0.0 when their denominator is zero (nothing
        predicted novel / no novel sample in the task) instead of ``nan``, so
        they follow the same convention as the F-scores returned alongside.
    """
    gt_novelty = np.asarray(results.gt_novelty[t])

    # Boolean mask of the samples flagged as novel by the method.
    inds_above = np.asarray(results.preds[t]).astype(bool)

    # Boolean mask of the samples that really are novel.
    inds_new = (gt_novelty == 1)

    num_true_positive = np.count_nonzero(inds_new[inds_above])
    num_predicted_novel = np.count_nonzero(inds_above)
    num_novel = np.count_nonzero(inds_new)

    # Guard both ratios: an empty selection or a task without novel samples
    # would otherwise produce 0/0 -> nan together with a RuntimeWarning.
    precision_novelty = num_true_positive / num_predicted_novel if num_predicted_novel else 0.0
    recall_novelty = num_true_positive / num_novel if num_novel else 0.0

    # Compute F-scores on the dense 0/1 prediction vector.
    labels = gt_novelty
    predictions = inds_above.astype(float)
    f_1 = sklearn.metrics.fbeta_score(labels, predictions, beta=1.0)
    f_half = sklearn.metrics.fbeta_score(labels, predictions, beta=0.5)
    f_2 = sklearn.metrics.fbeta_score(labels, predictions, beta=2.0)

    return (precision_novelty, recall_novelty, f_half, f_1, f_2)


# Performance (F1, F-beta, etc.) vs. tasks

In [None]:
# ---- Experiment configuration ------------------------------------------
# `metric` is looked up in `plot_order` by the plotting cell, so it has to be
# one of that dictionary's keys.
metric = 'f_1'
dataset = 'cifar10'
exp_type = '1class'
num_tasks = 8

# Folder holding one sub-directory of results per evaluated version.
results_dir = '../src/novelty_dfm_CL/Results_DFM_CL/%s/' % (dataset,)

# Seed Python's RNG so the randomly drawn curve colours are repeatable.
random.seed(1)

# `versions`: sub-directory names under `results_dir`, one per curve.
# `names`: legend labels, matched to `versions` by position.
versions = []
names = [
    'incDFM (Ours)',
    'DFM',
    'Mahalanobis',
    'Generalized Odin',
    'Softmax',
]
# Every curve uses a solid line style.
lines = ['-'] * len(names)



In [None]:

number_of_colors = len(names)

# One random "#RRGGBB" colour per legend entry. A hex colour needs exactly six
# digits; tying the digit count to `number_of_colors` (as before) produced
# strings such as "#ABCDE" that matplotlib rejects.
colors = ["#" + ''.join(random.choice('0123456789ABCDEF') for _ in range(6))
          for _ in range(number_of_colors)]

# Position of each metric inside the tuple returned by compute_stats_per_task.
plot_order = {'Precision': 0, 'Recall': 1, 'f_0.5': 2, 'f_1': 3, 'f_2': 4}
num_tasks_map = {'cifar10': 8, 'emnist': 24, 'svhn': 8, 'inaturalist21': 7, 'cifar100': 18, '8dsets': 6}

# The per-dataset task count overrides any manually configured value.
num_tasks = num_tasks_map[dataset]

# Each version is labelled/styled by position, so fail early with a clear
# message instead of an IndexError halfway through plotting.
assert len(versions) <= len(names), \
    'need one entry in `names` (and `lines`) per entry in `versions`'


# Known issue: the IterTh version does not select samples with a single
# threshold, so evaluating it here would have required saving the selected
# indices or the predictions.


scores = []
tasks = []
plt.rcParams['figure.dpi'] = 200  # default for me was 75

plt.figure(figsize=(8, 4))
plt.title('%s through continual tasks for %s' % (metric.upper(), dataset))

for n in range(len(versions)):

    scores.append([])

    # NOTE: pickle.load can execute arbitrary code; only open result files
    # that were produced by trusted runs.
    with open('%s/%s/results_tasks.pickle' % (results_dir, versions[n]), 'rb') as handle:
        per_task_results_comp = pickle.load(handle)

    # Results stored as a plain dict are wrapped so that they can be read
    # through attribute access, as compute_stats_per_task expects.
    if isinstance(per_task_results_comp, dict):
        per_task_results_comp = Namespace(**per_task_results_comp)

    # Tasks are numbered from 1 to num_tasks inclusive.
    for t in np.arange(1, num_tasks + 1):
        vals = compute_stats_per_task(t, per_task_results_comp)
        scores[n].append(vals[plot_order[metric]])

    plt.plot(np.arange(1, len(scores[n]) + 1), scores[n], marker='.',
             linestyle=lines[n], label=names[n], color=colors[n])


# plt.tick_params(top='off',  right='off')

plt.ylim([0.3, 1.02])
# plt.xlim([-0.01+1,0.01 +len(tasks)-1])
plt.xlabel('Tasks')
plt.ylabel(metric)
plt.legend(fontsize='large', loc='center left', bbox_to_anchor=(1, 0.5))
# plt.savefig('%s/Scores_tasks_time.png'%dir_save)

