In [1]:
import sys
import os
import os.path
sys.path.insert(0, os.path.abspath("./simple-dnn"))

#Import the libraries we will need.
from IPython.display import display
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import pandas as pd
import tensorflow.contrib.slim as slim
import scipy.misc
import scipy
import scipy.io
from sklearn import metrics, preprocessing
from sklearn.neighbors import KernelDensity
import time
import pickle
import matplotlib.cm as cm
import pandas as pd

from util.openworld_sim import OpenWorldSim, OpenWorldMsData
from util.visualization import visualize_dataset_2d
from simple_dnn.cnn.dcnn import DCNN
from simple_dnn.util.format import UnitPosNegScale, reshape_pad
from simple_dnn.generative.vae import VariationalAutoencoder
from simple_dnn.generative.gan import MultiClassGAN
from simple_dnn.generative.discriminator import DiscriminatorDC
from simple_dnn.generative.generator import GeneratorDC
from simple_dnn.util.sample_writer import ImageGridWriter

from open_net import OpenNetFlat, OpenNetCNN
from exp_opennet_util import load_pickle_gz, save_pickle_gz, load_open_dataset
from util.metrics import auc

%matplotlib inline
import matplotlib.pyplot as plt
# To avoid type 3 fonts. ACM Digital library complain about this
# based on the recomendations here http://phyletica.org/matplotlib-fonts/
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.




In [2]:
def auc_list(pkl_files, max_fpr=1.0, use_logit_os=False):
    auc_list = []
    for pkl in pkl_files:
        result = load_pickle_gz(pkl)
        if use_logit_os: # Only for openmax model where min test_dist_all_class is not test_decision_function.
            result_auc = auc(result['test_true_y'][:,-1], 
                               np.amin(result['test_dist_all_class'], axis=1), 
                               pos_label=1, max_fpr=max_fpr,
                               plot=False, loc = 'lower right', 
                               figsize=[6,4], plot_threshold=False)
        else:
            result_auc = auc(result['test_true_y'][:,-1], 
                               result['test_decision_function'], 
                               pos_label=1, max_fpr=max_fpr,
                               plot=False, loc = 'lower right', 
                               figsize=[6,4], plot_threshold=False)
        auc_list.append(result_auc)
        
    
    return np.array(auc_list)

def acc_list(pkl_files):
    acc_list = []
    for pkl in pkl_files:
        result = load_pickle_gz(pkl)
        acc_list.append(metrics.accuracy_score(
            np.argmax(result['test_true_y'], axis=1), result['test_closed_predict_y']))
        
    
    return np.array(acc_list)



def prf_list(pkl_files):
    p_list = []
    r_list = []
    f_list = []
    for pkl in pkl_files:
        result = load_pickle_gz(pkl)
        p, r, f, _ = metrics.precision_recall_fscore_support(
            np.argmax(result['test_true_y'], axis=1), result['test_open_predict_y'])
        p_list.append(p)
        r_list.append(r)
        f_list.append(f)
        
    
    return np.array(p_list), np.array(r_list), np.array(f_list)

def prf_global_threshold_list(pkl_files):
    p_list = []
    r_list = []
    f_list = []
    for pkl in pkl_files:
        result = load_pickle_gz(pkl)
        cutoff = max(1, int(result['train_decision_function'].shape[0] * 0.01))
        threshold = sorted(result['train_decision_function'])[-cutoff]

        pred_y = result['test_closed_predict_y']
        score = result['test_decision_function']
        unknown_label = len(result['class_mean'])
        pred_y[score > threshold] = unknown_label

        p, r, f, _ = metrics.precision_recall_fscore_support(
            np.argmax(result['test_true_y'], axis=1), pred_y)
        p_list.append(p)
        r_list.append(r)
        f_list.append(f)

    return np.array(p_list), np.array(r_list), np.array(f_list)


def prf_closed_list(pkl_files):
    p_list = []
    r_list = []
    f_list = []
    for pkl in pkl_files:
        result = load_pickle_gz(pkl)
        p, r, f, _ = metrics.precision_recall_fscore_support(
            np.argmax(result['test_true_y'], axis=1), result['test_closed_predict_y'])
        p_list.append(p)
        r_list.append(r)
        f_list.append(f)
        
    
    return np.array(p_list), np.array(r_list), np.array(f_list)


In [3]:
def filter_files(result_path, dataset, network=None, model=None, exp_id=None):
    files = [f for f in os.listdir(result_path) if os.path.isfile(os.path.join(result_path, f))]
    
    dataset = dataset + '_'
    files = [f for f in files if dataset in f]
        
    if network:
        network = '_' + network + '_'
        files = [f for f in files if network in f]
    if model:
        model = '_' + model + '_'
        files = [f for f in files if model in f]
    if exp_id:
        exp_id = '_e' + str(exp_id) + '.'
        files = [f for f in files if str(exp_id) in f]
        
    return [os.path.join(result_path, f) for f in files]
    

In [4]:
def concat_files(result_path):
    files = [f for f in os.listdir(result_path) if os.path.isfile(os.path.join(result_path, f))]
    return [os.path.join(result_path, f) for f in files]

In [5]:
def compare_auc(result_dir, dataset, network, models=['ii', 'iimmf','ce', 'cemmf','triplet','tripletmmf','ceii','ceiimmf'], exp_id=None, max_fpr=1., ):
    """
    use_logit_os: if True, then report using the softmax logits to for calculating outlier score. 
                    this only for Openmax models.
    """
    auc_dic = {}
    for m in models:
        auc_dic[m] = {}
        files = filter_files(result_dir, dataset, network=network, exp_id=exp_id,
                             model=m if m != 'mav_dist' else 'openmax')
        if m == 'mav_dist':
            auc_dic[m]['auc'] = auc_list(files, max_fpr=max_fpr, use_logit_os=True)
        else:
            auc_dic[m]['auc'] = auc_list(files, max_fpr=max_fpr)
        auc_dic[m]['auc_avg'] = auc_dic[m]['auc'].mean()
        auc_dic[m]['auc_std'] = auc_dic[m]['auc'].std()
        
        
    display(pd.DataFrame(auc_dic)) 
    print('AUC AVG and STD ')
    
    
    ttest_p_value = {}
    for i in range(len(models)):
        ttest_p_value[models[i]] = {}
        for j in range(len(models)):# range(i, len(models)):
            _, pvalue = scipy.stats.ttest_ind(
                auc_dic[models[i]]['auc'],
                auc_dic[models[j]]['auc'])
            ttest_p_value[models[i]][models[j]] = pvalue
            
    display(pd.DataFrame(ttest_p_value)) 
    print('T-Test PValue')

In [6]:
def compare_prf(result_dir, dataset, network, models=['ii', 'iimmf','ce', 'cemmf','triplet','tripletmmf','ceii','ceiimmf'], exp_id=None,
#                 threshold_type='global',#'per_class'
                bbox_to_anchor=(1,1), figsize=(16, 8), loc='upper left', ncol=8,
                save=None, font_size=24, show_text=True, width=0.8,
                model_label_lookup={'iimmf': 'ii+mmf', 'ii':'ii', 'ce':'ce','cemmf':'mmf+ce', 'ceiimmf':'ii+mmf+ce','ceii':'ii+ce', 'triplet':'triplet','tripletmmf':'triplet+mmf', 'openmax':' openmax', 
                                    'g_openmax':'g_openmax', 'central':'central', 'tcl': "tcl"},
                openset=True, ylim=(0.2,1.1)):
        
    def autolabel(rects):
        """
        Attach a text label above each bar displaying its height
        """
        for rect in rects:
            height = rect.get_height()
            plt.text(rect.get_x() + rect.get_width()/2., 1.01*height,
                    '%.3f' % float(height),
                    ha='center', va='bottom')

    
    import matplotlib as mpl
    font = {'family' : 'normal',
#         'weight' : 'bold',
        'size'   : font_size}

    mpl.rc('font', **font)
    
    prf_dic = {}
    prf_table = {}
    colors=[ "purple", "black", "blue", "grey", "red", "green", "orange", "yellow", "pink"]
    plt.figure(figsize=figsize)
    legend=[]
    for i, m in enumerate(models):
        prf_dic[m] = {}
        prf_table[m] = {}
        if openset:
#             if threshold_type == 'per_class' or m == 'openmax': ## Open max does not have global threshold
            p, r, f = prf_list(filter_files(result_dir, dataset, network=network, exp_id=exp_id, model=m))
#             elif threshold_type == 'global':
#                 p, r, f = prf_global_threshold_list(
#                     filter_files(result_dir, dataset, network=network, exp_id=exp_id, model=m))
#             else:
#                 ValueError('threshold_type should be either "per_class" or "global"')
        else:
            p, r, f = prf_closed_list(filter_files(result_dir, dataset, network=network, exp_id=exp_id, model=m))
            
            
        if openset:
            prf_dic[m]['known_p_avg'] = p[:,:-1].mean(axis=1).mean()
            prf_dic[m]['known_p_std'] = p[:,:-1].mean(axis=1).std()
            prf_dic[m]['known_r_avg'] = r[:,:-1].mean(axis=1).mean()
            prf_dic[m]['known_r_std'] = r[:,:-1].mean(axis=1).std()
            prf_dic[m]['known_f_avg'] = f[:,:-1].mean(axis=1).mean()
            prf_dic[m]['known_f_std'] = f[:,:-1].mean(axis=1).std()
            prf_dic[m]['unknown_p_avg'] = p[:,-1].mean()
            prf_dic[m]['unknown_p_std'] = p[:,-1].std()
            prf_dic[m]['unknown_r_avg'] = r[:,-1].mean()
            prf_dic[m]['unknown_r_std'] = r[:,-1].std()
            prf_dic[m]['unknown_f_avg'] = f[:,-1].mean()
            prf_dic[m]['unknown_f_std'] = f[:,-1].std()
        prf_dic[m]['all_p_avg'] = p.mean(axis=1).mean()
        prf_dic[m]['all_p_std'] = p.mean(axis=1).std()
        prf_dic[m]['all_r_avg'] = r.mean(axis=1).mean()
        prf_dic[m]['all_r_std'] = r.mean(axis=1).std()
        prf_dic[m]['all_f_avg'] = f.mean(axis=1).mean()
        prf_dic[m]['all_f_std'] = f.mean(axis=1).std()
        prf_table[m]['Precision'] = p.mean(axis=1)
        prf_table[m]['Recall'] = r.mean(axis=1)
        prf_table[m]['F-Score'] = f.mean(axis=1)
        
        bar = plt.bar(
            [i, i+len(models)+1, i+(len(models)+1)*2], 
            [prf_table[m]['Precision'].mean(), prf_table[m]['Recall'].mean(), prf_table[m]['F-Score'].mean()],
            yerr=[prf_table[m]['Precision'].std(), prf_table[m]['Recall'].std(), prf_table[m]['F-Score'].std()],
            color=colors[i], align="center", width=width)
        legend.append((bar, m if model_label_lookup is None else model_label_lookup[m]))
        if show_text:
            autolabel(bar)
        
    lgd = plt.legend(zip(*legend)[0], zip(*legend)[1], bbox_to_anchor=bbox_to_anchor, loc=loc,
                     ncol=ncol, mode="expand", borderaxespad=0.)
    plt.xticks([ int(len(models)/2), int(len(models)/2 + len(models)+1), int(len(models)/2 + 2*(len(models)+1))], 
               [ 'Precision', 'Recall', 'F-Score'])
    plt.ylim(ylim)
    plt.tight_layout()
    if save:
        if not os.path.exists(os.path.dirname(save)):
            os.makedirs(os.path.dirname(save))
        plt.savefig(save, bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.show()
    
    mpl.rcdefaults() 
    
    display(pd.DataFrame(prf_dic)) 
    print('PRF AVG and STD ')
    
    ttest_p_value = {}
    for i in range(len(models)):
        ttest_p_value[models[i]] = {}
        for j in range(len(models)):# range(i, len(models)):
            _, pvalue = scipy.stats.ttest_ind(
                prf_table[models[i]]['F-Score'],
                prf_table[models[j]]['F-Score'])
            ttest_p_value[models[i]][models[j]] = pvalue
            
    display(pd.DataFrame(ttest_p_value)) 
    print('T-Test PValue')
    
# Global Threshold
# compare_prf(mnist_cnn_dir, 'mnist', 'cnn', models=['ii', 'ce', 'ceii', 'openmax'],
# #             save='data/results/fig/mnist_cnn_prf_all.pdf', 
#             threshold_type='global',
#             bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left', ncol=4, figsize=(16, 4))

In [7]:
def compare_accuracy(result_dir, dataset, network, models=['ii', 'iimmf','ce', 'cemmf','triplet','tripletmmf','ceii','ceiimmf'], exp_id=None, 
                     report_error_rate=False):
    acc_dic = {}
    for m in models:
        acc_dic[m] = {}
        files = filter_files(result_dir, dataset, network=network, exp_id=exp_id,
                             model=m)
        acc_dic[m]['acc'] = acc_list(files) * 100
        if report_error_rate:
            acc_dic[m]['acc'] = 100.0 - acc_dic[m]['acc']
        acc_dic[m]['acc_avg'] = acc_dic[m]['acc'].mean()
        acc_dic[m]['acc_std'] = acc_dic[m]['acc'].std()
        
        
    display(pd.DataFrame(acc_dic)) 
    print('Accuracy AVG and STD ')
    
    
    ttest_p_value = {}
    for i in range(len(models)):
        ttest_p_value[models[i]] = {}
        for j in range(len(models)):# range(i, len(models)):
            _, pvalue = scipy.stats.ttest_ind(
                acc_dic[models[i]]['acc'],
                acc_dic[models[j]]['acc'])
            ttest_p_value[models[i]][models[j]] = pvalue
            
    display(pd.DataFrame(ttest_p_value)) 
    print('T-Test PValue')


In [8]:
def training_time_list(pkl_files):
    time_list = []
    for pkl in pkl_files:
        result = load_pickle_gz(pkl)
        time_list.append(result['train_time'])
        
    return np.array(time_list)


def compare_training_time(result_dir, dataset, network, models=['ii', 'iimmf','ce', 'cemmf','triplet','tripletmmf','ceii','ceiimmf'],):
    t_dict = {}
    for i, m in enumerate(models):
        t_dict[m] = {}
        t = training_time_list(filter_files(result_dir, dataset, network=network, model=m))
        t_dict[m]['avg_time'] = t.mean()
        t_dict[m]['std_time'] = t.std()
        
    display(pd.DataFrame(t_dict)) 
    print('Training Time AVG and STD ')

In [9]:
def plot_kde(files, file_index=0, bins = 200, kde=None):
    files = sorted(files)
    result = load_pickle_gz(files[file_index])

    x_train = result['train_decision_function'][:, None]
    x_test = result['test_decision_function'][:, None]

    # [‘gaussian’|’tophat’|’epanechnikov’|’exponential’|’linear’|’cosine’] 
    if kde:
        kde = KernelDensity(bandwidth=1.0, algorithm='auto', kernel='gaussian', 
                            metric='euclidean', atol=0, rtol=0, breadth_first=True, 
                            leaf_size=40, metric_params=None).fit(x_train)
    else:
        kde = kde.fit(x_train)

    print('On Training')
    _ = plt.hist(x_train, bins=bins, normed=True)
    xspace = np.linspace(np.amin(x_train), np.amax(x_train), 300)
    plt.plot(xspace, np.exp(kde.score_samples(xspace[:, None])))
    plt.show()

    # On Test
    test_unknown_mask = result['test_true_y'][:, -1].astype(bool)
    test_known_mask = np.logical_not(test_unknown_mask)
    x_test_known = x_test[test_known_mask]
    x_test_unknown = x_test[test_unknown_mask]

    print('On Test Known')
    _ = plt.hist(x_test_known, bins=bins, normed=True)
    xspace = np.linspace(np.amin(x_test_known), np.amax(x_test_known), 300)
    plt.plot(xspace, np.exp(kde.score_samples(xspace[:, None])))
    plt.show()


    print('On Test Unknown')
    _ = plt.hist(x_test_unknown, bins=bins, normed=True, color='red')
    xspace = np.linspace(np.amin(x_test_unknown), np.amax(x_test), 300)
    plt.plot(xspace, np.exp(kde.score_samples(xspace[:, None])))
    plt.show()
    

In [10]:
mnist = 'data/results/cnn/mnist'
compare_auc(mnist, 'mnist', 'cnn', models=['ce','cemmf','triplet','tripletmmf','ii','iimmf'], max_fpr=1.)
compare_prf(mnist, 'mnist', 'cnn', models=['ce','cemmf','triplet','tripletmmf','ii','iimmf'],
    #        save='data/results/fig/mnist_cnn_prf_all.pdf', 
#             threshold_type='global', 
            font_size=17,
            bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left', ncol=4, figsize=(17, 4))
compare_training_time(mnist, 'mnist', 'cnn', models=['ce','cecmmf','triplet','tripletmmf','ii','iimmf'])

  from ipykernel import kernelapp as app
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,ce,cemmf,ii,iimmf,triplet,tripletmmf
auc,[],[],[],[],[],[]
auc_avg,,,,,,
auc_std,,,,,,


AUC AVG and STD 


Unnamed: 0,ce,cemmf,ii,iimmf,triplet,tripletmmf
ce,,,,,,
cemmf,,,,,,
ii,,,,,,
iimmf,,,,,,
triplet,,,,,,
tripletmmf,,,,,,


T-Test PValue



IndexError: too many indices for array

<Figure size 1224x288 with 0 Axes>

In [None]:
ms = 'data/results/cnn/msadjmat'
compare_auc(ms, 'msadjmat', 'cnn', models=['ce','cemmf','triplet','tripletmmf','ii','iimmf'], max_fpr=1.)
compare_prf(ms, 'msadjmat', 'cnn', models=['ce','cemmf','triplet','tripletmmf','ii','iimmf'],
        #    save='data/results/fig/msadjmat_cnn_prf_all.pdf', 
#             threshold_type='global', 
            font_size=17,
            bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left', ncol=4, figsize=(19, 4))

In [None]:
android = 'data/results/flat/android'
compare_auc(android, 'android', 'flat', models=['ce','cemmf','triplet','tripletmmf','ii','iimmf'], max_fpr=1.)
compare_prf(android, 'android', 'flat', models=['ce','cemmf','triplet','tripletmmf','ii','iimmf'],
    #        save='data/results/fig/android_flat_prf_all.pdf', 
#             threshold_type='global', 
            font_size=17,
            bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left', ncol=4, figsize=(19, 4))