In [1]:
import pickle

import matplotlib as mpl
import matplotlib.pyplot as plt
import nbfigtulz as ftl

import numpy as np
import pandas as pd
import scipy.stats

from tqdm import tqdm

In [2]:
# Start from an empty img/ directory: delete any existing one (and its contents), then recreate it.
# NOTE(review): presumably this is where the figures saved below end up -- confirm against nbfigtulz.
!rm -rf img/ && mkdir img/

In [3]:
def flatten_df(df, keys=('Target', 'Mu', 'Sigma')):
    """Expand per-row array columns into one output row per array element.

    The columns 'Method' and 'Model Path' are always included, followed by
    ``keys``. The size of each column's entries is decided from the FIRST row
    only. Columns whose entries hold a single element are repeated so that
    they line up with the largest column; all other columns are stacked
    across rows and flattened.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one entry (scalar or array-like) per row and column.
    keys : iterable of str
        Extra columns to flatten in addition to 'Method' and 'Model Path'.

    Returns
    -------
    pandas.DataFrame
        Flattened frame with the selected columns. An empty ``df`` yields an
        empty frame with those columns.
    """
    # dict.fromkeys drops duplicate names while preserving order.
    columns = list(dict.fromkeys(['Method', 'Model Path', *keys]))

    if len(df) == 0:
        return pd.DataFrame(columns=columns)

    # np.size always returns a plain int (1 for scalars and strings), whereas
    # np.prod(np.shape(scalar)) is the float 1.0, which is not a valid repeat
    # count when every selected column is scalar.
    sizes = {key: int(np.size(df[key].iloc[0])) for key in columns}
    n_elements = max(sizes.values())

    contents = {}
    for key in columns:
        if sizes[key] == 1:
            # Broadcast per-row scalars across all elements of that row.
            contents[key] = np.repeat(df[key], n_elements)
        else:
            contents[key] = np.stack(df[key], axis=0).flatten()

    return pd.DataFrame(contents)

In [4]:
def make_cutoff(pxl):
    """Mean absolute error after discarding the most uncertain rows, per method.

    For every distinct value of ``pxl['Method']`` the rows are sorted by
    'Sigma' in descending order. For each fraction p in 0.00, 0.01, ..., 0.99
    the first ``int(p * n)`` rows (the ones with the largest 'Sigma') are
    dropped and the mean of ``|Mu - Target|`` over the remaining rows is
    recorded.

    Parameters
    ----------
    pxl : pandas.DataFrame
        Frame with columns 'Method', 'Mu', 'Sigma' and 'Target'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Method', 'Percentile' (the fraction p, not a percentage) and
        'Error', with 100 rows per method. Empty if ``pxl`` has no rows.
    """
    columns = ['Method', 'Percentile', 'Error']
    percentiles = np.arange(100) / 100.0  # identical for every method

    # Collect one frame per method and concatenate once; growing a frame with
    # pd.concat inside the loop copies all earlier rows on every iteration and
    # lets the empty seed frame influence the result dtypes.
    frames = []
    for method in pxl['Method'].unique():
        ordered = pxl[pxl['Method'] == method].sort_values('Sigma', ascending=False)
        starts = (percentiles * ordered.shape[0]).astype(int)

        error = (ordered['Mu'] - ordered['Target']).abs()
        # .iloc makes the slice explicitly positional: the index labels are
        # shuffled by the sort and need not be unique.
        mean_error = [error.iloc[start:].mean() for start in starts]

        frames.append(pd.DataFrame({
            'Method': method,
            'Percentile': percentiles,
            'Error': mean_error,
        }))

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

In [5]:
def make_calibration(pxl):
    """Compare expected and observed confidence levels, per method.

    For each distinct value of ``pxl['Method']`` and each expected level p in
    0, 1/40, ..., 1, the observed level is the fraction of rows whose 'Target'
    lies below the p-quantile of a normal distribution with mean 'Mu' and
    standard deviation 'Sigma'.

    Parameters
    ----------
    pxl : pandas.DataFrame
        Frame with columns 'Method', 'Mu', 'Sigma' and 'Target'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Method', 'Model Path', 'Expected Conf.' and 'Observed Conf.',
        with 41 rows per method. 'Model Path' is never populated here and is
        kept (all missing) only so the column layout stays the same.
    """
    columns = ['Method', 'Model Path', 'Expected Conf.', 'Observed Conf.']
    expected_p = np.arange(41) / 40.0

    # Collect one frame per method and concatenate once instead of growing a
    # frame with pd.concat inside the loop.
    frames = []
    for method in pxl['Method'].unique():
        x = pxl[pxl['Method'] == method]

        observed_p = [
            (x['Target'] < scipy.stats.norm.ppf(p, x['Mu'], x['Sigma'])).mean()
            for p in expected_p
        ]

        frames.append(pd.DataFrame({
            'Method': method,
            'Expected Conf.': expected_p,
            'Observed Conf.': observed_p,
        }))

    if not frames:
        return pd.DataFrame(columns=columns)
    # reindex restores the (empty) 'Model Path' column in its original position.
    return pd.concat(frames, ignore_index=True).reindex(columns=columns)

In [6]:
def parse_file(file_name):
    """Load a pickled results frame and derive its cutoff and calibration tables.

    Only rows with ``|Epsilon| < 1e-8`` and ``OOD == False`` are used
    (presumably the unperturbed, in-distribution samples -- confirm against
    the code that writes these files).

    Parameters
    ----------
    file_name : str or path-like
        Path to a pickle file containing a DataFrame with at least the
        columns 'Epsilon', 'OOD', 'Method', 'Model Path', 'Target', 'Mu' and
        'Sigma'.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(make_cutoff(pxl), make_calibration(pxl))`` for the flattened
        selection ``pxl``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted cache files.
    # The context manager closes the file handle even if loading fails.
    with open(file_name, 'rb') as fh:
        df = pickle.load(fh)

    # Element-wise comparison on purpose: `df['OOD'] is False` would not work on a column.
    sel = (df['Epsilon'].abs() < 1e-8) & (df['OOD'] == False)
    pxl = flatten_df(df[sel], keys=['Target', 'Mu', 'Sigma'])

    return make_cutoff(pxl), make_calibration(pxl)


# Cutoff and calibration tables for each cached results file, keyed by the
# file-name suffix.
cutoffs, calibs = {}, {}
for k in tqdm(['original', 'al', 'ep']):
    cutoff, calib = parse_file(f'cached_depth_results_{k}.pkl')
    cutoffs[k], calibs[k] = cutoff, calib

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [04:41<00:00, 93.80s/it]


In [9]:
@ftl.with_context
def make_cutoff_plot(dfs):
    """Plot 'Error' against 'Percentile' (scaled to percent) for each variant.

    ``dfs`` maps the keys 'original', 'al' and 'ep' to frames with the columns
    'Method', 'Percentile' and 'Error'; only rows whose 'Method' equals
    'Evidential' are drawn. The figure is handed to ``ftl.save_fig`` under the
    name 'cutoffs' and that call's result is returned.
    """
    line_styles = {
        'original': dict(color='black', linestyle='--', dashes=(3, 5), label=r'$u_{\mathrm{ep}}$'),
        'al': dict(color='C1', label=r"$u'_{\mathrm{al}}$"),
        'ep': dict(color='C0', label=r"$u'_{\mathrm{ep}}$"),
    }

    fig, ax = plt.subplots()
    ax.set_xlabel('CL (%)')
    ax.set_ylabel(r'$|y_i - \gamma_i|$')

    # The drawing order also determines the order of the legend entries.
    for variant in ('ep', 'al', 'original'):
        frame = dfs[variant]
        evidential = frame[frame['Method'] == 'Evidential']
        ax.plot(evidential['Percentile'] * 100, evidential['Error'], **line_styles[variant])

    ax.legend()

    return ftl.save_fig(fig, 'cutoffs')


make_cutoff_plot(cutoffs)

cutoffs.png

In [10]:
@ftl.with_context
def make_calibration_plot(dfs):
    """Plot observed against expected confidence level (both in percent) for each variant.

    ``dfs`` maps the keys 'original', 'al' and 'ep' to frames with the columns
    'Method', 'Expected Conf.' and 'Observed Conf.'; only rows whose 'Method'
    equals 'Evidential' are drawn. The figure is handed to ``ftl.save_fig``
    under the name 'calibs' and that call's result is returned.
    """
    line_styles = {
        'original': dict(color='black', linestyle='--', dashes=(3, 5), label=r'$u_{\mathrm{ep}}$'),
        'al': dict(color='C1', label=r"$u'_{\mathrm{al}}$"),
        'ep': dict(color='C0', label=r"$u'_{\mathrm{ep}}$"),
    }

    fig, ax = plt.subplots()
    ax.set_xlabel('Expected CL (%)')
    ax.set_ylabel('Observed CL (%)')

    # The drawing order also determines the order of the legend entries.
    for variant in ('ep', 'al', 'original'):
        frame = dfs[variant]
        evidential = frame[frame['Method'] == 'Evidential']
        ax.plot(
            evidential['Expected Conf.'] * 100,
            evidential['Observed Conf.'] * 100,
            **line_styles[variant],
        )

    ax.legend()

    return ftl.save_fig(fig, 'calibs')


make_calibration_plot(calibs)

calibs.png