In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib widget
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display, Markdown, clear_output
import ipywidgets as widgets
import analysis.yinf as yinf
import analysis.products as products
import analysis.util as util
from data.util import *
import os, sys, re
from scipy.stats import gaussian_kde
# Root directory of the ML reconstruction outputs; $SCRATCH is taken from the environment.
ML_DIR = os.path.expandvars("$SCRATCH/mlreco_cell/")
# Integer index -> entry name for everything directly under ML_DIR, in sorted order.
NETS = {i: name for i, name in enumerate(sorted(os.listdir(os.path.expandvars(ML_DIR))))}
# Root directory of the simulation outputs.
SIM_DIR = os.path.expandvars("$SCRATCH/larsim/")
# Integer index -> entry name for the SIM_DIR entries whose name contains "reco".
SIMS = {
    i: name
    for i, name in enumerate(sorted(
        entry for entry in os.listdir(os.path.expandvars(SIM_DIR)) if "reco" in entry
    ))
}

def parse_stats_dir(stats_dir = './stats'):
    """Index the statistics files stored exactly two directory levels below `stats_dir`.

    The expected layout is ``<stats_dir>/<net>/<inf>/<file>``. Files located at any
    other depth are ignored.

    Parameters
    ----------
    stats_dir : str
        Directory to walk.

    Returns
    -------
    pandas.DataFrame
        One row per file with columns ``net`` and ``inf`` (the two directory
        names), ``epoch`` (first value returned by ``inf_file_info`` for the file
        name), ``thres`` (the number following ``thres`` in the file name, or -1
        when the name does not contain exactly one such match) and ``file`` (path
        to the file). The frame is empty, but has the same columns, when nothing
        matches.
    """
    records = []
    for root, _dirs, files in os.walk(stats_dir):
        # Split on os.sep rather than a literal '/' so the depth test also works
        # on platforms whose path separator is not '/'.
        header = os.path.relpath(root, stats_dir).split(os.sep)
        if len(header) != 2: continue
        for fname in files:
            # inf_file_info is not defined in this notebook -- presumably it comes
            # from the `data.util` star import; only its first return value is used.
            epoch, _ = inf_file_info(fname)
            matches = re.findall('thres([0-9]*[.]?[0-9]+)', fname)
            # -1 is the sentinel for "no (unambiguous) threshold in the file name".
            thres = float(matches[0]) if len(matches) == 1 else -1
            records.append(header + [epoch, thres, os.path.join(root, fname)])
    return pd.DataFrame(records, columns=['net', 'inf', 'epoch', 'thres', 'file'])

# Index the stats files under the default './stats' directory once, ordered by epoch.
STATS = parse_stats_dir().sort_values(by=['epoch'])

def select_stats(net=None, inf=None, thres=None, first=0, last=-1, stride=1, stats_df=STATS):
    """Return the stats file paths matching the given filters and epoch window.

    Parameters
    ----------
    net, inf, thres : optional
        When not None, keep only rows whose column of the same name equals the value.
    first, last, stride : int
        Positions into the sorted list of distinct epochs left after filtering.
        `last` is inclusive; the value -1 means "through the final epoch".
    stats_df : pandas.DataFrame
        Table to select from; it must provide the columns ``net``, ``inf``,
        ``thres``, ``epoch`` and ``file``.

    Returns
    -------
    list
        The ``file`` entries of every remaining row whose epoch was selected.
    """
    for column, wanted in (('net', net), ('inf', inf), ('thres', thres)):
        if wanted is not None:
            stats_df = stats_df[stats_df[column] == wanted]

    distinct_epochs = sorted(set(stats_df.epoch))
    # An open-ended slice is needed for last == -1, because last + 1 would be 0.
    stop = None if last == -1 else last + 1
    chosen_epochs = distinct_epochs[first:stop:stride]

    keep = stats_df.epoch.isin(chosen_epochs)
    return list(stats_df[keep]['file'])


In [None]:
# Display the index -> name mapping of the entries found under ML_DIR.
NETS

In [None]:
# Display the index -> name mapping of the "reco" entries found under SIM_DIR.
SIMS

In [None]:
def errdist(event, true_thres, thres, xy_dir, yinf_file):
    """Nearest-neighbour distances of the misclassified voxels of one event.

    Parameters
    ----------
    event : index of the event, forwarded to `products.parse_xy`.
    true_thres : cut applied to the truth voxels via `util.filter_voxel_val`.
    thres : cut applied to the inferred voxels via `util.filter_voxel_val`.
    xy_dir : directory used by `yinf.get_xy_file` to locate the matching xy file.
    yinf_file : path of the inference output file.

    Returns
    -------
    (FN_dists, FP_dists)
        For each false-negative voxel, the distance to its nearest ghost voxel;
        for each false-positive voxel, the distance to its nearest truth voxel.

    NOTE(review): the meaning of the four values returned by `util.comp_voxels`
    is inferred from how they are named here (first = only in the first argument,
    fourth = only in the second) -- confirm against `analysis.util`.
    """
    from scipy.spatial import KDTree

    truth_all, active, info = products.parse_xy(event, 1, yinf.get_xy_file(xy_dir, yinf_file))
    truth = util.filter_voxel_val(truth_all, true_thres)
    # Split the active voxels into ghosts and the truth voxels to compare against.
    ghost, _, truth, _ = util.comp_voxels(active, truth)

    predicted = util.filter_voxel_val(yinf.parse_yinf(info, yinf_file), thres)
    false_neg, _, _, false_pos = util.comp_voxels(truth, predicted)

    truth_tree = KDTree(list(truth.keys()))
    ghost_tree = KDTree(list(ghost.keys()))
    # query() returns (distances, indices); only the distances are needed.
    fn_dists = ghost_tree.query(list(false_neg.keys()))[0]
    fp_dists = truth_tree.query(list(false_pos.keys()))[0]
    return fn_dists, fp_dists

# Pool the per-event distances over events 0-4 of inference files 020-049
# (epoch-24 output of the ghost3D network on the BeamCosmic sample).
FN_dists, FP_dists = [], []
for index in range(20, 50):
    for event in range(5):
        # 0.17 is passed as errdist's true_thres and .5 as its thres.
        FN_dist, FP_dist = errdist(event, 0.17, .5, xy_dir=SIM_DIR+"reco_1GeV_BeamCosmic_xy/", yinf_file=ML_DIR+"singleE-BeamCosmic-tpc1_ghost3D-164-lr0.01/inference/epoch24-batch0-reco_BeamCosmic_%03d_yinf-TPC1.npy"%index)
        FN_dists.extend(FN_dist)
        FP_dists.extend(FP_dist)

In [None]:
# Normalised histograms of the pooled FN and FP nearest-neighbour distances.
fig, ax = plt.subplots()

# Per-sample weights of 1/N turn the counts into proportions of each sample.
Ns, bins, _ = ax.hist(
    [FN_dists, FP_dists],
    histtype='step',
    label=["FN: N=%d"%len(FN_dists), "FP: N=%d"%len(FP_dists)],
    bins=np.arange(20),
    weights=[np.ones(len(FN_dists))/len(FN_dists), np.ones(len(FP_dists))/len(FP_dists)],
)
# "distance < 2" spans the first two unit-wide bins, [0, 1) and [1, 2), so add
# both instead of reporting the [1, 2) bin alone.
ax.text(3, 0.2, "Proportion of FN with distance < 2: %.3f"%np.sum(Ns[0][:2]))
ax.text(3, 0.5, "Proportion of FP with distance < 2: %.3f"%np.sum(Ns[1][:2]))
ax.set_xticks(bins)
# Base 10 is the default for the log scale; the former `basey` keyword is no
# longer accepted by current Matplotlib releases.
ax.set_yscale('log')
ax.set_title("Distance of Misclassified Voxel to its Predicted Class of Voxels")
ax.set_xlabel("Distance [voxel size]")
ax.set_ylabel("Proportion")
ax.legend()

In [None]:
def errdist(event, true_thres, thres, xy_dir, yinf_file):
    """Distance from each false-positive voxel to the nearest active truth voxel.

    Parameters
    ----------
    event : index of the event, forwarded to `products.parse_xy`.
    true_thres : cut applied to the truth voxels via `util.filter_voxel_val`.
    thres : cut applied to the inferred voxels via `util.filter_voxel_val`.
    xy_dir : directory used by `yinf.get_xy_file` to locate the matching xy file.
    yinf_file : path of the inference output file.

    Returns
    -------
    numpy.ndarray
        One distance per false-positive voxel. The array is empty when the event
        has no false positives, and filled with ``inf`` when there is no
        reference voxel to measure against.

    NOTE(review): the fourth value returned by `util.comp_voxels` is taken to be
    the voxels present only in its second argument, and the first value of
    `util.filter_voxels_coords` the truth voxels restricted to the active
    coordinates -- confirm against `analysis.util`.
    """
    from scipy.spatial import distance

    xy_file = yinf.get_xy_file(xy_dir, yinf_file)
    voxel_truth, voxel_active, event_info = products.parse_xy(event, 1, xy_file)
    voxel_truth = util.filter_voxel_val(voxel_truth, true_thres)
    voxel_T_active = util.filter_voxels_coords(voxel_active.keys(), voxel_truth)[0]
    voxel_yinf = yinf.parse_yinf(event_info, yinf_file)
    voxel_P = util.filter_voxel_val(voxel_yinf, thres)
    _, _, _, voxel_FP = util.comp_voxels(voxel_truth, voxel_P)

    fp_coords = list(voxel_FP.keys())
    ref_coords = list(voxel_T_active.keys())
    # cdist rejects empty coordinate lists and np.min cannot reduce an empty
    # axis, so both degenerate cases are answered before calling them.
    if not fp_coords:
        return np.empty(0)
    if not ref_coords:
        return np.full(len(fp_coords), np.inf)
    return np.min(distance.cdist(fp_coords, ref_coords), axis=1)

# Pool the distances returned by errdist over events 0-9 of inference file 953.
# NOTE(review): the errdist defined just above returns distances of *false
# positive* voxels, so despite its name FN_dists holds FP distances here.
FN_dists = []
for index in range(953, 954):
    for event in range(10):
        # Both thresholds are 0 for the single-electron sample. The BeamCosmic
        # sample can be used instead by pointing xy_dir at
        # "reco_1GeV_BeamCosmic_xy/" and yinf_file at the matching
        # "epoch24-batch0-reco_BeamCosmic_%03d_yinf-TPC1.npy" file.
        FN_dists.extend(errdist(event, 0., 0, xy_dir=SIM_DIR+"reco_1GeV_ElectronWire_xy-v3_3/", yinf_file=ML_DIR+"singleE-BeamCosmic-tpc1_ghost3D-164-lr0.01/inference/epoch24-batch0-reco_singleElectron_%03d_yinf-TPC1.npy"%index))


In [None]:
# Cumulative, normalised histogram of the pooled false-positive distances.
# FN_dists is the name used by the cell that fills the list; the values are the
# FP distances, which is why the legend and the annotation both say "FP".
fig, ax = plt.subplots()

Ns, bins, _ = ax.hist(
    FN_dists,
    histtype='step',
    label="FP: N=%d"%len(FN_dists),
    bins=np.arange(20),
    cumulative=True,
    weights=np.ones(len(FN_dists))/len(FN_dists),
)
# With cumulative=True, Ns[1] is the proportion accumulated through the [1, 2) bin.
ax.text(3, 0.2, "Proportion of FP with distance < 2: %.3f"%Ns[1])
ax.set_xticks(bins)
# Base 10 is the default for the log scale; the former `basey` keyword is no
# longer accepted by current Matplotlib releases.
ax.set_yscale('log')
ax.set_title("Distance of Misclassified Voxel to its Predicted Class of Voxels")
ax.set_xlabel("Distance [voxel size]")
ax.set_ylabel("Proportion")
ax.legend()