## _Track Performance_

Note: The Purity/Fake Rate and the Duplication Rate cannot be computed from `particles.h5` using the variables it currently contains.

In [None]:
import glob, os, sys, yaml

In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import torch
import time

In [None]:
from sklearn.cluster import DBSCAN
from multiprocessing import Pool
from functools import partial
from utils_plot import make_cmp_plot, pt_configs, eta_configs, get_ratio

In [None]:
# add the parent directory to the module search path
sys.path.append('..')
# use the GPU when torch reports CUDA as available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### _Read Particles HDF5 Store_

In [None]:
# HDF5 file read by this notebook
# NOTE(review): the store is only released by the `store.close()` call in the last cell
path = "../run_all/fwp_gnn_segmenting/eval/all_particles.h5"
store = pd.HDFStore(path)

In [None]:
# store.get('data').head()
# object stored under the 'data' key; used as a DataFrame in the rest of the notebook
df = store['data']

In [None]:
df.head()

In [None]:
# directory and filename prefix used to build the `outname` of the plots below
outdir = "./"
out_prefix = "all"

In [None]:
# Filtering: We can filter charge, pdgcode vs Tracking efficiency
# df = df[df['pdgcode'].isin([-211, 211])].reset_index(drop=True)
# df = df[df['pdgcode'].isin([-2212, 2212])].reset_index(drop=True)
# df = df[df['pdgcode'].isin([211])].reset_index(drop=True)
# df = df[df['q'] == 1].reset_index(drop=True)

In [None]:
# per-particle quantities pulled out of the DataFrame as NumPy arrays
pt = df.pt.values
peta = df.peta.values
vx = df.vx.values
vy = df.vy.values
d0 = np.sqrt(vx**2 + vy**2)             # distance of (vx, vy) from the origin in the x-y plane
z0 = df.vz.values
ptheta = (df.ptheta.values*180)/np.pi   # radians to degrees
pphi = (df.pphi.values*180)/np.pi       # radians to degrees

# pandas Series (not NumPy arrays) used below as masks on the arrays above
# NOTE(review): assumes both columns are boolean -- confirm against the HDF5 schema
rectable_idx = df.is_trackable          # reconstructable
matched_idx = df.is_matched             # matched

In [None]:
# plot the efficiency as a function of pT, eta
# `partial` pre-binds the legends and labels, so each call below only supplies
# the data arrays, the histogram configs, the x label, the output name and ymin
make_cmp_plot_fn = partial(make_cmp_plot,
                           legends=["Selected", "Reconstructable", "Matched"],
                           ylabel="Particles", ratio_label='Track Efficiency',
                           ratio_legends=["Tracking Efficiency (phys.)", "Tracking Efficiency (tech.)"])
                           # ratio_legends=[r"$\epsilon$", r"$\epsilon_{tech.}$"])

## _Overall Efficiencies_

In [None]:
# Let's find the physics and technical efficiencies
gen_pt = pt                                   # selected particles
true_pt = pt[rectable_idx]                    # reconstructable particles
reco_pt = pt[rectable_idx & matched_idx]      # reconstructable and matched particles

In [None]:
gen_pt.shape[0], true_pt.shape[0], reco_pt.shape[0]

In [None]:
# Physics Efficiency = (sel, matched)/sel, printed as a percentage
# (the numerator does not require the particle to be reconstructable)
print("efficiency (phys.): {:.4f}".format(pt[matched_idx].shape[0]/gen_pt.shape[0]*100))

In [None]:
# Technical Efficiency = (sel, reconstructable, matched)/(sel, reconstructable), printed as a percentage
print("efficiency (tech.): {:.4f}".format(reco_pt.shape[0]/true_pt.shape[0]*100))

### _1. Transverse Momentum_

In [None]:
min(pt), max(pt)

In [None]:
# pT range covered by the histogram bins
pt_min, pt_max = 0.0026, 0.6

# linear scale on x-axis: num=20 gives 20 bin edges, i.e. 19 bins
pt_bins = np.linspace(pt_min, pt_max, num=20)

# log scale on x-axis
# pt_bins = np.logspace(np.log10(pt_min), np.log10(pt_max), 10)

# passed as `configs` to make_cmp_plot_fn below; note that this rebinds the
# `pt_configs` name imported from utils_plot at the top of the notebook
pt_configs = {
    'bins': pt_bins,
    'histtype': 'step',
    'lw': 2,
    'log': False
}

In [None]:
# pT cuts: each value is a lower threshold applied to the particle pT
all_cuts = [0.0]

for (cut_pt) in all_cuts:
    print("cut_pt: {}".format(cut_pt))

    cuts = (pt > cut_pt)

    # make pt plots
    # (these assignments overwrite the notebook-level gen_pt / true_pt / reco_pt)
    gen_pt = pt[cuts]
    true_pt = pt[cuts & rectable_idx]
    reco_pt = pt[cuts & rectable_idx & matched_idx]
    
    # NOTE(review): the physics efficiency printed here uses the
    # reconstructable-and-matched particles as numerator, whereas the
    # "Overall Efficiencies" cell uses all matched particles; the two agree
    # only if every matched particle is also reconstructable -- confirm.
    print("efficiency (phys.): ", (reco_pt.shape[0]/gen_pt.shape[0]*100))
    print("efficiency (tech.): ", (reco_pt.shape[0]/true_pt.shape[0]*100))
    
    make_cmp_plot_fn([gen_pt, true_pt, reco_pt], 
        configs=pt_configs, xlabel=r"$p_t$ [GeV]",
        outname=os.path.join(outdir, "{}_pt_cut{}".format(out_prefix, cut_pt)),
        ymin=0.6)

### _2. Theta Angle_

In [None]:
min(ptheta), max(ptheta)

In [None]:
# theta binning in degrees: num=20 gives 20 bin edges, i.e. 19 bins
ptheta_bins = np.linspace(22, 130, num=20)

# passed as `configs` to make_cmp_plot_fn for the theta plots
ptheta_configs = {
    'bins': ptheta_bins,
    'histtype': 'step',
    'lw': 2,
    'log': False
}

In [None]:
# theta cuts: each value is a lower threshold (in degrees) applied to the particle theta
all_cuts = [0.0]

for (cut_ptheta) in all_cuts:
    print("cut_ptheta: {}".format(cut_ptheta))

    cuts = (ptheta > cut_ptheta)

    # make theta plots
    gen_ptheta  = ptheta[cuts]
    true_ptheta = ptheta[cuts & rectable_idx]
    reco_ptheta = ptheta[cuts & rectable_idx & matched_idx]

    # Normalise to the theta selection itself. The previous version divided by
    # gen_pt / true_pt, i.e. by whatever the transverse-momentum cell left
    # behind, which is only correct when both selections happen to coincide.
    print("efficiency (phys.): ", (reco_ptheta.shape[0]/gen_ptheta.shape[0]*100))
    print("efficiency (tech.): ", (reco_ptheta.shape[0]/true_ptheta.shape[0]*100))

    make_cmp_plot_fn([gen_ptheta, true_ptheta, reco_ptheta],
        configs=ptheta_configs, xlabel=r"$\theta$ [Deg.]",
        outname=os.path.join(outdir, "{}_ptheta_cut{}".format(out_prefix, cut_ptheta)),
        ymin=0.6)

### _3. Phi Angle_

In [None]:
min(pphi), max(pphi)

In [None]:
# phi binning in degrees: num=20 gives 20 bin edges, i.e. 19 bins
pphi_bins = np.linspace(-80, 180, num=20)

# passed as `configs` to make_cmp_plot_fn for the phi plots
pphi_configs = {
    'bins': pphi_bins,
    'histtype': 'step',
    'lw': 2,
    'log': False
}

In [None]:
# phi cuts: each value is a lower threshold (in degrees) applied to the particle phi
# NOTE(review): with a threshold of 0.0 the `pphi > cut_pphi` mask removes every
# particle with negative phi, although pphi_bins starts at -80 -- confirm that
# this is intended and not a leftover from the pT cell.
all_cuts = [0.0]

for (cut_pphi) in all_cuts:
    print("cut_pphi: {}".format(cut_pphi))

    cuts = (pphi > cut_pphi)

    # make phi plots
    gen_pphi  = pphi[cuts]
    true_pphi = pphi[cuts & rectable_idx]
    reco_pphi = pphi[cuts & rectable_idx & matched_idx]

    make_cmp_plot_fn([gen_pphi, true_pphi, reco_pphi], 
        configs=pphi_configs, xlabel=r"$\phi$ [Deg.]",
        outname=os.path.join(outdir, "{}_pphi_cut{}".format(out_prefix, cut_pphi)),
        ymin=0.6)

### _4. Decay Vertex_

In [None]:
min(d0), max(d0)

In [None]:
d0_bins = np.linspace(0.0, 15, num=15)   # 15 bin edges, i.e. 14 bins

# passed as `configs` to make_cmp_plot_fn for the d0 plots
d0_configs = {
    'bins': d0_bins,
    'histtype': 'step',
    'lw': 2,
    'log': False
}

In [None]:
# vertex cuts: each value is a lower threshold applied to d0
# NOTE(review): the comparison is strict, so with a threshold of 0.0 particles
# whose d0 is exactly 0 are excluded -- confirm that this is intended.
all_cuts = [0.0]

for (cut_d0) in all_cuts:
    print("cut_d0: {}".format(cut_d0))

    cuts = (d0 > cut_d0)

    # make d0 plots
    gen_d0  = d0[cuts]
    true_d0 = d0[cuts & rectable_idx]
    reco_d0 = d0[cuts & rectable_idx & matched_idx]

    make_cmp_plot_fn([gen_d0, true_d0, reco_d0], 
        configs=d0_configs, xlabel=r"$d_0$ [cm]",
        outname=os.path.join(outdir, "{}_d0_cut{}".format(out_prefix, cut_d0)),
        ymin=0.6)

## _Recreate the Summary from the Track Evaluation Script_

In [None]:
df.head()

- _Physics Efficiency_

In [None]:
# physics efficiency: matched particles divided by all rows of the table, as a percentage
n_sel_particles = df.shape[0]
n_matched_sel_particles = df[df.is_matched].shape[0]
print("Tracking Efficiency (Phys.): {:>5.3f}%".format(100 * n_matched_sel_particles/n_sel_particles))

In [None]:
df.shape[0], df.is_matched.shape[0], df.is_trackable.shape[0]

In [None]:
df[df.is_matched].shape[0]

- _Technical Efficiency_

In [None]:
# technical efficiency: particles that are both trackable and matched,
# divided by the trackable particles, as a percentage
n_reco_particles = df[df.is_trackable].shape[0]
n_matched_reco_particles = df[df.is_trackable & df.is_matched].shape[0]
print("Tracking Efficiency (Tech.): {:>5.3f}%".format(100*n_matched_reco_particles/n_reco_particles))

In [None]:
df.shape[0]

In [None]:
df[df.is_matched].shape[0]

In [None]:
df[df.is_trackable].shape[0]

In [None]:
df[df.is_trackable & df.is_matched].shape[0]

### _Plotting Efficiency_

In [None]:
df.head()

In [None]:
# Rebinds pt_bins / pt_configs from the transverse-momentum section; the two
# functions defined below read these names as globals.
pt_bins = np.linspace(0.0, 0.60, num=20)   # 20 bin edges, i.e. 19 bins

pt_configs = {
    'bins': pt_bins,
    'histtype': 'step',
    'lw': 2,
    'log': False
}

In [None]:
def plot_observable_performance(particles: pd.DataFrame):
    """Plot the generated, reconstructable and matched pT spectra.

    For every ``(pT, |eta|)`` cut, the particles passing the cut are split
    into three samples -- all of them, the trackable ones, and the ones that
    are both trackable and matched -- and handed to ``make_cmp_plot``.

    The histogram settings and the output directory are read from the
    notebook-level ``pt_configs`` and ``outdir``.

    Args:
        particles: table providing the columns ``pt``, ``peta``,
            ``is_trackable`` and ``is_matched``.
    """
    pt = particles.pt.values
    eta = particles.peta.values

    trackable = particles.is_trackable
    matched = particles.is_matched

    # pre-bind the legends and labels shared by every plot made below
    make_cmp_plot_fn = partial(make_cmp_plot,
        legends=["Generated", "Reconstructable", "Matched"],
        ylabel="Num. particles", ratio_label='Track efficiency',
        ratio_legends=["Physics Eff", "Technical Eff"])

    # each entry is (lower pT threshold, upper |eta| threshold)
    all_cuts = [(0, 4)]
    for (cut_pt, cut_eta) in all_cuts:
        cuts = (pt > cut_pt) & (np.abs(eta) < cut_eta)

        gen_pt = pt[cuts]
        true_pt = pt[cuts & trackable]
        reco_pt = pt[cuts & trackable & matched]

        # The axis label uses GeV, matching the other pT plots of this
        # notebook, which label the same `pt` column in GeV.
        make_cmp_plot_fn([gen_pt, true_pt, reco_pt],
                         configs=pt_configs, xlabel=r"$p_t$ [GeV]",
                         outname=os.path.join(outdir, "test_pt_cut{}".format(cut_pt)),
                         ymin=0.6)


In [None]:
plot_observable_performance(df)

In [None]:
def plot_pt_eff(particles, bins=None):
    """Plot the physics and technical tracking efficiency versus pT.

    Both curves share the numerator: the pT histogram of the particles that
    are trackable and matched. The physics efficiency divides it by the
    histogram of all particles, the technical efficiency by the histogram of
    the trackable particles. The ratios and their errors come from
    ``get_ratio``.

    Args:
        particles: table providing the columns ``pt``, ``is_trackable`` and
            ``is_matched``.
        bins: array of histogram bin edges. Defaults to the notebook-level
            ``pt_bins``, which is what the function always used before.
    """
    if bins is None:
        bins = pt_bins

    pt = particles.pt.values
    trackable = particles["is_trackable"]
    gen_pt = pt
    true_pt = pt[trackable]
    reco_pt = pt[trackable & particles["is_matched"]]

    # Histogram the three samples on the same bin edges
    gen_vals, _ = np.histogram(gen_pt, bins=bins)
    true_vals, bin_edges = np.histogram(true_pt, bins=bins)
    reco_vals, _ = np.histogram(reco_pt, bins=bins)

    # Bin centres and half-widths are identical for both curves, so compute
    # them once instead of once per curve.
    xvals = (bin_edges[1:] + bin_edges[:-1]) / 2
    xerrs = (bin_edges[1:] - bin_edges[:-1]) / 2

    fig, ax = plt.subplots(figsize=(8, 6))

    # Plot the ratio of the histograms as an efficiency
    curves = (
        (gen_vals, 'Phys. Efficiency'),
        (true_vals, 'Tech. Efficiency'),
    )
    for denominator, label in curves:
        eff, err = get_ratio(reco_vals, denominator)
        ax.errorbar(xvals, eff, xerr=xerrs, yerr=err, fmt='o', label=label)

    # Add x and y labels; the unit is kept outside math mode, as in the other
    # pT plots of this notebook
    ax.set_xlabel(r'$p_t$ [GeV]', fontsize=20)
    ax.set_ylabel('Track Efficiency', fontsize=20)
    ax.tick_params(axis='both', which='major', labelsize=15)
    ax.tick_params(axis='both', which='minor', labelsize=12)
    # ax.set_xlim(0, 0.6)
    ax.set_ylim(0, 1.5)
    # ax.legend(fontsize=20, loc='best')
    ax.grid(False)
    fig.tight_layout()
    # fig.savefig("test_eff.pdf")

In [None]:
plot_pt_eff(df)

In [None]:
store.close()