In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import uproot

In [None]:
# Path of the ROOT file that the next cell opens with uproot to read the per-particle trees.
input_file = 'truth_charged.root'

In [None]:
#load the ntuples as preliminary uncut dataframes
# Open the ROOT file a single time and close it as soon as the five trees have been
# read, instead of opening it once per particle type and never closing the handles.
with uproot.open(input_file) as root_file:
    uncut_pion = root_file['pion'].arrays(library='pd')
    uncut_kaon = root_file['kaon'].arrays(library='pd')
    uncut_proton = root_file['proton'].arrays(library='pd')
    uncut_electron = root_file['electron'].arrays(library='pd')
    uncut_muon = root_file['muon'].arrays(library='pd')

In [None]:
#apply a first momentum selection
# Keep only candidates whose momentum column `p` lies strictly between 1 and 4;
# the same window is applied to every particle sample.
pion, kaon, proton, electron, muon = (
    uncut[(uncut.p < 4) & (uncut.p > 1)]
    for uncut in (uncut_pion, uncut_kaon, uncut_proton, uncut_electron, uncut_muon)
)

## Copy and paste these values into analysis/scripts/stdCharged.py

In [None]:
#find 5th, 10th and 15th percentiles, corresponding to 95%, 90% and 85% efficiency cut points
digits=4
# One row per particle type (pion, kaon, proton, electron, muon), one column per
# percentile. Each row uses only the truth-matched candidates (|mcPDG| equal to the
# expected PDG code) and the ID variable of that same particle type.
# The list is the last expression of the cell, so it is what the notebook displays.
[[round(np.percentile(sample[abs(sample.mcPDG) == pdg][id_name], q), digits)
  for q in (5, 10, 15)]
 for sample, pdg, id_name in (
     (pion, 211, 'pionID'),
     (kaon, 321, 'kaonID'),
     (proton, 2212, 'protonID'),
     (electron, 11, 'electronID'),
     (muon, 13, 'muonID'),
 )]

Note that if a working point rounds to zero at the chosen precision (for `digits=4`, any value below 0.00005), the output will be 0.0 and stdCharged will not create such a list. If a specific list is used for skimming or physics validation, it may be wise to increase the value of **digits** or switch to a different list. Coordinate with the validation liaisons to figure out the best approach.

## Confusion Matrices

In [None]:
#some definitions for plotting
dsets = [pion,kaon,proton,electron,muon]
setlabels = ['Pion','Kaon','Proton','Electron','Muon']
pidlabels = ['pionID','kaonID','protonID','electronID','muonID']
colorset = ['tab:blue', 'tab:red', 'tab:orange', 'tab:green', 'tab:purple']
pdgid = [211,321,2212,11,13]
confusion = [[0 for x in range(5)] for y in range(5)]
purity = [[0 for x in range(5)] for y in range(5)]
pidCut = 0.5

In [None]:
# Fill the confusion and purity matrices (row = sample, column = ID variable).
#   confusion[x][j]: fraction of the truth-matched candidates of sample x that pass
#                    the cut on ID variable j.
#   purity[x][j]:    fraction of the candidates of sample x passing the cut on ID
#                    variable j that are truth-matched.
# An entry with a zero denominator (no truth-matched candidates, or nothing passing
# the cut) is stored as NaN instead of raising ZeroDivisionError.
for x, (dset, pdg) in enumerate(zip(dsets, pdgid)):
    # The truth-matching mask is the same for every column, so build it once per sample.
    is_true = abs(dset.mcPDG) == pdg
    n_true = int(is_true.sum())
    for j, id_name in enumerate(pidlabels):
        passes = dset[id_name] > pidCut
        n_pass = int(passes.sum())
        n_true_pass = int((passes & is_true).sum())
        confusion[x][j] = n_true_pass / n_true if n_true else np.nan
        purity[x][j] = n_true_pass / n_pass if n_pass else np.nan

In [None]:
# Draw the confusion and purity matrices as annotated heatmaps with identical
# styling, save each one as a PDF in the working directory, and display it.
for matrix, title, outfile in [
    (confusion, 'Confusion Matrix', 'confusion_matrix.pdf'),
    (purity, 'Purity Matrix', 'purity_matrix.pdf'),
]:
    fig, ax = plt.subplots(figsize=(10,8))
    sns.heatmap(matrix,
                xticklabels=pidlabels,
                yticklabels=setlabels,
                cmap='YlGnBu',
                vmin=0,vmax=1,   # both matrices hold fractions, so fix the colour scale to [0, 1]
                annot=True,
                square=True,
                ax=ax,
                )
    ax.set_title(title)
    # Hide the minor tick marks on both axes but keep the tick labels.
    ax.tick_params(axis='x', which='minor', bottom=False, top=False, labelbottom=True)
    # labelleft is the y-axis counterpart of labelbottom, which has no effect on the y axis.
    ax.tick_params(axis='y', which='minor', left=False, right=False, labelleft=True)
    fig.savefig(outfile, bbox_inches='tight')
    plt.show()

## ROC Plots

In [None]:
# Number of thresholds scanned for each ROC curve.
roc_bins=1000
# One buffer per sample (pion, kaon, proton, electron, muon): five independent
# lists of roc_bins zeros that the ROC cells overwrite element by element.
pi, ka, pr, el, mu = ([0] * roc_bins for _ in range(5))

In [None]:
# ROC scan of pionID: for every threshold i/roc_bins, store the fraction of the
# truth-matched candidates of each sample whose pionID exceeds the threshold.
# The truth-matched pionID values do not depend on the threshold, so they are
# selected once per sample instead of once per threshold.
for efficiencies, scores in [
    (pi, pion[abs(pion.mcPDG) == 211].pionID),
    (ka, kaon[abs(kaon.mcPDG) == 321].pionID),
    (pr, proton[abs(proton.mcPDG) == 2212].pionID),
    (el, electron[abs(electron.mcPDG) == 11].pionID),
    (mu, muon[abs(muon.mcPDG) == 13].pionID),
]:
    n_true = len(scores.index)
    for i in range(roc_bins):
        efficiencies[i] = len(scores[scores > (i/roc_bins)].index)/n_true

# Pion efficiency versus the rate at which each other particle type passes the same cut.
plt.figure(figsize=(10,5))
plt.plot(ka, pi, color=colorset[1], lw=2, label=setlabels[1])
plt.plot(pr, pi, color=colorset[2], lw=2, label=setlabels[2])
plt.plot(el, pi, color=colorset[3], lw=2, label=setlabels[3])
plt.plot(mu, pi, color=colorset[4], lw=2, label=setlabels[4])
plt.xlabel('False Pions')
plt.ylabel('True Pions')
plt.legend(loc=4)
plt.show()

In [None]:
# ROC scan of kaonID: for every threshold i/roc_bins, store the fraction of the
# truth-matched candidates of each sample whose kaonID exceeds the threshold.
# The truth-matched kaonID values do not depend on the threshold, so they are
# selected once per sample instead of once per threshold.
for efficiencies, scores in [
    (pi, pion[abs(pion.mcPDG) == 211].kaonID),
    (ka, kaon[abs(kaon.mcPDG) == 321].kaonID),
    (pr, proton[abs(proton.mcPDG) == 2212].kaonID),
    (el, electron[abs(electron.mcPDG) == 11].kaonID),
    (mu, muon[abs(muon.mcPDG) == 13].kaonID),
]:
    n_true = len(scores.index)
    for i in range(roc_bins):
        efficiencies[i] = len(scores[scores > (i/roc_bins)].index)/n_true

# Kaon efficiency versus the rate at which each other particle type passes the same cut.
plt.figure(figsize=(10,5))
plt.plot(pi, ka, color=colorset[0], lw=2, label=setlabels[0])
plt.plot(pr, ka, color=colorset[2], lw=2, label=setlabels[2])
plt.plot(el, ka, color=colorset[3], lw=2, label=setlabels[3])
plt.plot(mu, ka, color=colorset[4], lw=2, label=setlabels[4])
plt.xlabel('False Kaons')
plt.ylabel('True Kaons')
plt.legend(loc=4)
plt.show()

In [None]:
# ROC scan of protonID: for every threshold i/roc_bins, store the fraction of the
# truth-matched candidates of each sample whose protonID exceeds the threshold.
# The truth-matched protonID values do not depend on the threshold, so they are
# selected once per sample instead of once per threshold.
for efficiencies, scores in [
    (pi, pion[abs(pion.mcPDG) == 211].protonID),
    (ka, kaon[abs(kaon.mcPDG) == 321].protonID),
    (pr, proton[abs(proton.mcPDG) == 2212].protonID),
    (el, electron[abs(electron.mcPDG) == 11].protonID),
    (mu, muon[abs(muon.mcPDG) == 13].protonID),
]:
    n_true = len(scores.index)
    for i in range(roc_bins):
        efficiencies[i] = len(scores[scores > (i/roc_bins)].index)/n_true

# Proton efficiency versus the rate at which each other particle type passes the same cut.
plt.figure(figsize=(10,5))
plt.plot(pi, pr, color=colorset[0], lw=2, label=setlabels[0])
plt.plot(ka, pr, color=colorset[1], lw=2, label=setlabels[1])
plt.plot(el, pr, color=colorset[3], lw=2, label=setlabels[3])
plt.plot(mu, pr, color=colorset[4], lw=2, label=setlabels[4])
plt.xlabel('False Protons')
plt.ylabel('True Protons')
plt.legend(loc=4)
plt.show()

In [None]:
# ROC scan of electronID: for every threshold i/roc_bins, store the fraction of the
# truth-matched candidates of each sample whose electronID exceeds the threshold.
# The truth-matched electronID values do not depend on the threshold, so they are
# selected once per sample instead of once per threshold.
for efficiencies, scores in [
    (pi, pion[abs(pion.mcPDG) == 211].electronID),
    (ka, kaon[abs(kaon.mcPDG) == 321].electronID),
    (pr, proton[abs(proton.mcPDG) == 2212].electronID),
    (el, electron[abs(electron.mcPDG) == 11].electronID),
    (mu, muon[abs(muon.mcPDG) == 13].electronID),
]:
    n_true = len(scores.index)
    for i in range(roc_bins):
        efficiencies[i] = len(scores[scores > (i/roc_bins)].index)/n_true

# Electron efficiency versus the rate at which each other particle type passes the same cut.
plt.figure(figsize=(10,5))
plt.plot(pi, el, color=colorset[0], lw=2, label=setlabels[0])
plt.plot(ka, el, color=colorset[1], lw=2, label=setlabels[1])
plt.plot(pr, el, color=colorset[2], lw=2, label=setlabels[2])
plt.plot(mu, el, color=colorset[4], lw=2, label=setlabels[4])
plt.xlabel('False Electrons')
plt.ylabel('True Electrons')
plt.legend(loc=4)
plt.show()

In [None]:
# ROC scan of muonID: for every threshold i/roc_bins, store the fraction of the
# truth-matched candidates of each sample whose muonID exceeds the threshold.
# The truth-matched muonID values do not depend on the threshold, so they are
# selected once per sample instead of once per threshold.
for efficiencies, scores in [
    (pi, pion[abs(pion.mcPDG) == 211].muonID),
    (ka, kaon[abs(kaon.mcPDG) == 321].muonID),
    (pr, proton[abs(proton.mcPDG) == 2212].muonID),
    (el, electron[abs(electron.mcPDG) == 11].muonID),
    (mu, muon[abs(muon.mcPDG) == 13].muonID),
]:
    n_true = len(scores.index)
    for i in range(roc_bins):
        efficiencies[i] = len(scores[scores > (i/roc_bins)].index)/n_true

# Muon efficiency versus the rate at which each other particle type passes the same cut.
plt.figure(figsize=(10,5))
plt.plot(pi, mu, color=colorset[0], lw=2, label=setlabels[0])
plt.plot(ka, mu, color=colorset[1], lw=2, label=setlabels[1])
plt.plot(pr, mu, color=colorset[2], lw=2, label=setlabels[2])
plt.plot(el, mu, color=colorset[3], lw=2, label=setlabels[3])
plt.xlabel('False Muons')
plt.ylabel('True Muons')
plt.legend(loc=4)
plt.show()

## [Expert] Binary PID matrices

In [None]:
# 5x5 matrices for the binary PID variables (row = sample, column = binary selection).
b_confusion = [[0 for x in range(5)] for y in range(5)]
b_purity = [[0 for x in range(5)] for y in range(5)]
b_pidCut = 0.5

# Column definitions: (binary ID column, True if the selection is "above the cut").
# Column 0 selects kBinaryID *below* the cut; every other column selects above it.
# NOTE(review): column 0 is presumably the pion side of a pion/kaon binary
# discriminator - confirm against the ntuple definition.
binary_selections = [
    ('kBinaryID', False),
    ('kBinaryID', True),
    ('pBinaryID', True),
    ('eBinaryID', True),
    ('muBinaryID', True),
]

#   b_confusion[x][j]: fraction of the truth-matched candidates of sample x that
#                      pass selection j.
#   b_purity[x][j]:    fraction of the candidates of sample x passing selection j
#                      that are truth-matched. The denominator uses the same
#                      selection as the numerator, including column 0, which
#                      previously divided by the count of the opposite
#                      (kBinaryID > cut) selection.
# An entry with a zero denominator is stored as NaN instead of raising ZeroDivisionError.
for x, (dset, pdg) in enumerate(zip(dsets, pdgid)):
    # The truth-matching mask is the same for every column, so build it once per sample.
    is_true = abs(dset.mcPDG) == pdg
    n_true = int(is_true.sum())
    for j, (column, above_cut) in enumerate(binary_selections):
        passes = (dset[column] > b_pidCut) if above_cut else (dset[column] < b_pidCut)
        n_pass = int(passes.sum())
        n_true_pass = int((passes & is_true).sum())
        b_confusion[x][j] = n_true_pass / n_true if n_true else np.nan
        b_purity[x][j] = n_true_pass / n_pass if n_pass else np.nan

# Draw the binary-PID confusion and purity matrices as annotated heatmaps with
# identical styling and display them (these figures are not saved to disk).
# NOTE(review): the x tick labels reuse pidlabels although the columns are
# binary-ID selections - confirm that this labelling is intended.
for matrix, title in [
    (b_confusion, 'Binary PID Confusion Matrix'),
    (b_purity, 'Binary PID Purity Matrix'),
]:
    fig, ax = plt.subplots(figsize=(10,8))
    sns.heatmap(matrix,
                xticklabels=pidlabels,
                yticklabels=setlabels,
                cmap='YlGnBu',
                vmin=0,vmax=1,   # both matrices hold fractions, so fix the colour scale to [0, 1]
                annot=True,
                square=True,
                ax=ax,
                )
    ax.set_title(title)
    # Hide the minor tick marks on both axes but keep the tick labels.
    ax.tick_params(axis='x', which='minor', bottom=False, top=False, labelbottom=True)
    # labelleft is the y-axis counterpart of labelbottom, which has no effect on the y axis.
    ax.tick_params(axis='y', which='minor', left=False, right=False, labelleft=True)
    plt.show()