In [None]:
import pandas as pd
import json
import glob
import matplotlib.pyplot as plt
import numpy as np

import sklearn
import sklearn.metrics
import matplotlib
import scipy
import mplhep

import pandas

In [None]:
def flatten(arr):
    """Merge the first two axes of a 3-D array.

    An input of shape ``(a, b, c)`` is returned reshaped to ``(a * b, c)``.
    """
    n_rows = arr.shape[0] * arr.shape[1]
    n_cols = arr.shape[2]
    return arr.reshape((n_rows, n_cols))

In [None]:
def cms_label(x0=0.12, x1=0.23, x2=0.67, y=0.90):
    """Write the three-part CMS header onto the current figure.

    Args:
        x0: x position of the bold "CMS" text.
        x1: x position of the italic "Simulation Preliminary" text.
        x2: x position of the run / sample description.
        y: y position shared by all three text fragments.

    Positions are passed straight to ``plt.figtext``.
    """
    plt.figtext(x0, y, 'CMS', fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12)
    plt.figtext(x1, y, 'Simulation Preliminary', style='italic', wrap=True, horizontalalignment='left', fontsize=10)
    # Raw string: the TeX backslashes (\m, \o) are not valid Python escape sequences.
    plt.figtext(x2, y, r'Run 3 (14 TeV), $\mathrm{t}\overline{\mathrm{t}}$ events', wrap=False, horizontalalignment='left', fontsize=10)

def sample_label(ax, x=0.03, y=0.98):
    """Write the ttbar sample label onto ``ax``.

    Args:
        ax: axes to annotate; ``x`` and ``y`` are interpreted in its
            ``transAxes`` coordinate system.
        x: horizontal anchor of the text (left-aligned).
        y: vertical anchor of the text (top-aligned).
    """
    # ax.text rather than plt.text, so the label lands on the axes that was passed in
    # even when it is not the current one. Raw string keeps the TeX backslashes intact.
    ax.text(x, y, r"$\mathrm{t}\overline{\mathrm{t}}$ events", va="top", ha="left", size=10, transform=ax.transAxes)


In [None]:
def apply_thresholds_f(ypred_raw_f, thresholds):
    """Pick the winning class per row after applying per-class score cuts.

    Column 0 is never cut. For every ``i``, column ``i + 1`` stays in the
    running only where its score exceeds ``thresholds[i]``; scores failing
    the cut are multiplied by zero before the argmax.

    Args:
        ypred_raw_f: 2-D array of class scores, indexed ``[row, class]``.
        thresholds: sequence of cuts for classes 1, 2, ... in that order.

    Returns:
        1-D integer array holding the selected class index of every row.
    """
    keep = np.ones_like(ypred_raw_f)
    for cls_idx, cut in enumerate(thresholds, start=1):
        keep[:, cls_idx] = ypred_raw_f[:, cls_idx] > cut
    return np.argmax(ypred_raw_f * keep, axis=-1)

def apply_thresholds(ypred_raw, thresholds):
    """Pick the winning class per element after applying per-class score cuts.

    Same rule as the flattened variant, but for a 3-D score array: class 0 is
    never cut, and class ``i + 1`` is only kept where its score exceeds
    ``thresholds[i]``.

    Args:
        ypred_raw: 3-D array of class scores, indexed ``[a, b, class]``.
        thresholds: sequence of cuts for classes 1, 2, ... in that order.

    Returns:
        2-D integer array with the selected class index for every ``[a, b]``.
    """
    keep = np.ones_like(ypred_raw)
    for cls_idx, cut in enumerate(thresholds, start=1):
        keep[:, :, cls_idx] = ypred_raw[:, :, cls_idx] > cut
    return np.argmax(ypred_raw * keep, axis=-1)

In [None]:
# Short display names of the particle classes, keyed by class index
# (index 0 stands for "no particle").
pid_names = {
    0: "no ptcl",
    1: "ch.had",
    2: "n.had",
    3: "HFEM",
    4: "HFHAD",
    5: "g",
    6: "el",
    7: "mu"
}

# Long-form names for the same class indices; used for plot titles.
pid_names_long = {
    0: "no particle",
    1: "charged hadrons",
    2: "neutral hadrons",
    3: "HFEM",
    4: "HFHAD",
    5: "photons",
    6: "electrons",
    7: "muons"
}

# Names of the per-particle feature columns, keyed by column index.
# Column 0 is not listed: the cells below read it as the class id.
var_names = {
    1: "charge",
    2: "pt",
    3: "eta",
    4: "sin phi",
    5: "cos phi",
    6: "energy"
}

# Eleven detector-element names.
# NOTE(review): presumably the PFElement types in type-id order starting at id 1 -
# confirm; the list is not referenced elsewhere in this part of the notebook.
x_labels = [
    "track", "PS1", "PS2", "ECAL", "HCAL", "GSF", "BREM", "HFEM", "HFHAD", "SC", "HO"
]
# Short class names in class-index order; used as confusion-matrix tick labels.
y_labels = [pid_names[i] for i in range(0,8)]

In [None]:
# Evaluation directory of an earlier run, kept for quick switching:
#path = "../experiments/cms-dev_20210831_225815_541048.gpu0.local/evaluation/"
# Evaluation directory used below: pred_batch*.npz files are read from it and the
# training history from its sibling ../history/ directory.
path = "../experiments/cms-gen_20210903_114315_805349.joosep-desktop-work/evaluation/"

In [None]:
# Read every prediction batch written to `path` and stitch the per-batch arrays
# together along the first axis.
Xs = []
ygens = []
ycands = []
ypreds = []
ypreds_raw = []

# glob returns matches in arbitrary order; sorting keeps the event order reproducible.
pred_files = sorted(glob.glob(path + "/pred_batch*.npz"))
if not pred_files:
    raise FileNotFoundError("no pred_batch*.npz files found in {}".format(path))

for fi in pred_files:
    # np.load keeps the .npz archive open; the context manager closes it once the
    # arrays have been read into memory.
    with np.load(fi) as dd:
        Xs.append(dd["X"])
        ygens.append(dd["ygen"])
        ycands.append(dd["ycand"])
        ypreds.append(dd["ypred"])
        ypreds_raw.append(dd["ypred_raw"])

X = np.concatenate(Xs)
# True where the element-type column (feature 0) is non-zero.
msk_X = X[:, :, 0]!=0

ygen = np.concatenate(ygens)
ycand = np.concatenate(ycands)
ypred = np.concatenate(ypreds)

ypred_raw = np.concatenate(ypreds_raw)

# Flattened views: the first two axes are merged so that each row is one element.
X_f = flatten(X)
msk_X_f = X_f[:, 0]!=0
ygen_f = flatten(ygen)
ycand_f = flatten(ycand)
ypred_f = flatten(ypred)

# NOTE: earlier experiments adjusted ypred_raw at this point, before flattening:
# selected class scores were zeroed or merged into another class for element
# types 1, 4, 5, 8 and 9 (selected via X[:, :, 0]). All of them are disabled.

ypred_raw_f = flatten(ypred_raw)

# Plain argmax class decision; the thresholds cell below recomputes both arrays.
ypred_id = np.argmax(ypred_raw, axis=-1)
ypred_id_f = ypred_id.flatten()

In [None]:
# One score cut per class 1..7 (class 0 is never cut). With all cuts at zero a
# class is only dropped where its raw score is not strictly positive.
thresholds = [0.0, 0.0, 0.0, 0, 0, 0, 0]
# Overwrites the plain-argmax ypred_id / ypred_id_f computed in the loading cell.
ypred_id = apply_thresholds(ypred_raw, thresholds)
ypred_id_f = apply_thresholds_f(ypred_raw_f, thresholds)

In [None]:
# Compare the class-`icls` score between elements of type `ielem` whose PF candidate
# is of class `icls` (signal) and those whose PF candidate is of another class (background).
icls = 2
ielem = 5

# Column 4 of X is plotted as the element energy elsewhere in this notebook.
energy_msk = (X_f[:, 4]>0)
elem_msk = (X_f[:, 0]==ielem)

vals_sig = ypred_raw_f[energy_msk & elem_msk & (ycand_f[:, 0]==icls), icls]
vals_bkg = ypred_raw_f[energy_msk & elem_msk & (ycand_f[:, 0]!=icls), icls]

bins = np.linspace(0,1,100)
hsig = np.histogram(vals_sig, bins=bins)[0]
hbkg = np.histogram(vals_bkg, bins=bins)[0]

# Cumulative fractions from the low-score end: a[k] / b[k] is the share of signal /
# background entries in the first k+1 bins.
a = np.cumsum(hsig)/np.sum(hsig)
b = np.cumsum(hbkg)/np.sum(hbkg)

# x: cumulative signal fraction, y: cumulative background fraction; the dashed
# diagonal is where both fractions are equal.
plt.figure(figsize=(4,4))
plt.plot(a, b, marker=".")
plt.plot([0,1], [0,1], color="black", lw=0.5, ls="--")

In [None]:
# Normalised score distributions of the signal / background selections built in the
# previous cell (vals_sig, vals_bkg). Note that `b` is re-bound to the bin edges here.
b = np.linspace(0,1,100)
mplhep.histplot(np.histogram(vals_sig, bins=b, density=1), label="sig");
mplhep.histplot(np.histogram(vals_bkg, bins=b, density=1), label="bkg");
plt.legend(loc=2)

In [None]:
# For each class 1..7: scatter of the number of MLPF-predicted particles against the
# number of PF candidates, one point per entry along the first axis; saved as num_cls<icls>.pdf.
for icls in range(1,8):
    npred = np.sum(ypred_id == icls, axis=1)
    ncand = np.sum(ycand[:, :, 0] == icls, axis=1)
    plt.figure(figsize=(6,6))
    plt.scatter(ncand, npred, marker=".", alpha=0.8)
    # Common axis range, padded below the smallest and above the largest count.
    a = 0.5*min(np.min(npred), np.min(ncand))
    b = 1.5*max(np.max(npred), np.max(ncand))
    plt.xlim(a,b)
    plt.ylim(a,b)
    # Dashed diagonal: equal PF and MLPF counts.
    plt.plot([a,b],[a,b], color="black", ls="--")
    plt.title(pid_names_long[icls],y=1.05)
    plt.xlabel("number of PFCandidates")
    plt.ylabel("number of MLPFCandidates")
    cms_label(x2=0.6, y=0.89)
    plt.savefig("num_cls{}.pdf".format(icls))


In [None]:
# Pair of numbers per class index 1..7.
# NOTE(review): presumably [low, high] energy-axis limits per class - confirm; not
# referenced anywhere in the visible part of the notebook.
energy_bins_classwise = {
    1: [-2, 5],
    2: [-2, 6],
    3: [1, 7],
    4: [2, 5],
    5: [2, 5],
    6: [2, 5],
    7: [2, 5],
}

# Pair of factors per class index 1..7; only class 3 differs from [1, 1].
# NOTE(review): meaning of the two entries is not visible here - confirm against
# the code that consumes them.
energy_correction_factors = {
    1: [1, 1],
    2: [1, 1],
    3: [1.0, 1.2],
    4: [1, 1],
    5: [1, 1],
    6: [1, 1],
    7: [1, 1],
}

In [None]:
# Class-1 (charged hadron) score for elements of type 1, split by whether the
# PF candidate class is 0 (none) or 1 (charged hadron).
b = np.linspace(0,1,101)
plt.figure(figsize=(4,4))
plt.hist(ypred_raw_f[(X_f[:, 0]==1) & (ycand_f[:, 0]==0), 1], bins=b, histtype="step", lw=2, label="no PFCandidate", density=True);
plt.hist(ypred_raw_f[(X_f[:, 0]==1) & (ycand_f[:, 0]==1), 1], bins=b, histtype="step", lw=2, label="charged PFCandidate", density=True);
plt.legend(loc=2, frameon=False)
plt.xlabel("Charged hadron probability")
plt.title("Tracks")

In [None]:
# Class-0 (no particle) score for elements of type 1, split by whether the
# PF candidate class is 0 (none) or 1 (charged hadron).
b = np.linspace(0,1,101)
plt.figure(figsize=(4,4))
plt.hist(ypred_raw_f[(X_f[:, 0]==1) & (ycand_f[:, 0]==0), 0], bins=b, histtype="step", lw=2, label="no PFCandidate", density=True);
plt.hist(ypred_raw_f[(X_f[:, 0]==1) & (ycand_f[:, 0]==1), 0], bins=b, histtype="step", lw=2, label="charged PFCandidate", density=True);
plt.legend(loc=1, frameon=False)
plt.xlabel("No particle probability")
plt.title("Tracks")

In [None]:
# Class-2 (neutral hadron) score for elements of type 5, split by whether the
# PF candidate class is 0 (none) or 2 (neutral hadron).
b = np.linspace(0,1,101)
plt.figure(figsize=(4,4))
plt.hist(ypred_raw_f[(X_f[:, 0]==5) & (ycand_f[:, 0]==0), 2], bins=b, histtype="step", lw=2, label="no PFCandidate", density=True);
plt.hist(ypred_raw_f[(X_f[:, 0]==5) & (ycand_f[:, 0]==2), 2], bins=b, histtype="step", lw=2, label="neutral PFCandidate", density=True);
plt.legend(loc=2, frameon=False)
plt.xlabel("Neutral probability")
plt.title("HCAL clusters")

In [None]:
# Class-0 (no particle) score for elements of type 5, split by whether the
# PF candidate class is 0 (none) or 2 (neutral hadron).
b = np.linspace(0,1,101)
plt.figure(figsize=(4,4))
plt.hist(ypred_raw_f[(X_f[:, 0]==5) & (ycand_f[:, 0]==0), 0], bins=b, histtype="step", lw=2, label="no PFCandidate", density=True);
plt.hist(ypred_raw_f[(X_f[:, 0]==5) & (ycand_f[:, 0]==2), 0], bins=b, histtype="step", lw=2, label="neutral PFCandidate", density=True);
plt.legend(loc="best", frameon=False)
plt.xlabel("No particle probability")
plt.title("HCAL clusters")

In [None]:
# NOTE(review): neither assignment is read by plot_elem_energy_cls_prob below - it
# takes elem_type as an argument and loops over icls itself.
elem_type = 5
icls = 2

def plot_elem_energy_cls_prob(elem_type):
    """Draw class scores against element energy for one PFElement type.

    Creates a 2x4 grid with one panel per class 0..7. Each panel is a 2-D
    histogram of log10 of column 4 of ``X_f`` (x axis) against the raw MLPF
    score of that class (y axis), restricted to rows whose element type
    (column 0 of ``X_f``) equals ``elem_type``.

    Reads the notebook-level arrays ``X_f`` and ``ypred_raw_f``.
    """
    n_rows, n_cols = 2, 4
    energy_edges = np.linspace(-2,4,100)
    score_edges = np.linspace(0,1,100)

    plt.figure(figsize=(4*5,2*4))
    plt.suptitle("PFElement type {}".format(elem_type))

    for cls_idx in range(n_rows*n_cols):
        of_type = X_f[:, 0]==elem_type
        log_energy = np.log10(X_f[of_type, 4])
        cls_score = ypred_raw_f[of_type, cls_idx]

        plt.subplot(n_rows, n_cols, cls_idx+1)
        plt.hist2d(log_energy, cls_score, bins=(energy_edges, score_edges), cmap="Blues")
        plt.colorbar()
        plt.xlabel("PFElement log[E/GeV]")
        plt.ylabel("MLPF probability for class {}".format(cls_idx))
    plt.tight_layout()

In [None]:
# Element type 1 (titled "Tracks" in the score histograms earlier in the notebook).
plot_elem_energy_cls_prob(1)

In [None]:
# Element type 4.
# NOTE(review): presumably ECAL clusters, if x_labels lists the types from id 1 - confirm.
plot_elem_energy_cls_prob(4)

In [None]:
# Element type 5 (titled "HCAL clusters" in the score histograms earlier in the notebook).
plot_elem_energy_cls_prob(5)

In [None]:
# Element type 8.
# NOTE(review): presumably HFEM, if x_labels lists the types from id 1 - confirm.
plot_elem_energy_cls_prob(8)

In [None]:
# Element type 9.
# NOTE(review): presumably HFHAD, if x_labels lists the types from id 1 - confirm.
plot_elem_energy_cls_prob(9)

In [None]:
# For every row with a non-zero element type: the element type, the class of the
# PF candidate and the class chosen by MLPF.
reco_label = X_f[X_f[:, 0]!=0, 0]
cand_label = ycand_f[X_f[:, 0]!=0, 0]
pred_label = ypred_id_f[X_f[:, 0]!=0]

In [None]:
# 12x12 count matrices with the element type on the rows (first argument) and the
# particle class on the columns: cm1 for PF candidates, cm2 for MLPF predictions.
# The same label set 0..11 is used for both axes.
cm1 = sklearn.metrics.confusion_matrix(reco_label, cand_label, labels=range(12))
cm2 = sklearn.metrics.confusion_matrix(reco_label, pred_label, labels=range(12))

In [None]:
# Element type (rows) vs PF candidate class (columns), logarithmic colour scale.
plt.imshow(cm1, cmap="Blues", norm=matplotlib.colors.LogNorm())
plt.xticks(range(12));
plt.yticks(range(12));
plt.colorbar()

In [None]:
# Element type (rows) vs MLPF predicted class (columns), logarithmic colour scale.
plt.imshow(cm2, cmap="Blues", norm=matplotlib.colors.LogNorm())
plt.xticks(range(12));
plt.yticks(range(12));
plt.colorbar()

In [None]:
# Class id of the PF candidate of every flattened row; read as a global by
# plot_eff_and_fake_rate.
ycand_id_f = ycand_f[:, 0]

# NOTE(review): `b` and `icls` are not read by plot_eff_and_fake_rate, which takes
# the class and the bin edges as arguments.
b = np.linspace(-3,6,100)

icls = 2

def plot_eff_and_fake_rate(
    icls,
    ivar=4,
    bins=np.linspace(-3,6,100),
    xlabel="PFElement log[E/GeV]", log=True
    ):
    """Plot per-bin efficiency and fake rate of MLPF for one particle class.

    Reads the notebook-level arrays ``X_f``, ``ycand_id_f`` and ``ypred_id_f``.

    Args:
        icls: particle class index to evaluate.
        ivar: column of ``X_f`` used as the binning variable.
        bins: bin edges for the binning variable.
        xlabel: x-axis label of both panels.
        log: if True, bin in the natural logarithm of the variable.

    Returns:
        ``(ax1, ax2)``: the upper axes (number of rows with a PF / MLPF particle
        of class ``icls`` per bin) and the lower axes (efficiency and fake rate).

    Efficiency is the fraction of rows with a PF candidate of class ``icls``
    that MLPF also assigns to ``icls``; fake rate is the fraction of rows
    assigned to ``icls`` by MLPF whose PF candidate is of another class.
    Bins with an empty denominator come out as NaN.
    """

    values = X_f[:, ivar]
    if log:
        # Non-positive inputs give -inf / NaN; only the RuntimeWarning is silenced,
        # the values themselves are unchanged.
        with np.errstate(divide="ignore", invalid="ignore"):
            values = np.log(values)

    hist_cand = np.histogram(values[(ycand_id_f==icls)], bins=bins)
    hist_cand_true = np.histogram(values[(ycand_id_f==icls) & (ypred_id_f==icls)], bins=bins)

    hist_pred = np.histogram(values[(ypred_id_f==icls)], bins=bins)
    hist_pred_fake = np.histogram(values[(ycand_id_f!=icls) & (ypred_id_f==icls)], bins=bins)

    # 0/0 in empty bins is expected and yields NaN; suppress the warning it triggers.
    with np.errstate(divide="ignore", invalid="ignore"):
        eff = hist_cand_true[0]/hist_cand[0]
        fake = hist_pred_fake[0]/hist_pred[0]

    plt.figure(figsize=(8,8))
    ax1 = plt.subplot(2,1,1)
    mplhep.histplot(hist_cand, label="with PF candidate")
    mplhep.histplot(hist_pred, label="with MLPF candidate")
    plt.legend(frameon=False)
    plt.xlabel(xlabel)
    plt.ylabel("Number of particles")

    ax2 = plt.subplot(2,1,2, sharex=ax1)
    mplhep.histplot(eff, bins=hist_cand[1], label="efficiency", color="black")
    mplhep.histplot(fake, bins=hist_cand[1], label="fake rate", color="red")
    plt.legend(frameon=False)
    plt.ylim(0,1.4)
    plt.xlabel(xlabel)
    plt.ylabel("Fraction of particles / bin")

    return ax1, ax2

In [None]:
# Column 4 of X for elements of type 5: all of them, those with a class-2 PF candidate
# and those that MLPF assigns to class 2.
b = np.linspace(0,100, 100)
plt.hist(X_f[(X_f[:, 0]==5), 4], bins=b, histtype="step", lw=2, label="all clusters");
plt.hist(X_f[(X_f[:, 0]==5) & (ycand_f[:, 0]==2), 4], bins=b, histtype="step", lw=2, label="with PF candidate");
plt.hist(X_f[(X_f[:, 0]==5) & (ypred_id_f==2), 4], bins=b, histtype="step", lw=2, label="with MLPF candidate");
plt.yscale("log")
plt.legend()

In [None]:
# Class 1, linear binning of X column 4 from 0 to 300.
# NOTE(review): the function selects on particle class only; no element-type cut
# backs the "track" in the title.
ax1, ax2 = plot_eff_and_fake_rate(1, bins=np.linspace(0, 300, 100), log=False)
ax1.set_yscale("log")
ax1.set_title("track, charged hadron predictions")

In [None]:
# Class 2, linear binning of X column 4 from 0 to 300.
# NOTE(review): the function selects on particle class only; no element-type cut
# backs the "HCAL cluster" in the title.
ax1, ax2 = plot_eff_and_fake_rate(2, bins=np.linspace(0, 300, 100), log=False)
ax1.set_yscale("log")
ax1.set_title("HCAL cluster, neutral hadron predictions")

In [None]:
# Class 3 (HFEM in pid_names), linear binning of X column 4 from 0 to 300.
ax1, ax2 = plot_eff_and_fake_rate(3, bins=np.linspace(0, 300, 100), log=False)
ax1.set_yscale("log")

In [None]:
# Class 4 (HFHAD in pid_names), linear binning of X column 4 from 0 to 300.
ax1, ax2 = plot_eff_and_fake_rate(4, bins=np.linspace(0, 300, 100), log=False)
ax1.set_yscale("log")

In [None]:
def load_history(path, max_epoch=None):
    """Load per-epoch training-history JSON files into a DataFrame.

    Args:
        path: glob pattern matching the history files. The epoch number is the
            text between the last underscore and the following dot, e.g.
            ``history_12.json`` gives epoch 12.
        max_epoch: number of epochs to load, i.e. epochs ``0 .. max_epoch - 1``.
            When omitted (or 0), every epoch from 0 up to and including the
            highest one found is loaded.

    Returns:
        pandas.DataFrame with one row per epoch, in epoch order.

    Raises:
        FileNotFoundError: if no file matches ``path``.
        KeyError: if an epoch inside the requested range has no file.
    """
    ret = {}
    for fi in glob.glob(path):
        # Context manager so the file handle is closed right after parsing.
        with open(fi) as fh:
            data = json.load(fh)
        epoch = int(fi.split("_")[-1].split(".")[0])
        ret[epoch] = data

    if not ret:
        raise FileNotFoundError("no history files match {}".format(path))

    if not max_epoch:
        # range() below is exclusive, so +1 is needed to keep the last epoch.
        max_epoch = max(ret.keys()) + 1
    return pandas.DataFrame([ret[i] for i in range(max_epoch)])

In [None]:
# Training history, read from the history/ directory next to the evaluation directory.
history = load_history(path + "/../history/history_*.json")

In [None]:
def loss_plot(train, test, margin=0.05, smoothing=False):
    """Plot a training and a test loss curve against the epoch index.

    Args:
        train: sequence of training-loss values, one per epoch.
        test: sequence of test-loss values, one per epoch.
        margin: the y range is set to the last test value times
            ``1 - margin`` .. ``1 + margin``.
        smoothing: if True, draw the raw curves faintly and overlay a
            5-epoch moving average of each.

    Returns:
        The matplotlib axes the curves were drawn on.
    """
    fig = plt.figure(figsize=(8,4))
    ax = plt.axes()

    alpha = 0.2 if smoothing else 1.0
    l0 = None if smoothing else "train"
    l1 = None if smoothing else "test"
    p0 = plt.plot(train, alpha=alpha, label=l0)
    p1 = plt.plot(test, alpha=alpha, label=l1)

    if smoothing:
        window = 5
        kernel = np.ones(window)/window
        train_smooth = np.convolve(train, kernel, mode='valid')
        test_smooth = np.convolve(test, kernel, mode='valid')
        # mode='valid' returns one average per full window, so entry k covers epochs
        # k .. k+window-1. Shift x to the window centre; otherwise the smooth curve
        # is drawn (window-1)/2 epochs too early relative to the raw one.
        shift = (window - 1)/2
        plt.plot(np.arange(len(train_smooth)) + shift, train_smooth, color=p0[0].get_color(), lw=2, label="train")
        plt.plot(np.arange(len(test_smooth)) + shift, test_smooth, color=p1[0].get_color(), lw=2, label="test")

    plt.ylim(test[-1]*(1.0-margin), test[-1]*(1.0+margin))
    plt.legend(loc="best", frameon=False)
    plt.xlabel("epoch")
    cms_label(x1=0.18)
    #sample_label(ax, x=0.03, y=0.10)
    return ax

In [None]:
# Total loss, train vs validation; y range is +-2% around the last validation value.
p0 = loss_plot(history["loss"].values, history["val_loss"].values, margin=0.02)
plt.ylabel("Total loss")
plt.savefig("loss.pdf", bbox_inches="tight")

In [None]:
# Classification loss, train vs validation; y range is +-5% around the last validation value.
p0 = loss_plot(history["cls_loss"].values, history["val_cls_loss"].values, margin=0.05)
plt.ylabel("Multiclassification loss")
plt.savefig("cls_loss.pdf", bbox_inches="tight")

In [None]:
# Energy loss, train vs validation; y range is +-1% around the last validation value.
p0 = loss_plot(history["energy_loss"].values, history["val_energy_loss"].values, margin=0.01)
plt.ylabel("Energy loss")
plt.savefig("energy_loss.pdf", bbox_inches="tight")

In [None]:
# pT loss, train vs validation; y range is +-2% around the last validation value.
p0 = loss_plot(history["pt_loss"].values, history["val_pt_loss"].values, margin=0.02)
plt.ylabel("$p_T$ loss")
plt.savefig("pt_loss.pdf", bbox_inches="tight")

In [None]:
# sin(phi) loss, train vs validation; y range is +-2% around the last validation value.
p0 = loss_plot(history["sin_phi_loss"].values, history["val_sin_phi_loss"].values, margin=0.02)
# Raw string: \s and \p are TeX commands, not valid Python escape sequences.
plt.ylabel(r"$\sin \phi$ loss")
plt.savefig("sin_phi_loss.pdf", bbox_inches="tight")

In [None]:
# cos(phi) loss, train vs validation; y range is +-1% around the last validation value.
p0 = loss_plot(history["cos_phi_loss"].values, history["val_cos_phi_loss"].values, margin=0.01)
# Raw string: \c and \p are TeX commands, not valid Python escape sequences.
plt.ylabel(r"$\cos \phi$ loss")
plt.savefig("cos_phi_loss.pdf", bbox_inches="tight")

In [None]:
# eta loss, train vs validation; y range is +-0.5% around the last validation value.
p0 = loss_plot(history["eta_loss"].values, history["val_eta_loss"].values, margin=0.005)
# Raw string: \e is a TeX command prefix here, not a valid Python escape sequence.
plt.ylabel(r"$\eta$ loss")
plt.savefig("eta_loss.pdf", bbox_inches="tight")

In [None]:
# Charge loss, train vs validation; y range is +-0.5% around the last validation value.
# The validation column carries the same "val_" prefix as every other loss plotted above
# (previously the training column was passed twice, so both curves were identical).
p0 = loss_plot(history["charge_loss"].values, history["val_charge_loss"].values, margin=0.005)
plt.ylabel("charge loss")
plt.savefig("charge_loss.pdf", bbox_inches="tight")

In [None]:
# For each class 1..7: normalised distribution of that class's raw score, for rows
# whose PF candidate is of that class (blue) versus all other rows (red). Only rows
# with a non-zero element type are used. Saved as cls_output_<icls>.pdf.
for icls in range(1,8):
    fig = plt.figure()
    ax = plt.axes()
    # Note: re-binds the notebook-level name `msk` on every iteration.
    msk = (ycand_f[:, 0] == icls)
    plt.hist(ypred_raw_f[msk & (X_f[:, 0] != 0), icls], bins=100, density=1, histtype="step", lw=2, color="blue", label="true "+pid_names[icls]);
    plt.hist(ypred_raw_f[~msk & (X_f[:, 0] != 0), icls], bins=100, density=1, histtype="step", lw=2, color="red", label="other particles");
    plt.yscale("log")
    plt.title("Particle reconstruction for {}".format(pid_names[icls]), y=1.05)
    plt.xlabel("Classification output {}".format(icls))
    plt.ylabel("Normalized number of particles [a.u.]")
    plt.legend(loc=2, frameon=False)
    plt.ylim(1e-2, 1e4)
    cms_label(x1=0.2, x2=0.6)
    plt.savefig("cls_output_{}.pdf".format(icls))

In [None]:
# Disabled: random subsample of 100000 rows.
#perm = np.random.permutation(ycand_f[msk_X].shape[0])[:100000]

# PF candidate class (rows) vs MLPF predicted class (columns) over rows with a
# non-zero element type; each row is normalised to sum to one.
cm_norm = sklearn.metrics.confusion_matrix(
    ycand_f[msk_X_f, 0],
    ypred_id_f[msk_X_f],
    labels=range(0,8),
    normalize="true"
)

# Same matrix with raw counts.
cm = sklearn.metrics.confusion_matrix(
    ycand_f[msk_X_f, 0],
    ypred_id_f[msk_X_f],
    labels=range(0,8),
)

In [None]:
# Row-normalised confusion matrix (cm_norm), saved as cm_normed.pdf.
plt.figure(figsize=(8, 8))
ax = plt.axes()
plt.imshow(cm_norm, cmap="Blues")
plt.colorbar()

cms_label(x1=0.18, x2=0.52, y=0.82)
#sample_label(ax, x=0.8, y=1.0)
# Tick labels are the short class names in class-index order.
plt.xticks(range(len(y_labels)), y_labels);
plt.yticks(range(len(y_labels)), y_labels);
plt.xlabel("Predicted PFCandidate")
plt.ylabel("True PFCandidate")
plt.title("MLPF trained on PF", y=1.03)
#plt.tight_layout()
plt.savefig("cm_normed.pdf", bbox_inches="tight")

In [None]:
# Raw-count confusion matrix (cm), saved as cm.pdf.
plt.figure(figsize=(8, 8))
ax = plt.axes()
plt.imshow(cm, cmap="Blues")
plt.colorbar()

cms_label(x1=0.18, x2=0.52, y=0.82)
#sample_label(ax, x=0.8, y=1.0)
# Tick labels are the short class names in class-index order.
plt.xticks(range(len(y_labels)), y_labels);
plt.yticks(range(len(y_labels)), y_labels);
plt.xlabel("Predicted PFCandidate")
plt.ylabel("True PFCandidate")
plt.title("MLPF trained on PF", y=1.03)
plt.savefig("cm.pdf", bbox_inches="tight")

In [None]:
# Histogram bin edges per feature column (keys match var_names: 2 pt, 3 eta,
# 4 sin phi, 5 cos phi, 6 energy). Note that `bins` is re-bound to a plain array
# further down in the notebook.
bins = {
    2: np.linspace(0,100,100),
    3: np.linspace(-8,8,100),
    4: np.linspace(-1,1,100),
    5: np.linspace(-1,1,100),
    6: np.linspace(0,500,100),
}

In [None]:
# For every class 1..7 and feature column 2..6: distribution of the feature for PF
# candidates of that class and for MLPF predictions of that class (35 figures),
# each saved as distribution_icls<icls>_ivar<ivar>.pdf.
for icls in range(1,8):
    for ivar in range(2,7):
        plt.figure()
        ax = plt.axes()
        b = bins[ivar]
        # Disabled: generator-level distribution.
        #plt.hist(ygen_f[ygen_f[:, 0]==icls, ivar], bins=b, histtype="step", lw=2, label="gen");
        plt.hist(ycand_f[ycand_f[:, 0]==icls, ivar], bins=b, histtype="step", lw=2, label="PF");
        # MLPF rows are selected with the thresholded class decision ypred_id_f.
        plt.hist(ypred_f[ypred_id_f==icls, ivar], bins=b, histtype="step", lw=2, label="MLPF");
        plt.yscale("log")
        plt.legend()
        plt.title(pid_names_long[icls], y=1.05)
        plt.xlabel(var_names[ivar])
        plt.ylabel("Number of particles")
        cms_label(x1=0.2, x2=0.6)
        plt.savefig("distribution_icls{}_ivar{}.pdf".format(icls, ivar))

In [None]:
def plot_particle_regression(ivar=6, icls=2, particle_label="Neutral hadrons", log=True, minval=-1, maxval=3, norm="log"):
    """2-D histogram of an MLPF-predicted feature against the PF candidate value.

    Only rows where both the PF candidate and the MLPF prediction are of class
    ``icls`` enter the histogram. Reads the notebook-level arrays ``ycand_f``,
    ``ypred_f`` and ``ypred_id_f``.

    Args:
        ivar: feature column to compare.
        icls: particle class index.
        particle_label: text written in the upper-left corner of the axes.
        log: if True, compare log10 of the values.
        minval: lower edge of both axes and of the binning.
        maxval: upper edge of both axes and of the binning.
        norm: colour normalisation handed to ``plt.hist2d``. The default
            ``"log"`` creates a new ``LogNorm`` for this call; pass ``None``
            for a linear scale.
    """
    # A Normalize object keeps the vmin/vmax it was autoscaled to. One instance shared
    # as the default argument would carry the colour limits of the first plot over to
    # every later one, so a fresh instance is built per call.
    if isinstance(norm, str) and norm == "log":
        norm = matplotlib.colors.LogNorm()

    plt.figure(figsize=(6,5))
    ax = plt.axes()

    bins = np.linspace(minval, maxval, 100)
    msk_both = (ypred_id_f == icls) & (ycand_f[:, 0]==icls)

    vals_true = ycand_f[msk_both, ivar]
    vals_pred = ypred_f[msk_both, ivar]

    if log:
        vals_true = np.log10(vals_true)
        vals_pred = np.log10(vals_pred)

    plt.hist2d(
        vals_true,
        vals_pred,
        bins=(bins, bins),
        cmap="Blues", norm=norm
    )

    plt.colorbar()
    # Dashed diagonal: prediction equal to the PF value.
    plt.plot([minval, maxval], [minval, maxval], color="black", ls="--", lw=0.5)
    plt.xlim(minval, maxval)
    plt.ylim(minval, maxval)
    cms_label(x1=0.2, x2=0.48)
    plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)
    # Use the same tick positions on both axes.
    ax.set_xticks(ax.get_yticks());

In [None]:
# Energy of charged hadrons, PF vs MLPF, on a log10 scale.
plot_particle_regression(ivar=6, icls=1, particle_label="Charged hadrons")
# Raw strings: \l is a TeX command prefix here, not a valid Python escape sequence.
plt.xlabel(r"PFCandidate $\log_{10}$ E/GeV")
plt.ylabel(r"MLPFCandidate $\log_{10}$ E/GeV")
plt.savefig("energy_corr_cls1.pdf", bbox_inches="tight")

In [None]:
# Energy of neutral hadrons, PF vs MLPF, on a log10 scale.
plot_particle_regression(ivar=6, icls=2, particle_label="Neutral hadrons")
# Raw strings: \l is a TeX command prefix here, not a valid Python escape sequence.
plt.xlabel(r"PFCandidate $\log_{10}$ E/GeV")
plt.ylabel(r"MLPFCandidate $\log_{10}$ E/GeV")
plt.savefig("energy_corr_cls2.pdf", bbox_inches="tight")

In [None]:
# eta of charged hadrons, PF vs MLPF, linear axes and linear colour scale.
plot_particle_regression(ivar=3, icls=1, particle_label="Charged hadrons", log=False, minval=-4, maxval=4, norm=None)
# Raw strings: \e is a TeX command prefix here, not a valid Python escape sequence.
plt.xlabel(r"PFCandidate $\eta$")
plt.ylabel(r"MLPFCandidate $\eta$")
plt.savefig("eta_corr_cls1.pdf", bbox_inches="tight")

In [None]:
# eta of neutral hadrons, PF vs MLPF, linear axes and linear colour scale.
plot_particle_regression(ivar=3, icls=2, particle_label="Neutral hadrons", log=False, minval=-4, maxval=4, norm=None)
# Raw strings: \e is a TeX command prefix here, not a valid Python escape sequence.
plt.xlabel(r"PFCandidate $\eta$")
plt.ylabel(r"MLPFCandidate $\eta$")
plt.savefig("eta_corr_cls2.pdf", bbox_inches="tight")

In [None]:
# Energy of class 3 (HFEM in pid_names), PF vs MLPF, log10 axes, linear colour scale.
plot_particle_regression(ivar=6, icls=3, particle_label="HF", minval=0.0, maxval=4, norm=None)
# Raw strings: \l is a TeX command prefix here, not a valid Python escape sequence.
plt.xlabel(r"PFCandidate $\log_{10}$ E/GeV")
plt.ylabel(r"MLPFCandidate $\log_{10}$ E/GeV")

In [None]:
# Energy of class 4 (HFHAD in pid_names), PF vs MLPF, log10 axes, linear colour scale.
plot_particle_regression(ivar=6, icls=4, particle_label="HF", minval=0.0, maxval=4, norm=None)
# Raw strings: \l is a TeX command prefix here, not a valid Python escape sequence.
plt.xlabel(r"PFCandidate $\log_{10}$ E/GeV")
plt.ylabel(r"MLPFCandidate $\log_{10}$ E/GeV")

## Gen level

In [None]:
# For each class 1..7: number of PF candidates and of MLPF predictions plotted against
# the number of generator-level particles, one point per entry along the first axis.
for icls in range(1,8):
    npred = np.sum(ypred_id == icls, axis=1)
    ncand = np.sum(ycand[:, :, 0] == icls, axis=1)
    ngen = np.sum(ygen[:, :, 0] == icls, axis=1)
    plt.figure(figsize=(6,6))
    plt.scatter(ngen, ncand, marker=".", alpha=0.5, label="PF")
    plt.scatter(ngen, npred, marker=".", alpha=0.5, label="MLPF")
    plt.legend(loc="best", frameon=False)
    # The common axis range has to cover everything that is drawn: gen counts on x,
    # PF and MLPF counts on y (previously only the gen counts were considered).
    a = 0.5*min(np.min(ngen), np.min(ncand), np.min(npred))
    b = 2*max(np.max(ngen), np.max(ncand), np.max(npred))
    plt.xlim(a,b)
    plt.ylim(a,b)
    # Dashed diagonal: reconstructed count equal to the gen count.
    plt.plot([a,b],[a,b], color="black", ls="--")
    plt.title(pid_names_long[icls],y=1.05)
    plt.xlabel("number of gen particles")
    plt.ylabel("number of PFCandidates")
    cms_label(x2=0.6, y=0.89)
#     plt.savefig("num_cls{}.pdf".format(icls))


In [None]:
# Energy (column 6) of class-2 particles at generator level, from PF and from MLPF.
bins = np.linspace(0,500,100)
mplhep.histplot(np.histogram(ygen_f[ygen_f[:, 0]==2, 6], bins=bins), label="gen")
mplhep.histplot(np.histogram(ycand_f[ycand_f[:, 0]==2, 6], bins=bins), label="PF")
# MLPF rows are selected with the thresholded class decision ypred_id_f, as in the
# per-class distribution plots earlier in the notebook.
mplhep.histplot(np.histogram(ypred_f[ypred_id_f==2, 6], bins=bins), label="MLPF")
plt.yscale("log")
plt.xlabel(var_names[6])
plt.ylabel("Number of particles")
plt.legend(frameon=False);

In [None]:
# Energy residual (gen - reco) for one particle class, PF vs MLPF.
icls = 4
bins = np.linspace(-200,200,100)
# Take the label from the class table so it cannot disagree with icls
# (it used to read "neutral hadrons", which is class 2, while icls is 4).
particle_label = pid_names_long[icls]

# Rows where the gen particle and the PF candidate are both of class icls.
msk_cand = (ygen_f[:, 0]==icls) & (ycand_f[:, 0]==icls)
# Rows where the gen particle and the MLPF prediction are both of class icls, using
# the thresholded class decision like the other MLPF selections in this notebook.
msk_pred = (ygen_f[:, 0]==icls) & (ypred_id_f==icls)

vals_gen1 = ygen_f[msk_cand, 6]
vals_gen2 = ygen_f[msk_pred, 6]
vals_cand = ycand_f[msk_cand, 6]
vals_pred = ypred_f[msk_pred, 6]

res_cand = vals_gen1 - vals_cand
res_pred = vals_gen2 - vals_pred

plt.figure(figsize=(5,5))
ax = plt.axes()
# Raw strings below: \m and \s are TeX commands, not valid Python escape sequences.
plt.hist(
    res_cand,
    bins=bins, histtype="step", lw=2,
    label=r"PF, $\mu={:.2f}, \sigma={:.2f}$".format(np.mean(res_cand), np.std(res_cand)));

plt.hist(res_pred,
    bins=bins,
    histtype="step", lw=2,
    label=r"MLPF, $\mu={:.2f}, \sigma={:.2f}$".format(np.mean(res_pred), np.std(res_pred))
);

plt.yscale("log")
plt.ylabel("Number of particles / bin")
cms_label(x1=0.21, x2=0.55)
# Extra head-room at the top for the legend and the label.
plt.ylim(top=10**9)
plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)
plt.xlabel(r"particle $E_{\mathrm{gen}} - E_{\mathrm{reco}}$ [GeV]")
plt.legend(frameon=False)