In [None]:
import energyflow as ef
import h5py
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from cycler import cycler
from sklearn.neighbors import KernelDensity

In [None]:
# Global matplotlib styling applied to every figure in this notebook:
# a four-colour property cycle, the base font size, and the patch outline width.
mpl.rcParams.update(
    {
        "axes.prop_cycle": cycler(color=["#B6BFC3", "#3B515B", "#0271BB", "#E2001A"]),
        "font.size": 15,
        "patch.linewidth": 1.25,
    }
)

In [None]:
# Read the complete "jet_data" dataset into memory as a NumPy array; the HDF5
# file is closed again as soon as the with-block exits.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
path = "/beegfs/desy/user/ewencedr/data/lhco/final_data/processed_data_background_rel.h5"
with h5py.File(path, mode="r") as h5_file:
    jets = h5_file["jet_data"][:]

In [None]:
# Convert the loaded jet array to Cartesian four-vectors with energyflow.
# assumes the last axis of `jets` is ordered (pt, y, phi[, m]), as the function
# name implies — TODO confirm against how "jet_data" was written.
p4_jets = ef.p4s_from_ptyphims(jets)

In [None]:
# get mjj from p4_jets
# p4_jets[:, 0] and p4_jets[:, 1] pick the first and second entry along axis 1
# (presumably the two leading jets of each event — TODO confirm).
# For each of them, take the Euclidean norm over the remaining axis, i.e. the
# square root of the sum of all squared components. Despite the names, pj_x and
# pj_y are these per-entry norms, not x/y momentum components.
pj_x = np.sqrt(np.sum(p4_jets[:, 0] ** 2, axis=1))
pj_y = np.sqrt(np.sum(p4_jets[:, 1] ** 2, axis=1))
# Square of the sum of the two norms.
# NOTE(review): this is not the dijet invariant mass. If p4_jets holds Cartesian
# four-vectors, the invariant mass would be the Minkowski mass of the summed
# four-vector, e.g. ef.ms_from_p4s(p4_jets[:, 0] + p4_jets[:, 1]). Changing it
# here would also require re-tuning the KDE bandwidth and the histogram bin
# range used further down, which match the current numeric scale.
mjj = (pj_x + pj_y) ** 2
print(mjj.shape)

In [None]:
# Fit a Gaussian kernel density estimate to the one-dimensional mjj values and
# draw as many synthetic values from it as there are entries in mjj.
# KernelDensity is imported in the top import cell.
# NOTE(review): the bandwidth is expressed in the units of mjj; 0.01 is tiny
# compared with the histogram bin width used below (0.005e8), so the samples
# are close to a resampling of the input — confirm this is intended.
kde_model = KernelDensity(kernel="gaussian", bandwidth=0.01)
# scikit-learn estimators expect a 2D (n_samples, n_features) array.
kde_model.fit(mjj.reshape(-1, 1))

# Fixed seed so that Restart & Run All reproduces exactly the same samples;
# the returned array has shape (len(mjj), 1).
samples = kde_model.sample(len(mjj), random_state=42)

In [None]:
# Overlay the distribution of mjj ("Truth") and the KDE draws on a common
# binning, using a logarithmic count axis.
fig, ax = plt.subplots()
hist = ax.hist(
    mjj,
    bins=np.arange(0.005e8, 1.8e8, 0.005e8),
    histtype="stepfilled",
    label="Truth",
    alpha=0.5,
)
# hist[1] holds the bin edges returned by the first call; reuse them so both
# histograms share identical bins.
ax.hist(samples, bins=hist[1], histtype="step", label="KDE samples")
ax.set_xlabel("mjj")
ax.legend(frameon=False)
ax.set_yscale("log")
plt.show()