In [None]:
import gc
import os

import awkward as ak
import boost_histogram as bh
import matplotlib as mpl
import matplotlib.pyplot as plt
import mplhep
import numpy as np
import pandas as pd
import uproot
import vector
# Select mplhep's "CMS" plotting style for the figures produced in this notebook.
mplhep.style.use("CMS")

In [None]:
# Read the three input samples (qq, Z, ZH) from parquet into Awkward arrays.
# NOTE(review): the variables are called *_test but the files read are
# *_train.parquet -- confirm which split is intended.
qq_test, z_test, zh_test = (
    ak.from_parquet(parquet_path)
    for parquet_path in (
        r"qq_train.parquet",
        r"z_train.parquet",
        r"zh_train.parquet",
    )
)

In [None]:
# Build 4-vectors from the momentum records stored in the .parquet files so that derived quantities such as energy or pT can be accessed:

def to_p4(p4_obj):
    """Wrap a stored momentum record as a ``vector`` Awkward array.

    The ``tau``, ``x``, ``y`` and ``z`` attributes of ``p4_obj`` are zipped
    into records with the fields ``mass``, ``x``, ``y`` and ``z``, and the
    result is passed to ``vector.awk``.
    """
    components = {
        "mass": p4_obj.tau,
        "x": p4_obj.x,
        "y": p4_obj.y,
        "z": p4_obj.z,
    }
    zipped = ak.zip(components)
    return vector.awk(zipped)
def my_to_ptphietaenergy(p4_obj):
    """Re-express a 4-vector array in (pt, phi, eta, E) coordinates.

    Parameters
    ----------
    p4_obj
        Object exposing ``pt``, ``phi``, ``eta`` and ``E`` attributes, such as
        the array returned by ``to_p4``.

    Returns
    -------
    The result of ``vector.awk`` on records with the fields ``pt``, ``phi``,
    ``eta`` and ``E``.
    """
    return vector.awk(
        ak.zip(
            {
                "pt": p4_obj.pt,
                "phi": p4_obj.phi,
                # vector interprets a field named "eta" as pseudorapidity, so
                # it must be filled from .eta. The previous code stored
                # .rapidity here, which differs for massive objects and made
                # quantities derived from the new vector (pz, mass) wrong.
                "eta": p4_obj.eta,
                "E": p4_obj.E,
            }
        )
    )

def to_bh(test, bins, cumulative=False):
    """Fill a one-dimensional boost-histogram with ``test``.

    Parameters
    ----------
    test
        Values passed to ``Histogram.fill``.
    bins
        Bin edges used to build a ``bh.axis.Variable`` axis.
    cumulative
        When true, every bin is replaced by the sum of all bin values minus
        the cumulative sum up to and including that bin.

    Returns
    -------
    The filled ``bh.Histogram``.
    """
    hist = bh.Histogram(bh.axis.Variable(bins))
    hist.fill(test)
    if not cumulative:
        return hist
    # Entries remaining above each bin: total minus the running sum.
    hist[:] = np.sum(hist.values()) - np.cumsum(hist)
    return hist

# For every sample, turn the stored momentum records into vector arrays and
# attach a (pt, phi, eta, E) view of the reconstructed candidates as the
# "4momentum" field.

# ZH
zh_gen_tau_p4s = to_p4(zh_test["gen_jet_tau_p4s"])
zh_reco_cand_p4s = to_p4(zh_test["reco_cand_p4s"])
zh_reco_jet_p4s = to_p4(zh_test["reco_jet_p4s"])
zh_test["4momentum"] = my_to_ptphietaenergy(zh_reco_cand_p4s)

# Z
z_gen_tau_p4s = to_p4(z_test["gen_jet_tau_p4s"])
z_reco_cand_p4s = to_p4(z_test["reco_cand_p4s"])
z_reco_jet_p4s = to_p4(z_test["reco_jet_p4s"])
z_gen_tau_pt = z_gen_tau_p4s.pt
z_test["4momentum"] = my_to_ptphietaenergy(z_reco_cand_p4s)

# qq (no generator-level tau 4-vectors are read for this sample)
qq_test_reco_cand_p4s = to_p4(qq_test["reco_cand_p4s"])
qq_test_reco_jet_p4s = to_p4(qq_test["reco_jet_p4s"])
# Use the candidate 4-vectors, as for ZH and Z above; the previous code passed
# the jet 4-vectors here, which made the qq "4momentum" field inconsistent
# with the other two samples.
qq_test["4momentum"] = my_to_ptphietaenergy(qq_test_reco_cand_p4s)

In [None]:
def _attach_kinematics(events, cand_p4s, jet_p4s):
    """Store the candidate (``part_*``) and jet (``jet_*``) fields on ``events``.

    Parameters
    ----------
    events
        Awkward array receiving the new fields. Item assignment updates this
        array object itself, so the caller's variable sees the new fields.
    cand_p4s
        4-vector array of the reconstructed candidates (source of ``part_*``).
    jet_p4s
        4-vector array of the reconstructed jets (source of ``jet_*``).
    """
    events["part_energy"] = cand_p4s.E
    events["part_px"] = cand_p4s.x
    events["part_py"] = cand_p4s.y
    events["part_phi"] = cand_p4s.phi
    events["part_eta"] = cand_p4s.eta
    events["jet_pt"] = jet_p4s.pt
    # Jet energy comes from the jet 4-vectors. The ZH stanza previously read
    # the candidate energies here, unlike the Z and qq stanzas.
    events["jet_energy"] = jet_p4s.E
    events["jet_eta"] = jet_p4s.eta
    events["jet_phi"] = jet_p4s.phi


# The same set of fields is attached to every sample. The former ZH-only
# "part_deta" assignment (the whole candidate 4-vector, assigned twice) is
# dropped: it did not hold a delta-eta and is not among the columns kept by
# the selection that follows.
_attach_kinematics(zh_test, zh_reco_cand_p4s, zh_reco_jet_p4s)
_attach_kinematics(z_test, z_reco_cand_p4s, z_reco_jet_p4s)
_attach_kinematics(qq_test, qq_test_reco_cand_p4s, qq_test_reco_jet_p4s)

In [None]:
# Reduce every sample to the same set of candidate- and jet-level columns.
kept_fields = [
    "part_energy",
    "part_px",
    "part_py",
    "jet_pt",
    "jet_energy",
    "jet_eta",
    "jet_phi",
    "part_phi",
    "part_eta",
]
qq_test = qq_test[kept_fields]
zh_test = zh_test[kept_fields]
z_test = z_test[kept_fields]

In [None]:
# Pad or clip each candidate-level field to exactly 100 entries and replace
# the padding (None) with 0, for ZH, Z and qq in turn.
for padded_sample in (zh_test, z_test, qq_test):
    for part_field in ("part_energy", "part_px", "part_py", "part_eta", "part_phi"):
        padded_sample[part_field] = ak.fill_none(
            ak.pad_none(padded_sample[part_field], target=100, clip=True), 0
        )

# Labels: 1 for the ZH and Z samples, 0 for the qq sample.
zh_test["label"] = np.ones(len(zh_test["jet_pt"]))
z_test["label"] = np.ones(len(z_test["jet_pt"]))
qq_test["label"] = np.zeros(len(qq_test["jet_pt"]))

# Final column selection, in the same order for every sample.
output_fields = [
    "part_energy",
    "part_px",
    "part_py",
    "jet_pt",
    "jet_energy",
    "label",
    "jet_eta",
    "jet_phi",
    "part_eta",
    "part_phi",
]
my_data = zh_test[output_fields]
my_data_z = z_test[output_fields]
my_data2 = qq_test[output_fields]

In [None]:
chunk_size = 1000
total_elements = len(my_data_z)
with uproot.recreate("my_data_sample7.root") as root_file:
    # Create the "tree" entry from the Z sample, one branch per field.
    # Building the mapping from the field names keeps each branch tied to the
    # field of the same name; previously the "jet_eta" branch was filled from
    # my_data_z["jet_phi"].
    root_file["tree"] = {
        branch: my_data_z[branch]
        for branch in (
            "part_energy",
            "part_px",
            "part_py",
            "jet_pt",
            "jet_energy",
            "label",
            "jet_eta",
            "jet_phi",
            "part_eta",
            "part_phi",
        )
    }