In [16]:
from pathlib import Path

import awkward as ak
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylhe

In [17]:
# Input event file (gzipped LHE, per the file name).
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# run on another machine until it is pointed at a local copy of the file.
lhe_file = "/users/eleves-b/2022/baptiste.barthe-gold/Documents/Comput_HEP/MG5_aMC_v2_9_22/VBF-cHDD-1/Events/run_01/unweighted_events.lhe.gz"

# Count the events first, then open the event stream used by the extraction step.
# NOTE(review): `events` is presumably a lazy, single-pass iterator (see the
# pylhe docs) -- confirm; if so, this cell must be re-run before iterating again.
nevents = pylhe.read_num_events(lhe_file)
events = pylhe.read_lhe_with_attributes(lhe_file)
print(f"Number of events: {nevents}")

Number of events: 10000


In [18]:
def extract_4m(events):
    """Collect quark and lepton four-momenta and the event weight per event.

    For every event, particles with ``status == 1.0`` are kept and split into
    quarks (``abs(id) <= 4``) and leptons (``abs(id)`` in 11, 13, 15). Each
    event must contain exactly two of each. Particles keep the order in which
    they appear in ``event.particles``; no pT ordering is applied.

    Parameters
    ----------
    events : iterable
        Events exposing ``.particles`` (each particle with ``status``, ``id``,
        ``px``, ``py``, ``pz``, ``e``) and ``.eventinfo.weight``.

    Returns
    -------
    tuple
        ``(quarks_4m, leptons_4m, cross_section_weight)``. The first two are
        ``ak.Array`` objects built with ``with_name="Momentum4D"``, holding two
        records per event with fields ``px``, ``py``, ``pz``, ``e``. The third
        is a plain list with one weight per event.

    Raises
    ------
    AssertionError
        If an event does not contain exactly two selected quarks or exactly
        two selected leptons.
    """
    components = ("px", "py", "pz", "e")
    lepton_ids = [11, 13, 15]

    weights, quark_records, lepton_records = [], [], []

    for event in events:
        # Final-state particles only.
        final_state = [p for p in event.particles if p.status == 1.0]

        # Quarks up to charm, and leptons.
        quarks = [p for p in final_state if abs(p.id) <= 4]
        leptons = [p for p in final_state if abs(p.id) in lepton_ids]

        # Sanity check: exactly one quark pair and one lepton pair per event.
        assert len(quarks) == 2, f"Length of selected quarks is not 2: {len(quarks)}"
        assert len(leptons) == 2, f"Length of selected leptons is not 2: {len(leptons)}"

        # One {px, py, pz, e} record per particle, in selection order.
        quark_records.append(
            [{name: getattr(q, name) for name in components} for q in quarks]
        )
        lepton_records.append(
            [{name: getattr(l, name) for name in components} for l in leptons]
        )

        weights.append(event.eventinfo.weight)

    return (
        ak.Array(quark_records, with_name="Momentum4D"),
        ak.Array(lepton_records, with_name="Momentum4D"),
        weights,
    )

In [19]:
# Build the per-event quark and lepton four-momenta and the event weights.
# NOTE(review): if `events` is a single-pass iterator, re-running this cell
# without re-running the loading cell would produce empty results -- confirm.
quarks_4m, leptons_4m, cross_section_weight = extract_4m(events)

In [20]:
# Per-particle views of the extracted four-momenta. The suffix 1/2 is the
# position in the selected list built by extract_4m, not a pT ranking.
lepton_1, lepton_2 = leptons_4m[:, 0], leptons_4m[:, 1]
jet_1, jet_2 = quarks_4m[:, 0], quarks_4m[:, 1]

# Pair systems, computed once and reused for mass and pT.
dilepton = lepton_1 + lepton_2
dijet = jet_1 + jet_2

# One row per event with the kinematic observables and the event weight.
dataset = pd.DataFrame(
    {
        "m_ll": dilepton.mass,
        "m_jj": dijet.mass,
        "pt_l1": lepton_1.pt,
        "pt_l2": lepton_2.pt,
        "pt_j1": jet_1.pt,
        "pt_j2": jet_2.pt,
        "pt_ll": dilepton.pt,
        "eta_l1": lepton_1.eta,
        "eta_l2": lepton_2.eta,
        "eta_j1": jet_1.eta,
        "eta_j2": jet_2.eta,
        "delta_eta_jj": jet_1.eta - jet_2.eta,
        # Signed phi(j1) - phi(j2), wrapped into [-pi, pi]. A raw difference
        # spans (-2*pi, 2*pi), so the same angle could show up as two values
        # that differ by 2*pi.
        "delta_phi_jj": jet_1.deltaphi(jet_2),
        "cross_section_weight": cross_section_weight,
    }
)

In [21]:
# Preview the first rows as a quick check of the derived columns.
dataset.head()

Unnamed: 0,m_ll,m_jj,pt_l1,pt_l2,pt_j1,pt_j2,pt_ll,eta_l1,eta_l2,eta_j1,eta_j2,delta_eta_jj,delta_phi_jj,cross_section_weight
0,97.301623,943.361573,108.569643,20.778174,75.50676,44.107703,90.782239,0.795077,1.442213,3.308059,-2.280637,5.588697,1.481401,0.10225
1,90.418983,2289.163457,54.811763,69.057975,68.944994,83.776263,87.922188,-0.366088,0.016954,-2.883546,3.926106,-6.809652,-1.928219,0.10225
2,88.374317,443.932024,65.597483,83.688548,59.132934,118.467958,136.112027,-0.798966,0.035543,1.513928,-1.826767,3.340696,1.499711,-0.10225
3,91.905279,509.985711,109.58603,44.059127,166.110295,250.280191,140.166951,-1.009533,-0.07946,-1.132555,0.33232,-1.464875,2.584731,-0.10225
4,63.578634,1238.938688,15.466064,116.071958,32.671507,123.273635,115.495962,0.05907,-0.150732,3.753815,-2.187382,5.941197,-1.94235,-0.10225


In [22]:
# Write the dataset to CSV without the index column. The output directory is
# created first so the export also works on a fresh checkout where ./data
# does not exist yet.
output_path = Path("./data/cHDD_1_10k.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
dataset.to_csv(output_path, index=False)