In [24]:
import os

import awkward as ak
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylhe

In [25]:
# Location of the gzipped LHE events file. The default is the path this
# notebook was originally run with; set the LHE_FILE environment variable to
# point at another copy without editing the notebook.
lhe_file = os.environ.get(
    "LHE_FILE",
    "/users/eleves-b/2022/baptiste.barthe-gold/Documents/Comput_HEP/MG5_aMC_v2_9_22/VBF-SM/Events/run_01/unweighted_events.lhe.gz",
)

# NOTE(review): pylhe.read_lhe_with_attributes is expected to hand back a lazy,
# one-shot iterator -- confirm against the installed pylhe version. If so,
# `events` can only be looped over once per execution of this cell.
events = pylhe.read_lhe_with_attributes(lhe_file)

# Event count as reported by pylhe for the same file.
nevents = pylhe.read_num_events(lhe_file)
print(f"Number of events: {nevents}")

Number of events: 10000


In [26]:
def extract_4m(events):
    """Collect quark and lepton four-momenta plus the weight of every event.

    For each event only particles with ``status == 1.0`` (final state) are
    considered. Among those, particles with ``abs(id) <= 4`` are taken as the
    quarks (up to charm) and particles with ``abs(id)`` in ``{11, 13, 15}`` as
    the leptons. Exactly two of each are required per event.

    Parameters
    ----------
    events : iterable
        Event objects exposing ``particles`` (each with ``id``, ``status``,
        ``px``, ``py``, ``pz``, ``e``) and ``eventinfo.weight``.

    Returns
    -------
    tuple
        ``(quarks_4m, leptons_4m, cross_section_weight)``. The first two are
        ``ak.Array`` objects named ``"Momentum4D"`` holding, per event, two
        records with fields ``px``, ``py``, ``pz``, ``e`` in the order the
        particles appear in the event. The last is a plain list of weights.

    Raises
    ------
    AssertionError
        If an event does not contain exactly two selected quarks or exactly
        two selected leptons.
    """
    components = ("px", "py", "pz", "e")
    lepton_ids = (11, 13, 15)

    def as_record(particle):
        # One four-momentum record, fields in (px, py, pz, e) order.
        return {name: getattr(particle, name) for name in components}

    quark_records = []
    lepton_records = []
    weights = []

    for evt in events:
        # Keep final-state particles only.
        final_state = [p for p in evt.particles if p.status == 1.0]

        # Quarks up to charm, and the leptons.
        light_quarks = [p for p in final_state if abs(p.id) <= 4]
        found_leptons = [p for p in final_state if abs(p.id) in lepton_ids]

        # Sanity check: each selection must contain exactly two particles.
        assert len(light_quarks) == 2, f"Length of selected quarks is not 2: {len(light_quarks)}"
        assert len(found_leptons) == 2, f"Length of selected leptons is not 2: {len(found_leptons)}"

        quark_records.append([as_record(light_quarks[k]) for k in (0, 1)])
        lepton_records.append([as_record(found_leptons[k]) for k in (0, 1)])
        weights.append(evt.eventinfo.weight)

    return (
        ak.Array(quark_records, with_name="Momentum4D"),
        ak.Array(lepton_records, with_name="Momentum4D"),
        weights,
    )

In [27]:
# Open a fresh reader on `lhe_file` inside this cell instead of reusing the
# `events` object: pylhe.read_lhe_with_attributes yields events lazily, so an
# iterator that has already been looped over would come back empty and make
# this cell fail (or silently produce nothing) when re-run.
quarks_4m, leptons_4m, cross_section_weight = extract_4m(
    pylhe.read_lhe_with_attributes(lhe_file)
)

In [28]:
# Per-event kinematic features. Index 0/1 follows the order in which the two
# selected particles appear in each event (they are not sorted by pT).
lepton_1, lepton_2 = leptons_4m[:, 0], leptons_4m[:, 1]
jet_1, jet_2 = quarks_4m[:, 0], quarks_4m[:, 1]

# Two-body systems, built once and reused for mass and pT.
dilepton = lepton_1 + lepton_2
dijet = jet_1 + jet_2

dataset = pd.DataFrame(
    {
        "m_ll": dilepton.mass,
        "m_jj": dijet.mass,
        "pt_l1": lepton_1.pt,
        "pt_l2": lepton_2.pt,
        "pt_j1": jet_1.pt,
        "pt_j2": jet_2.pt,
        "pt_ll": dilepton.pt,
        "eta_l1": lepton_1.eta,
        "eta_l2": lepton_2.eta,
        "eta_j1": jet_1.eta,
        "eta_j2": jet_2.eta,
        "delta_eta_jj": jet_1.eta - jet_2.eta,
        # Signed azimuthal separation phi(j1) - phi(j2). `deltaphi` wraps the
        # result into (-pi, pi]; a plain subtraction of the two phi values can
        # land anywhere in (-2pi, 2pi) and puts an artificial jump in the
        # feature.
        "delta_phi_jj": jet_1.deltaphi(jet_2),
        "cross_section_weight": cross_section_weight,
    }
)

In [29]:
# Preview the first rows to sanity-check the derived columns.
dataset.head()

Unnamed: 0,m_ll,m_jj,pt_l1,pt_l2,pt_j1,pt_j2,pt_ll,eta_l1,eta_l2,eta_j1,eta_j2,delta_eta_jj,delta_phi_jj,cross_section_weight
0,91.63521,667.72278,76.915838,70.168638,79.861451,114.119794,116.217132,1.229191,1.006156,3.575737,-0.300728,3.876465,-1.900115,0.525908
1,96.278517,2420.868329,122.669099,33.745498,208.670392,330.81819,145.454612,0.829505,1.967168,2.528491,-1.890051,4.418542,-2.839869,0.525908
2,93.136842,1354.262005,24.359742,101.941891,94.105585,64.554857,85.31315,-0.216607,-0.199792,-1.59326,4.113683,-5.706943,2.063296,0.525908
3,92.805405,732.812606,90.881102,22.378353,233.302256,268.59272,104.814634,1.747504,0.111492,2.229235,0.346242,1.882993,2.744728,0.525908
4,89.975393,540.147829,68.022582,49.693719,172.316059,118.034701,93.485945,0.126138,1.033327,-0.842621,1.687083,-2.529704,2.601356,0.525908


In [30]:
# Write the feature table to disk without the DataFrame index column.
# The output directory is created first: to_csv does not create missing
# parent directories, so the write would otherwise fail on a fresh checkout.
output_csv = "./data/SM_10k.csv"
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
dataset.to_csv(output_csv, index=False)