In [3]:
from typing import List, Tuple
from pyhepmc import GenEvent, GenParticle, ReaderAsciiHepMC2, FourVector
import numpy as np

# NOTE(review): this flag is not read by any function visible in this file;
# presumably a leftover switch for pT ordering — confirm before relying on it.
MOMENTUM_ORDER = True

def calculate_trans_momentum(electron):
    """Return the transverse momentum sqrt(px^2 + py^2) of a particle.

    The argument only needs a ``momentum`` attribute exposing ``px`` and
    ``py`` (as a pyhepmc ``GenParticle`` does).
    """
    four_momentum = electron.momentum
    transverse_sq = four_momentum.px**2 + four_momentum.py**2
    return np.sqrt(transverse_sq)

def get_z_decay_electrons(event):
    """Collect final-state electrons/positrons emitted directly by a Z boson.

    Walks every particle with PDG id 23 in ``event``, looks at the outgoing
    particles of its end vertex, and keeps those with status 1 and
    |pid| == 11. Z bosons without an end vertex are ignored.

    Returns:
        List of matching daughter particles, in traversal order.
    """
    found = []
    for particle in event.particles:
        if particle.pid != 23:
            continue
        decay_vertex = particle.end_vertex
        if decay_vertex is None:
            # Z boson that never decays in this record: nothing to collect.
            continue
        found.extend(
            child
            for child in decay_vertex.particles_out
            if child.status == 1 and abs(child.pid) == 11
        )
    return found

def get_final_state_electrons(event):
    """Return every particle in ``event`` with |pid| == 11 and status 1.

    The result keeps the order in which ``event.particles`` yields them.
    """
    return [
        candidate
        for candidate in event.particles
        if abs(candidate.pid) == 11 and candidate.status == 1
    ]

def get_final_state_particles(event):
    """Return every particle in ``event`` whose status equals 1.

    The result keeps the order in which ``event.particles`` yields them.
    """
    return [candidate for candidate in event.particles if candidate.status == 1]


def extract_electron_momenta(event, num_particles):
    """Select exactly ``num_particles`` particles from ``event`` in priority order.

    Priority is: electrons from Z decays, then all final-state electrons,
    then all final-state particles. Each group is sorted by descending
    transverse momentum, and a particle object that appears in more than one
    group is kept only at its first position. If fewer than ``num_particles``
    remain, the list is padded with dummy particles (zero four-vector, pid 0,
    status 1).

    Returns:
        List of particles truncated to ``num_particles`` entries.
    """
    # Fetch all three groups first so every particle object stays referenced
    # while the identity-based de-duplication below runs.
    from_z = get_z_decay_electrons(event)
    electrons = get_final_state_electrons(event)
    final_state = get_final_state_particles(event)

    ranked = []
    for group in (from_z, electrons, final_state):
        ranked += sorted(group, key=calculate_trans_momentum, reverse=True)

    # Drop repeats by object identity, keeping the first occurrence.
    seen_ids = set()
    selected = []
    for candidate in ranked:
        key = id(candidate)
        if key in seen_ids:
            continue
        seen_ids.add(key)
        selected.append(candidate)

    # Pad with placeholder particles so every event has the same length.
    missing = num_particles - len(selected)
    if missing > 0:
        selected.extend(
            GenParticle(FourVector(0, 0, 0, 0), 0, 1)  # pid=0, status=1 (dummy)
            for _ in range(missing)
        )

    return selected[:num_particles]




def extract_electron_momenta_from_hepmc2(input_path, num_particles=20):
    """Read HepMC2 ASCII files and collect the selected particles of each event.

    Args:
        input_path: Iterable of file paths. A single path given as a ``str``
            is also accepted and treated as a one-element list.
        num_particles: Number of particles kept per event; forwarded to
            ``extract_electron_momenta``.

    Returns:
        List with one entry per successfully processed event, each entry
        being the list returned by ``extract_electron_momenta``.

    Events for which extraction raises an ``Exception`` are skipped (best
    effort), and one ``RuntimeWarning`` per file reports how many were dropped.
    """
    import warnings

    # A bare string would otherwise be iterated character by character.
    if isinstance(input_path, str):
        input_path = [input_path]

    momenta_list = []
    for path in input_path:
        skipped = 0
        last_error = None
        with ReaderAsciiHepMC2(path) as reader:
            while not reader.failed():
                evt = GenEvent()
                reader.read_event(evt)
                # The reader flags failure after the attempt that hits
                # end-of-file, so check again before using the event.
                if reader.failed():
                    break
                try:
                    selected = extract_electron_momenta(evt, num_particles)
                except Exception as exc:
                    # Keep going, but remember what went wrong; unlike a bare
                    # ``except`` this lets KeyboardInterrupt/SystemExit through.
                    skipped += 1
                    last_error = exc
                    continue
                momenta_list.append(selected)
        if skipped:
            warnings.warn(
                f"Skipped {skipped} event(s) in {path!r}; last error: {last_error!r}",
                RuntimeWarning,
                stacklevel=2,
            )
    return momenta_list


def write_momenta_to_npz(momenta, output_path):
    """Flatten per-event four-momenta and save them to an NPZ archive.

    Args:
        momenta: Sequence of events; each event is a sequence of particles
            exposing ``momentum.px``, ``.py``, ``.pz`` and ``.e``. All events
            are expected to hold the same number of particles.
        output_path: Destination passed straight to ``np.savez`` (a file name
            or a file-like object).

    The archive contains:
        ``data``  - array of shape (n_events, 4 * particles_per_event) with
                    columns ordered px0, py0, pz0, e0, px1, ...
        ``names`` - the matching column names.

    An empty ``momenta`` produces an empty (0, 0) ``data`` array instead of
    raising.
    """
    n_events = len(momenta)
    # Row width is taken from the data rather than fixed, so any
    # particles-per-event count works (the old constant 80 assumed 20).
    particles_per_event = len(momenta[0]) if n_events else 0

    # Define column names
    names = [
        f"{component}{i}"
        for i in range(particles_per_event)
        for component in ("px", "py", "pz", "e")
    ]
    print(len(names))

    # Convert momenta into a NumPy array of shape (events, particles, 4)
    data = np.array([
        [[p.momentum.px, p.momentum.py, p.momentum.pz, p.momentum.e] for p in event]
        for event in momenta
    ])
    flat = data.reshape(n_events, len(names))
    print(flat.shape)

    # Save to NPZ with both data and names
    np.savez(output_path, data=flat, names=names)



  from pyhepmc import GenEvent, GenParticle, ReaderAsciiHepMC2, FourVector


In [None]:
# Read the ten HepMC2 files with suffixes 01..10; num_particles is left at
# the function's default.
momenta = extract_electron_momenta_from_hepmc2([
    f"../DATA/HEPMCfiles/HEPMC.43646139._0000{str(i).zfill(2)}.hepmc" for i in range(1, 11)
])

In [None]:
# Flatten the collected per-event four-momenta and store them, together with
# their column names, in a single NPZ archive.
write_momenta_to_npz(momenta, "../DATA/MomentumOrdered100-000Events20ParticlesFlattened.npz")

200
(100000, 200)
