# Open data processing: step 1/n

Take Ed's processed data files and split the particle-flow candidates (PFCs) by their PDG ids.

PDG codes (from https://cms-opendata-workshop.github.io/workshop2023-lesson-advobjects/02-particleflow/index.html):

- 11, 13 = electron, muon
- 22 = photon
- 130 = neutral hadron
- 211 = charged hadron
- 1 = hadronic particle reconstructed in the forward calorimeters
- 2 = electromagnetic particle reconstructed in the forward calorimeters

We will split as:
- Muons (2 / event)
- Hadrons + photons, to be routed to fastjet for clustering
- Electrons, to trash

In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import vector

In [2]:
# Directory holding the pre-selected particle-flow candidate arrays, one HDF5 file per field.
selected_data_dir = '/global/cfs/cdirs/m3246/ewitkowski/selected_data'

# Each file exposes its per-event array under the 'values' key. The File handles
# are intentionally left open: the datasets are read lazily, event by event, later on.
selected_pT, selected_eta, selected_phi, selected_pdgId = (
    h5py.File(f'{selected_data_dir}/recoPFCandidates_particleFlow__RECO_obj_{field}_.h5', 'r')['values']
    for field in ('pt', 'eta', 'phi', 'pdgId')
)

In [3]:
# Plain-text outputs written by the event loop below; each event is wrapped in
# "#BEGIN" / "#END" marker lines.
# Dimuon file: one "pt eta phi M" line per event for the summed two-muon system.
outfile_dimuons = "/global/u1/r/rmastand/dimuonAD/data_post_fj/muons_only_od.dat"
# Hadron file: one "px py pz E" line per particle selected for fastjet clustering.
outfile_hadrons = "/global/u1/r/rmastand/dimuonAD/data_pre_fj/hadrons_only_od.dat"

Muons file: one line per event with the dimuon system's pt eta phi M (the two muons summed into a single four-vector)

Hadrons file: one line per particle routed to fastjet, as px py pz E (particles are treated as massless)

In [4]:
# Split each event's particle-flow candidates into two text files:
#   * muons file   -> one "pt eta phi M" line for the dimuon (mu_1 + mu_2) system
#   * hadrons file -> one "px py pz E" line per candidate routed to fastjet
# Every event is bracketed by "#BEGIN" / "#END" lines in both files.
n_events = 1000

muon_mass = 0.1056583755 # GeV
# |pdgId| values sent to fastjet: photon, neutral hadron, charged hadron, and the
# two forward-calorimeter categories (see the PDG code list at the top).
particles_to_fastjet = [22, 130, 211, 1, 2]

# `with` guarantees both files are flushed and closed even if an event raises
# part-way through the loop (the original bare open()/close() leaked them).
with open(outfile_dimuons, "w") as ofile_muons, open(outfile_hadrons, "w") as ofile_hadrons:

    for event in range(n_events):

        if event % 100 == 0:
            print(f"On event {event} of {n_events}...")

        # get the nonzero entries (pdgId == 0 is presumably zero padding -- TODO confirm)
        loc_pid = selected_pdgId[event]
        nonzero_mask = loc_pid != 0
        loc_pt = selected_pT[event][nonzero_mask]
        loc_eta = selected_eta[event][nonzero_mask]
        loc_phi = selected_phi[event][nonzero_mask]
        loc_pid = loc_pid[nonzero_mask]
        num_particles = len(loc_pid)

        # get the muons; validate BEFORE writing anything so a bad event cannot
        # leave a dangling "#BEGIN" record in the output files
        muon_ids = np.where(np.abs(loc_pid) == 13)[0]
        if len(muon_ids) < 2:
            raise ValueError(
                f"Event {event} has {len(muon_ids)} muon candidate(s); "
                "at least 2 are required to build the dimuon system."
            )

        ofile_muons.write("#BEGIN\n")
        ofile_hadrons.write("#BEGIN\n")

        # construct the muon 4-vectors from the first two muon candidates
        mu_1 = vector.obj(pt = loc_pt[muon_ids[0]], eta = loc_eta[muon_ids[0]], phi = loc_phi[muon_ids[0]], M = muon_mass)
        mu_2 = vector.obj(pt = loc_pt[muon_ids[1]], eta = loc_eta[muon_ids[1]], phi = loc_phi[muon_ids[1]], M = muon_mass)
        dimu_system = mu_1 + mu_2

        ofile_muons.write(f"{dimu_system.pt} {dimu_system.eta} {dimu_system.phi} {dimu_system.M}\n")

        # get the hadrons (and photons): written as massless Cartesian 4-vectors
        for particle_i in range(num_particles):
            if np.abs(loc_pid[particle_i]) in particles_to_fastjet:
                particle_vector = vector.obj(pt = loc_pt[particle_i], eta = loc_eta[particle_i], phi = loc_phi[particle_i], M = 0)
                ofile_hadrons.write(f"{particle_vector.px} {particle_vector.py} {particle_vector.pz} {particle_vector.E}\n")

        ofile_muons.write("#END\n")
        ofile_hadrons.write("#END\n")

print("Done!")
                                         

On event 0 of 1000...
On event 100 of 1000...
On event 200 of 1000...
On event 300 of 1000...
On event 400 of 1000...
On event 500 of 1000...
On event 600 of 1000...
On event 700 of 1000...
On event 800 of 1000...
On event 900 of 1000...
Done!
