In [1]:
import os
import pandas as pd
import numpy as np
import awkward0
import uproot3_methods

In [2]:
import logging

# Configure the root logger once for the whole notebook: every record from
# DEBUG upwards is emitted as "[timestamp] LEVEL: message".
logging.basicConfig(
    format='[%(asctime)s] %(levelname)s: %(message)s',
    level=logging.DEBUG,
)

In [3]:
cd ..

/home/rd804/discriminator-metric


In [4]:
def _transform(data, label, jet_size=0.8):
    """Build per-jet and per-particle features from dense four-momenta.

    Parameters
    ----------
    data : array
        Dense array whose last axis is read as (energy, px, py, pz) and whose
        axis 1 enumerates the particle slots of a jet (the visible caller
        passes shape (n_jets, 150, 4)). Only slots with energy > 0 are kept.
    label : array
        Per-jet labels; stored as ``np.stack((label, 1 - label), axis=-1)``.
        NOTE(review): presumably a 1-D 0/1 array - a 2-D (n, 1) input would
        give an (n, 1, 2) result; confirm against the HDF5 schema.
    jet_size : float
        Not used by this function.

    Returns
    -------
    OrderedDict
        ``label``, the jet-level entries (``jet_pt``, ``jet_eta``, ``jet_phi``,
        ``jet_mass``, ``n_parts``) and the jagged ``part_*`` entries, inserted
        in that order.
    """
    from collections import OrderedDict

    # Split the last axis into its four components.
    dense_e, dense_px, dense_py, dense_pz = (data[..., k] for k in range(4))

    # A slot counts as a particle only if its energy is strictly positive.
    is_real = dense_e > 0
    n_particles = np.sum(is_real, axis=1)

    print('num particles: ',n_particles[0:20])

    def _jagged(dense):
        # Keep the selected slots and regroup them jet by jet.
        return awkward0.JaggedArray.fromcounts(n_particles, dense[is_real])

    px = _jagged(dense_px)
    py = _jagged(dense_py)
    pz = _jagged(dense_pz)
    energy = _jagged(dense_e)

    particles = uproot3_methods.TLorentzVectorArray.from_cartesian(px, py, pz, energy)
    part_pt = particles.pt

    # The jet four-vector is the sum of its constituents.
    jet = particles.sum()

    out = OrderedDict()
    out['label'] = np.stack((label, 1-label), axis=-1)

    # Jet-level quantities.
    jet_pt = jet.pt
    jet_eta = jet.eta
    out['jet_pt'] = jet_pt
    out['jet_eta'] = jet_eta
    out['jet_phi'] = jet.phi
    out['jet_mass'] = jet.mass
    out['n_parts'] = n_particles

    # Raw constituent kinematics.
    out['part_px'] = px
    out['part_py'] = py
    out['part_pz'] = pz
    out['part_energy'] = energy

    # Transverse momentum: log, fraction of the jet pt, and log of the fraction.
    pt_fraction = part_pt/jet_pt
    out['part_pt_log'] = np.log(part_pt)
    out['part_ptrel'] = pt_fraction
    out['part_logptrel'] = np.log(pt_fraction)

    # Energy: same three variants, relative to the jet energy.
    e_fraction = energy/jet.energy
    out['part_e_log'] = np.log(energy)
    out['part_erel'] = e_fraction
    out['part_logerel'] = np.log(e_fraction)

    # Pseudorapidity relative to the jet axis, multiplied by the sign of the
    # jet eta (a zero sign is replaced by +1 so nothing is zeroed out).
    raw_eta_rel = particles.eta - jet_eta
    eta_sign = np.sign(jet_eta)
    eta_sign[eta_sign == 0] = 1
    eta_rel = raw_eta_rel * eta_sign
    out['part_raw_etarel'] = raw_eta_rel
    out['part_etarel'] = eta_rel

    # Azimuthal offset and angular distance to the jet axis.
    phi_rel = particles.delta_phi(jet)
    out['part_phirel'] = phi_rel
    out['part_deltaR'] = np.hypot(eta_rel, phi_rel)

    def _make_image(var_img, rec, n_pixels = 64, img_ranges = [[-0.8, 0.8], [-0.8, 0.8]]):
        """Histogram rec[var_img] of every jet on an (etarel, phirel) grid.

        Currently unused: the call that would fill an 'img' entry is disabled.
        """
        weights = rec[var_img]
        eta_axis = rec['part_etarel']
        phi_axis = rec['part_phirel']
        n_jets = len(weights)
        img = np.zeros(shape=(n_jets, n_pixels, n_pixels))
        for idx in range(n_jets):
            # histogram2d returns (hist, xedges, yedges); only the counts are kept.
            img[idx] = np.histogram2d(
                eta_axis[idx],
                phi_axis[idx],
                bins=[n_pixels, n_pixels],
                range=img_ranges,
                weights=weights[idx],
            )[0]
        return img

    # Disabled: out['img'] = _make_image('part_ptrel', out)

    return out

In [5]:
def convert(data,label, destdir, basename, step=None, limit=None):
    """Transform one dataset and save it as ``<destdir>/<basename>.awkd``.

    Parameters
    ----------
    data, label :
        Forwarded unchanged to ``_transform``.
    destdir : str
        Output directory; created (with parents) if missing. An empty string
        means the current working directory.
    basename : str
        Output file name without the ``.awkd`` extension.
    step, limit :
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    None
        If the output file already exists, a warning is logged and nothing
        is recomputed or overwritten.
    """
    # exist_ok avoids the check-then-create race; the guard avoids
    # os.makedirs('') raising when the current directory is the target.
    if destdir:
        os.makedirs(destdir, exist_ok=True)
    output = os.path.join(destdir, '%s.awkd'%(basename))
    logging.info(output)

    # Never redo (or clobber) a conversion that is already on disk.
    if os.path.exists(output):
        logging.warning('... file already exist: continue ...')
        return

    v = _transform(data, label)
    # mode='x' makes the save fail rather than overwrite should the file
    # appear between the check above and this call.
    awkward0.save(output, v, mode='x')

In [6]:
# Folder holding the input data (the loader below reads 'data/jetnet_data.h5').
srcDir = 'data'
# Folder the converted .awkd files are written to (passed to convert() as destdir).
destDir = 'data/converted'

In [7]:
# Convert the training and validation splits.
split = ['train', 'val']
# Build the input path from srcDir instead of repeating a hard-coded literal.
src_file = os.path.join(srcDir, 'jetnet_data.h5')
for s in split:
    # Each row is unflattened to 150 particle slots x 4 components
    # (read as E, px, py, pz by _transform).
    particle_data = pd.read_hdf(src_file, f'particle_data_{s}').values.reshape(-1,150,4)
    labels = pd.read_hdf(src_file, f'labels_{s}').values
    convert(data=particle_data,label=labels, destdir=destDir, basename=f'{s}_file')

[2023-04-07 07:21:04,632] INFO: data/converted/train_file.awkd


num particles:  [74 57 81 59 33 67 79 47 48 59 71 81 56 73 45 66 90 79 45 71]
[[TLorentzVector(x=66.212, y=-6.9782, z=-196.68, t=207.64) TLorentzVector(x=66.005, y=3.5843, z=-202.21, t=212.74) TLorentzVector(x=44.929, y=1.5276, z=-137.48, t=144.64) ... TLorentzVector(x=0.36931, y=-0.012029, z=-1.2045, t=1.2599) TLorentzVector(x=0.3376, y=-0.0093053, z=-1.0557, t=1.1084) TLorentzVector(x=0.31922, y=-0.011996, z=-0.8985, t=0.9536)] [TLorentzVector(x=195.96, y=-1.5747, z=-133.89, t=237.34) TLorentzVector(x=178.72, y=-2.6738, z=-119.5, t=215.01) TLorentzVector(x=103, y=-0.38003, z=-70.825, t=125) ... TLorentzVector(x=0.50616, y=0.038721, z=-0.24888, t=0.56537) TLorentzVector(x=0.39013, y=0.093997, z=-0.21024, t=0.45303) TLorentzVector(x=0.25055, y=-0.026231, z=-0.22375, t=0.33694)] [TLorentzVector(x=153.53, y=-2.4244, z=39.582, t=158.57) TLorentzVector(x=74.888, y=-0.99518, z=19.112, t=77.295) TLorentzVector(x=72.385, y=-0.4772, z=18.806, t=74.789) ... TLorentzVector(x=0.325, y=0.011945, z

[2023-04-07 07:21:10,149] INFO: data/converted/val_file.awkd


num particles:  [60 79 41 65 82 45 52 28 57 26 52 40 74 66 81 66 69 36 86 51]
[[TLorentzVector(x=89.002, y=-1.6485, z=-93.778, t=129.3) TLorentzVector(x=85.674, y=2.6002, z=-91.634, t=125.47) TLorentzVector(x=66.671, y=1.5471, z=-69.023, t=95.978) ... TLorentzVector(x=1.0376, y=0.18184, z=-1.3726, t=1.7302) TLorentzVector(x=0.9546, y=0.03658, z=-0.83885, t=1.2713) TLorentzVector(x=0.37779, y=-0.0057411, z=-0.30439, t=0.48519)] [TLorentzVector(x=108.05, y=0.89472, z=-1.6208, t=108.06) TLorentzVector(x=87.408, y=0.94948, z=-2.5462, t=87.45) TLorentzVector(x=76.41, y=0.4913, z=-1.6939, t=76.43) ... TLorentzVector(x=0.51269, y=-0.093974, z=-0.059371, t=0.5246) TLorentzVector(x=0.35624, y=0.076259, z=0.074279, t=0.37181) TLorentzVector(x=0.25572, y=0.0024267, z=0.0028248, t=0.25574)] [TLorentzVector(x=286.69, y=2.3893, z=-587.24, t=653.49) TLorentzVector(x=196.91, y=1.6238, z=-407.25, t=452.36) TLorentzVector(x=96.547, y=0.25223, z=-196.3, t=218.76) ... TLorentzVector(x=0.46948, y=0.076203,

In [8]:
# convert validation file (the 'val' split is already converted by the train/val loop above)


In [9]:
# convert testing file
