# Test 12: Jet
Test the `jet` function, which collects the jet information of each event, using the SVJ data set produced with CKKW-L and without decay.

## 1. Import Packages

In [1]:
# The Python Standard Library
import os
import sys
import time
import datetime
import glob
import multiprocessing as mp

# The Third-Party Library
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
import prettytable
import uproot
import pyjet
import importlib

# My Packages
import myhep.particle_information_v2 as mypInfo_v2
import myhep.analytical_function_v2 as myaFun_v2
import myhep.analysis_v3 as myAnal_v3
# import myhep.particleinfo_v1 as mypiv1
# import myhep.particlefun_v1 as myafv1

# increase figure showing resolution
%config InlineBackend.figure_format = 'retina'

## 2. Import .root File and Load the Data via class
Skip:  
2-1. Check the number of events for each branch  
2-2. Define mass quantities

In [2]:
# Location of the Delphes ROOT file used for this test.
# The original absolute path remains the default, so existing runs behave the
# same; set the SVJ_INPUT_FILE environment variable to point somewhere else
# (e.g. on another machine) without editing the notebook.
INPUT_FILE = os.environ.get(
    'SVJ_INPUT_FILE',
    '/youwei_u3/svj_data_master/scheme_1/root/ckkwl_wo.root')

# Open the file and pick the 'Delphes;1' object (presumably the Delphes tree
# -- confirm against the file layout).
DATA = uproot.open(INPUT_FILE)['Delphes;1']

# Wrap the data in the project's reader classes used by the later cells.
GP = mypInfo_v2.classGenParticle(DATA)
Jet = mypInfo_v2.classJet(DATA)
Event = mypInfo_v2.classEvent(DATA)

## 3. Analyze the Dark Sector in the Parton and Truth Levels
Skip

## 4. Jet Clustering

### 4-1. Select stable final-state particles without and with the dark sector filtered out

In [3]:
# PIDs passed to the selector as `filter`. The order is kept as-is because the
# selector echoes the list in its log message.
FILTER_PIDS = [
    51, -51, 53, -53, 4900211, -4900211, 4900213, -4900213,
    4900101, -4900101, 4900021,
    12, -12, 14, -14, 16, -16,
]

# Two collections come back: the stable final-state particles as they are,
# and the same selection with the PIDs above removed.
SFSP, SFSP_filterDM = myAnal_v3.selectStableFinalStateParticle(GP, filter=FILTER_PIDS)

The PID of dark matter are [51, -51, 53, -53, 4900211, -4900211, 4900213, -4900213, 4900101, -4900101, 4900021, 12, -12, 14, -14, 16, -16].
19373 events are stable final state.
19373 events are stable final state without DM.


### 4-2. Let's do the jet clustering!!

In [4]:
# Clustering settings shared by both calls below.
# NOTE(review): p = -1 is presumably the anti-kT choice of the generalised-kT
# family -- confirm against jetClustering_v1.
R = 0.4
JetClusteringAlgorithm = -1
pTmin_pyjet = 0

clustering_options = dict(R=R, p=JetClusteringAlgorithm, pTmin=pTmin_pyjet)

# Cluster once on all stable final-state particles and once on the
# collection with the filtered PIDs removed.
PseudoJet = myAnal_v3.jetClustering_v1(SFSP, **clustering_options)
PseudoJet_filterDM = myAnal_v3.jetClustering_v1(SFSP_filterDM, **clustering_options)
print('Done')

Done


## 5. Analyze the Jet in the Truth Level

### 5-1. Preselection version 1

In [5]:
# Preselect the DM-filtered pseudo-jets with pT_min=20 and eta_max=2.5.
# Judging by the names, the four results are the jets before any cut, after
# the pT cut, after the pT and eta cuts, and an index array -- TODO confirm
# against preselection_v1.
presel_bef, presel_pt, presel_pt_eta, presel_idx = myAnal_v3.preselection_v1(PseudoJet_filterDM, pT_min=20, eta_max=2.5)

19373 events before preselection
19373 events after pT preselection
19373 events after pT & eta preselections
--------------------------------------------------------------------------------
0 events without PseudoJet before preselection
364 events without PseudoJet after pT preselection
526 events without PseudoJet after pT & eta preselections


### 5-2. MET

In [6]:
# Compute the MET from the DM-filtered stable final-state particles.
# Two results come back; by their names an array and a DataFrame -- confirm.
arr_MET, df_MET = myAnal_v3.MET_visParticles_v1(SFSP_filterDM)

19373 events in MET data.


### 5-3. Jet

In [7]:
# Function under test: summarise every preselected event as one row holding
# N_jet and the (pT, eta, phi, mass) of the four leading jets.
df_jet = myAnal_v3.jet(presel_pt_eta)

19373 events and 4 leading jet states (pT, eta, phi, mass).


In [8]:
# Inspect the first rows; slots without a jet are filled with -999.
df_jet.head()

Unnamed: 0,N_jet,pT_1,pT_2,pT_3,pT_4,eta_1,eta_2,eta_3,eta_4,phi_1,phi_2,phi_3,phi_4,mass_1,mass_2,mass_3,mass_4
0,2.0,87.399442,64.213808,-999.0,-999.0,0.187685,0.613261,-999.0,-999.0,-0.162607,-0.410417,-999.0,-999.0,13.970557,11.678316,-999.0,-999.0
1,3.0,156.631723,116.276794,100.766976,-999.0,-0.720677,-1.840887,-2.304753,-999.0,1.45317,-2.920251,-2.086857,-999.0,24.607911,21.576661,12.046796,-999.0
2,1.0,180.418744,-999.0,-999.0,-999.0,-1.262694,-999.0,-999.0,-999.0,2.763893,-999.0,-999.0,-999.0,34.021953,-999.0,-999.0,-999.0
3,1.0,123.361312,-999.0,-999.0,-999.0,1.777785,-999.0,-999.0,-999.0,-0.543866,-999.0,-999.0,-999.0,23.160264,-999.0,-999.0,-999.0
4,4.0,163.660012,72.199409,33.798631,25.914431,-0.501624,1.195995,1.194867,-0.849276,-2.365578,-2.805764,0.61146,1.376696,33.096091,15.648594,8.263577,8.521142


##### Conclusion: Well Done!!

In [9]:
# !!! test 'jet' function !!!
# Rebuild by hand, for the first three events, the flat row
# [N_jet, pT x4, eta x4, phi x4, mass x4], padding missing jets with -999.
_jet = []
for i in range(3):
    event_jets = presel_pt_eta[i]
    n_jet = event_jets.shape[0]
    pt, eta, phi, mass = (event_jets[field] for field in ('pT', 'eta', 'phi', 'mass'))
    j = np.array([n_jet])
    print(f'event {i}: N_jet = {n_jet}, pT = {pt}, jet = {j}')
    if n_jet < 4:
        # Fewer than four jets: extend every column to length four.
        diff = 4 - n_jet
        arr_n999 = np.full(diff, -999, dtype=np.float64)
        print(f'-999: {arr_n999}')
        pt, eta, phi, mass = (np.concatenate((column, arr_n999), axis=None)
                              for column in (pt, eta, phi, mass))
    print(f'event {i}: N_jet = {pt.shape[0]}, pT = {pt}')
    print(f'                    eta = {eta}')
    print(f'                    phi = {phi}')
    print(f'                    mass = {mass}')
    # Keep only the four leading entries of each column after the jet count.
    j = np.concatenate([j] + [column[:4] for column in (pt, eta, phi, mass)], axis=None)
    print(f'* {j.shape}, {j.dtype}')
    _jet.append(j)
    print('-'*80)
print('-'*80)
# Build the 2-D table in two ways to compare np.array with np.stack.
arr_jet = np.array(_jet)
stack_jet = np.stack(_jet, axis=0)
print(f'shape = {arr_jet.shape}, {stack_jet.shape} and dtype = {arr_jet.dtype}, {stack_jet.dtype}')
print(np.sum(stack_jet - arr_jet))
stack_jet

event 0: N_jet = 2, pT = [87.39944212 64.21380757], jet = [2]
-999: [-999. -999.]
event 0: N_jet = 4, pT = [  87.39944212   64.21380757 -999.         -999.        ]
                    eta = [ 1.87685428e-01  6.13261450e-01 -9.99000000e+02 -9.99000000e+02]
                    phi = [-1.62607177e-01 -4.10416869e-01 -9.99000000e+02 -9.99000000e+02]
                    mass = [  13.97055667   11.67831571 -999.         -999.        ]
* (17,), float64
--------------------------------------------------------------------------------
event 1: N_jet = 3, pT = [156.63172343 116.2767939  100.76697551], jet = [3]
-999: [-999.]
event 1: N_jet = 4, pT = [ 156.63172343  116.2767939   100.76697551 -999.        ]
                    eta = [-7.20677177e-01 -1.84088715e+00 -2.30475287e+00 -9.99000000e+02]
                    phi = [   1.45317025   -2.92025066   -2.08685743 -999.        ]
                    mass = [  24.60791147   21.576661     12.04679642 -999.        ]
* (17,), float64
----------------

array([[ 2.00000000e+00,  8.73994421e+01,  6.42138076e+01,
        -9.99000000e+02, -9.99000000e+02,  1.87685428e-01,
         6.13261450e-01, -9.99000000e+02, -9.99000000e+02,
        -1.62607177e-01, -4.10416869e-01, -9.99000000e+02,
        -9.99000000e+02,  1.39705567e+01,  1.16783157e+01,
        -9.99000000e+02, -9.99000000e+02],
       [ 3.00000000e+00,  1.56631723e+02,  1.16276794e+02,
         1.00766976e+02, -9.99000000e+02, -7.20677177e-01,
        -1.84088715e+00, -2.30475287e+00, -9.99000000e+02,
         1.45317025e+00, -2.92025066e+00, -2.08685743e+00,
        -9.99000000e+02,  2.46079115e+01,  2.15766610e+01,
         1.20467964e+01, -9.99000000e+02],
       [ 1.00000000e+00,  1.80418744e+02, -9.99000000e+02,
        -9.99000000e+02, -9.99000000e+02, -1.26269412e+00,
        -9.99000000e+02, -9.99000000e+02, -9.99000000e+02,
         2.76389283e+00, -9.99000000e+02, -9.99000000e+02,
        -9.99000000e+02,  3.40219529e+01, -9.99000000e+02,
        -9.99000000e+02, -9.9

## Test

### Test A: basic

In [10]:
# Each event is a structured NumPy array with one record per jet; show the
# field layout and the records of event 0.
print(presel_pt_eta[0].dtype)
presel_pt_eta[0]

[('pT', '<f8'), ('eta', '<f8'), ('phi', '<f8'), ('mass', '<f8')]


array([(87.39944212, 0.18768543, -0.16260718, 13.97055667),
       (64.21380757, 0.61326145, -0.41041687, 11.67831571)],
      dtype=[('pT', '<f8'), ('eta', '<f8'), ('phi', '<f8'), ('mass', '<f8')])

In [11]:
# Selecting one field of the structured array gives a plain array of that
# column (here the pT values of event 0).
print(presel_pt_eta[0]['pT'].dtype)
presel_pt_eta[0]['pT']

float64


array([87.39944212, 64.21380757])

In [12]:
# Print the jet records and the pT column of the first three events; the
# number of jets differs from event to event.
for i in range(3):
    print(f'event {i}: {presel_pt_eta[i]}')
    print(f"pT = {presel_pt_eta[i]['pT']}")
    print('-'*80)

event 0: [(87.39944212, 0.18768543, -0.16260718, 13.97055667)
 (64.21380757, 0.61326145, -0.41041687, 11.67831571)]
pT = [87.39944212 64.21380757]
--------------------------------------------------------------------------------
event 1: [(156.63172343, -0.72067718,  1.45317025, 24.60791147)
 (116.2767939 , -1.84088715, -2.92025066, 21.576661  )
 (100.76697551, -2.30475287, -2.08685743, 12.04679642)]
pT = [156.63172343 116.2767939  100.76697551]
--------------------------------------------------------------------------------
event 2: [(180.41874406, -1.26269412, 2.76389283, 34.02195291)]
pT = [180.41874406]
--------------------------------------------------------------------------------


### Test B: supply -999 element

In [13]:
# supply -999 element
# With fewer than four entries, work out how many -999 fillers are needed.
a = np.array([1, 2, 3])
if a.shape[0] >= 4:
    print(f'>= 4, n_jet = {a.shape[0]}')
else:
    diff = 4 - a.shape[0]
    print(f'n_jet = {a.shape[0]}, difference = {diff}')
    print(np.full(diff, -999))

n_jet = 3, difference = 1
[-999]


### Test C: stack all events

In [14]:
# stack all events
# Toy version of the per-event row building: every "event" gets one more
# entry than the previous one, is padded with -999 up to four entries,
# truncated to four, and flattened into [n, pT x4, eta x4].
_jet = []
a, b = np.array([1]), np.array([10])
for i, element in enumerate([2, 3, 6, 7]):
    # Grow the toy pT (a) and eta (b) arrays by one entry per event.
    a, b = np.append(a, [element]), np.append(b, [10 * element + element])
    pt, eta = a, b
    n_pt = pt.shape[0]
    j = np.array([n_pt])
    print(f'event {i}: pT = {pt}, jet = {j}')
    if n_pt < 4:
        diff = 4 - n_pt
        arr_n999 = np.full(diff, -999)
        print(f'         n_jet = {n_pt}, difference = {diff}, -999: {arr_n999}')
        pt, eta = np.concatenate((pt, arr_n999), axis=None), np.concatenate((eta, arr_n999), axis=None)
    else:
        print(f'         >= 4, n_jet = {n_pt}')
    print(f'event {i}: n_jet = {pt.shape[0]}, pT = {pt}')
    print(f'                    eta = {eta}')
    # Only the first four entries of each quantity go into the row.
    j = np.concatenate((j, pt[:4], eta[:4]), axis=None)
    print(f'* {j.shape}, j = {j}')
    # Collect the rows in a list and convert once after the loop.
    _jet.append(j)
    print('-'*80)
print('-'*80)
# Build the 2-D table in two ways to compare np.array with np.stack.
arr_jet = np.array(_jet)
stack_jet = np.stack(_jet, axis=0)
print(arr_jet.shape, '\n', arr_jet)
print('-'*20)
print(stack_jet.shape)
print(np.sum(stack_jet - arr_jet))
# A summed difference can cancel to zero, so also compare element-wise.
assert np.array_equal(arr_jet, stack_jet)
stack_jet

event 0: pT = [1 2], jet = [2]
         n_jet = 2, difference = 2, -999: [-999 -999]
event 0: n_jet = 4, pT = [   1    2 -999 -999]
                    eta = [  10   22 -999 -999]
* (9,), j = [   2    1    2 -999 -999   10   22 -999 -999]
--------------------------------------------------------------------------------
event 1: pT = [1 2 3], jet = [3]
         n_jet = 3, difference = 1, -999: [-999]
event 1: n_jet = 4, pT = [   1    2    3 -999]
                    eta = [  10   22   33 -999]
* (9,), j = [   3    1    2    3 -999   10   22   33 -999]
--------------------------------------------------------------------------------
event 2: pT = [1 2 3 6], jet = [4]
         >= 4, n_jet = 4
event 2: n_jet = 4, pT = [1 2 3 6]
                    eta = [10 22 33 66]
* (9,), j = [ 4  1  2  3  6 10 22 33 66]
--------------------------------------------------------------------------------
event 3: pT = [1 2 3 6 7], jet = [5]
         >= 4, n_jet = 5
event 3: n_jet = 5, pT = [1 2 3 6 7]
       

array([[   2,    1,    2, -999, -999,   10,   22, -999, -999],
       [   3,    1,    2,    3, -999,   10,   22,   33, -999],
       [   4,    1,    2,    3,    6,   10,   22,   33,   66],
       [   5,    1,    2,    3,    6,   10,   22,   33,   66]])

### Test D: `np.full()`, `np.append()`, and `np.concatenate()`

In [15]:
# Without an explicit dtype np.full takes it from the fill value (an integer
# here); passing dtype=np.float64 forces floating point.
arr_2 = np.full(shape=3, fill_value=2)
print(arr_2.dtype)
print('-'*10)
arr_2 = np.full(shape=3, fill_value=2, dtype=np.float64)
print(arr_2.dtype)
arr_2

int64
----------
float64


array([2., 2., 2.])

In [16]:
# Same dtype check for the -999 filler: integer by default, float64 on request.
arr_n999 = np.full(shape=2, fill_value=-999)
print(arr_n999.dtype)
print('-'*10)
arr_n999 = np.full(shape=2, fill_value=-999, dtype=np.float64)
print(arr_n999.dtype)
arr_n999

int64
----------
float64


array([-999., -999.])

In [17]:
# Join the two 1-D arrays end to end with np.append; the result is a new
# array and the inputs are left unchanged.
test_2 = np.append(arr_2, arr_n999)
print(test_2.dtype)
test_2

float64


array([   2.,    2.,    2., -999., -999.])

In [18]:
# Same join via np.concatenate.
# NOTE: this rebinds arr_2 to the longer array, so running this cell again
# appends another copy of arr_n999 each time.
arr_2 = np.concatenate((arr_2, arr_n999), axis=None)
print(arr_2.dtype)
arr_2

float64


array([   2.,    2.,    2., -999., -999.])

In [19]:
# All zeros means np.append and np.concatenate produced the same array.
arr_2 - test_2

array([0., 0., 0., 0., 0.])

In [20]:
# Two integer arrays of different length: join them whole, then join only
# the first three entries of each.
a = np.arange(1, 8)
b = np.arange(10, 60, 10)
c = np.concatenate((a, b), axis=None)
print(c)
c = np.concatenate((a[:3], b[:3]), axis=None)
c

[ 1  2  3  4  5  6  7 10 20 30 40 50]


array([ 1,  2,  3, 10, 20, 30])

In [21]:
# With axis=None every input is flattened first, so arrays of different
# length and a plain Python list can be joined in a single call.
j = np.array([7])
print(j)
j = np.concatenate((j, a, b, [666, 777, 999]), axis=None)
j

[7]


array([  7,   1,   2,   3,   4,   5,   6,   7,  10,  20,  30,  40,  50,
       666, 777, 999])

In [22]:
# Leading count followed by the first three entries of each array -- the
# same layout used when building the per-event jet row.
j = np.array([7])
j = np.concatenate((j, a[:3], b[:3]), axis=None)
j

array([ 7,  1,  2,  3, 10, 20, 30])

In [23]:
# 2-D inputs are flattened as well when axis=None.
# Note that a and b are rebound here to 2-D arrays.
a = np.arange(1, 5).reshape(2, 2)
b = np.arange(5, 7).reshape(1, 2)
np.concatenate((a, b), axis=None)

array([1, 2, 3, 4, 5, 6])

In [24]:
# Passing the inputs as a list instead of a tuple gives the same result.
np.concatenate([a, b], axis=None)

array([1, 2, 3, 4, 5, 6])

### Test E: `np.stack()`

In [25]:
# e.g. 1
# (10, 3, 4)
# Ten (3, 4) arrays of standard-normal samples. A seeded generator is used so
# that Restart & Run All reproduces the same numbers every time.
rng = np.random.default_rng(0)
arrays = [rng.standard_normal((3, 4)) for _ in range(10)]
arrays

[array([[-1.06498209, -1.49447888, -1.3553921 , -0.31098244],
        [-1.30258974, -0.23585204, -2.0972719 ,  0.35200334],
        [ 0.11850903,  0.66874395, -0.54599126, -2.41146124]]),
 array([[ 1.44305741,  1.67982091, -0.29185494,  0.02475435],
        [-0.88814561,  1.21941564,  1.41365588,  0.02583602],
        [-0.73708567,  0.70675679,  0.18425647,  0.19178133]]),
 array([[ 1.37749279,  0.7049367 , -3.1253836 ,  1.27233151],
        [-0.60077458,  0.12436186, -0.22180637, -2.55025993],
        [ 0.75736952, -0.48365095,  0.86938705,  0.65831046]]),
 array([[ 0.14406546,  1.02639708,  1.17638724, -1.19035951],
        [ 0.991366  ,  1.64976282,  0.71659513,  0.07171392],
        [ 2.63102516, -0.6590316 ,  0.48972937, -0.94914274]]),
 array([[-0.44137199,  0.03666197,  1.38362802,  1.00984036],
        [-1.65167208, -1.18855268,  0.19789649,  0.6788938 ],
        [ 0.11524954, -1.38618203,  2.04003767,  0.70357228]]),
 array([[-2.61435798, -0.20264783,  1.39145632,  2.17915716]

In [26]:
# Number of arrays in the list and the shape of one of them.
print(len(arrays))
arrays[0].shape

10


(3, 4)

In [27]:
# Stack along a new leading axis: the list length becomes the first dimension.
print(np.stack(arrays, axis=0).shape)
np.stack(arrays, axis=0)

(10, 3, 4)


array([[[-1.06498209, -1.49447888, -1.3553921 , -0.31098244],
        [-1.30258974, -0.23585204, -2.0972719 ,  0.35200334],
        [ 0.11850903,  0.66874395, -0.54599126, -2.41146124]],

       [[ 1.44305741,  1.67982091, -0.29185494,  0.02475435],
        [-0.88814561,  1.21941564,  1.41365588,  0.02583602],
        [-0.73708567,  0.70675679,  0.18425647,  0.19178133]],

       [[ 1.37749279,  0.7049367 , -3.1253836 ,  1.27233151],
        [-0.60077458,  0.12436186, -0.22180637, -2.55025993],
        [ 0.75736952, -0.48365095,  0.86938705,  0.65831046]],

       [[ 0.14406546,  1.02639708,  1.17638724, -1.19035951],
        [ 0.991366  ,  1.64976282,  0.71659513,  0.07171392],
        [ 2.63102516, -0.6590316 ,  0.48972937, -0.94914274]],

       [[-0.44137199,  0.03666197,  1.38362802,  1.00984036],
        [-1.65167208, -1.18855268,  0.19789649,  0.6788938 ],
        [ 0.11524954, -1.38618203,  2.04003767,  0.70357228]],

       [[-2.61435798, -0.20264783,  1.39145632,  2.17915716]

In [28]:
# With axis=1 the new dimension is inserted in second position instead.
np.stack(arrays, axis=1).shape

(3, 10, 4)

In [29]:
# e.g. 2
# (2, 3, 4)
# Two (3, 4) arrays of standard-normal samples. A seeded generator is used so
# that Restart & Run All reproduces the same numbers every time.
rng = np.random.default_rng(0)
arrays = [rng.standard_normal((3, 4)) for _ in range(2)]
arrays

[array([[ 0.26718293, -0.33681506, -1.91419064,  0.09036509],
        [-0.86505785,  1.48820901, -0.28372001,  0.66288244],
        [-1.85230413,  1.06270229, -0.25973698,  0.70267019]]),
 array([[ 0.16608498, -0.41896287,  1.17984553, -2.54996969],
        [-0.28984423, -0.78277653, -0.43734091, -1.97747208],
        [ 0.37111532, -0.08255713, -0.03765176, -0.91391429]])]

In [30]:
# Number of arrays in the list and the shape of one of them.
print(len(arrays))
arrays[0].shape

2


(3, 4)

In [31]:
# Stack once along a new leading axis, then report the shape and show the result.
arrays_0 = np.stack(arrays, axis=0)
print(arrays_0.shape)
arrays_0

(2, 3, 4)


array([[[ 0.26718293, -0.33681506, -1.91419064,  0.09036509],
        [-0.86505785,  1.48820901, -0.28372001,  0.66288244],
        [-1.85230413,  1.06270229, -0.25973698,  0.70267019]],

       [[ 0.16608498, -0.41896287,  1.17984553, -2.54996969],
        [-0.28984423, -0.78277653, -0.43734091, -1.97747208],
        [ 0.37111532, -0.08255713, -0.03765176, -0.91391429]]])

In [32]:
# Stack once with the new axis in second position, then report the shape and
# show the result.
arrays_1 = np.stack(arrays, axis=1)
print(arrays_1.shape)
arrays_1

(3, 2, 4)


array([[[ 0.26718293, -0.33681506, -1.91419064,  0.09036509],
        [ 0.16608498, -0.41896287,  1.17984553, -2.54996969]],

       [[-0.86505785,  1.48820901, -0.28372001,  0.66288244],
        [-0.28984423, -0.78277653, -0.43734091, -1.97747208]],

       [[-1.85230413,  1.06270229, -0.25973698,  0.70267019],
        [ 0.37111532, -0.08255713, -0.03765176, -0.91391429]]])

In [33]:
# Stack once with the new axis in last position, then report the shape and
# show the result.
arrays_2 = np.stack(arrays, axis=2)
print(arrays_2.shape)
arrays_2

(3, 4, 2)


array([[[ 0.26718293,  0.16608498],
        [-0.33681506, -0.41896287],
        [-1.91419064,  1.17984553],
        [ 0.09036509, -2.54996969]],

       [[-0.86505785, -0.28984423],
        [ 1.48820901, -0.78277653],
        [-0.28372001, -0.43734091],
        [ 0.66288244, -1.97747208]],

       [[-1.85230413,  0.37111532],
        [ 1.06270229, -0.08255713],
        [-0.25973698, -0.03765176],
        [ 0.70267019, -0.91391429]]])

In [34]:
# With axis=0, slice k of the stacked array equals the k-th input array, so
# both differences are all zeros.
print(arrays[0] - arrays_0[0])
arrays[1] - arrays_0[1]

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [35]:
# np.array on a list of equal-shape arrays gives the same result as
# np.stack(..., axis=0): the difference is all zeros.
# (The earlier bare `np.array(arrays)` line was dropped: its value was
# discarded, so it had no effect.)
np.array(arrays) - arrays_0

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [36]:
# e.g. 3
# Two 1-D arrays stacked into the rows of a 2-D array (axis 0 is the default).
a = np.arange(1, 4)
b = np.arange(4, 7)
c = np.stack((a, b), axis=0)
c

array([[1, 2, 3],
       [4, 5, 6]])

In [37]:
# np.stack accepts a list of arrays as well as a tuple.
cc = np.stack([a, b])
cc

array([[1, 2, 3],
       [4, 5, 6]])

In [38]:
# np.array on the list of equal-length arrays builds the same 2-D array.
ccc = np.array([a, b])
ccc

array([[1, 2, 3],
       [4, 5, 6]])

In [39]:
# All three constructions agree: every difference is zero.
print(c - cc)
print(np.sum(c - ccc))
cc - ccc

[[0 0 0]
 [0 0 0]]
0


array([[0, 0, 0],
       [0, 0, 0]])