# Test 2: Analysis
A quick test of my analysis functions, using the SVJ data generated with CKKW-L and without decay.

## 1. Import Packages

In [1]:
# The Python Standard Library
import os
import sys
import time
import datetime
import glob
import multiprocessing as mp

# The Third-Party Library
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
import prettytable
import uproot
import pyjet
import importlib

# My Packages
import myhep.particle_information_v2 as mypInfo_v2
import myhep.analytical_function_v2 as myaFun_v2
import myhep.analysis_v3 as myAnal_v3
# import myhep.particleinfo_v1 as mypiv1
# import myhep.particlefun_v1 as myafv1

# increase figure showing resolution
%config InlineBackend.figure_format = 'retina'

## 2. Import .root File and Load the Data via class

In [2]:
# Path of the .root file analysed in this notebook.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
INPUT_FILE = '/youwei_u3/svj_data_master/scheme_1/root/ckkwl_wo.root'

# Open the file with uproot and select the 'Delphes;1' object.
DATA = uproot.open(INPUT_FILE)['Delphes;1']
# Wrap the data with the project's GenParticle / Jet / Event helper classes.
GP = mypInfo_v2.classGenParticle(DATA)
Jet = mypInfo_v2.classJet(DATA)
Event = mypInfo_v2.classEvent(DATA)

### 2-1. Check the number of events for each branch

In [3]:
# Sanity check: the three branch wrappers must report the same event count.
if GP.length == Jet.length == Event.length:
    print("There are {} events in the .root file.".format(GP.length))
else:
    # Adjacent string literals are joined at compile time, so the message can
    # be wrapped in the source without the indentation leaking into the text
    # (a backslash continuation inside a triple-quoted string embeds it).
    print("\033[7;31m****** There is the problem for the number of event "
          "in the .root file. ******\033[0m")
    print("\033[7;31m****** Please check your .root file. ******\033[0m")

There are 19373 events in the .root file.


### 2-2. Define physical quantities

In [4]:
# 2-1. Invariant mass M
def M(m1, pt1, eta1, phi1, m2, pt2, eta2, phi2):
    """Invariant mass of a two-object system.

    Each object is given by its mass ``m``, transverse momentum ``pt``,
    pseudorapidity ``eta`` and azimuthal angle ``phi``. Inputs may be
    scalars or NumPy arrays; all operations are elementwise.

    Returns
    -------
    sqrt((E1 + E2)**2 - |p1 + p2|**2), clamped at zero.
    """
    # For pseudorapidity the longitudinal momentum is pT*sinh(eta).
    # sqrt(m^2 + pT^2)*sinh(.) is the relation for *rapidity* and would
    # overestimate |pz| (and hence E) for massive objects.
    px1, py1, pz1 = pt1*np.cos(phi1), pt1*np.sin(phi1), pt1*np.sinh(eta1)
    e1 = np.sqrt(m1**2 + px1**2 + py1**2 + pz1**2)
    px2, py2, pz2 = pt2*np.cos(phi2), pt2*np.sin(phi2), pt2*np.sinh(eta2)
    e2 = np.sqrt(m2**2 + px2**2 + py2**2 + pz2**2)

    m_squared = ((e1+e2)**2 - (px1+px2)**2
                 - (py1+py2)**2 - (pz1+pz2)**2)
    # Round-off can push the radicand slightly below zero (e.g. collinear
    # massless objects); clamp it so the result is 0 instead of NaN.
    return np.sqrt(np.maximum(m_squared, 0.0))


# 2-2. Transverse mass MT
def MT(m1, pt1, eta1, phi1, m2, pt2, eta2, phi2):
    """Transverse mass of a two-object system.

    Computes sqrt((ET1 + ET2)**2 - |pT1_vec + pT2_vec|**2) with
    ETi = sqrt(mi**2 + pTi**2). Inputs may be scalars or NumPy arrays.

    ``eta1`` and ``eta2`` are accepted to keep the same signature as the
    other kinematic helpers, but they do not enter this quantity.
    """
    # Only the transverse components are needed; the longitudinal momenta
    # and full energies computed previously were never used.
    px1, py1 = pt1*np.cos(phi1), pt1*np.sin(phi1)
    px2, py2 = pt2*np.cos(phi2), pt2*np.sin(phi2)
    ET1, ET2 = np.sqrt(m1**2 + pt1**2), np.sqrt(m2**2 + pt2**2)

    mt_squared = (ET1+ET2)**2 - (px1+px2)**2 - (py1+py2)**2
    # Clamp tiny negative round-off (collinear massless objects) to avoid NaN.
    return np.sqrt(np.maximum(mt_squared, 0.0))


# 2-3. Transverse mass mT is invariant under Lorentz boost along the z direction.
def mT(m1, pt1, eta1, phi1, m2, pt2, eta2, phi2):
    """Longitudinally boost-invariant transverse mass of a two-object system.

    Computes sqrt((E1 + E2)**2 - (pz1 + pz2)**2). Each object is given by
    its mass, transverse momentum, pseudorapidity ``eta`` and azimuth.
    Inputs may be scalars or NumPy arrays.

    ``phi1`` and ``phi2`` are kept for signature compatibility; the result
    depends only on the energies and longitudinal momenta.
    """
    # pz = pT*sinh(eta) for pseudorapidity; the sqrt(m^2 + pT^2)*sinh(.)
    # form is the rapidity relation and is wrong for massive objects.
    pz1 = pt1*np.sinh(eta1)
    pz2 = pt2*np.sinh(eta2)
    # px**2 + py**2 == pT**2, so the azimuth is not needed for the energy.
    e1 = np.sqrt(m1**2 + pt1**2 + pz1**2)
    e2 = np.sqrt(m2**2 + pt2**2 + pz2**2)

    # Clamp tiny negative round-off so the square root never yields NaN.
    return np.sqrt(np.maximum((e1+e2)**2 - (pz1+pz2)**2, 0.0))


# 2-4. Transverse energy ET
def ET(m1, pt1, eta1, phi1, m2, pt2, eta2, phi2):
    """Transverse energy of a two-object system.

    Computes sqrt(m12**2 + |pT1_vec + pT2_vec|**2), where m12 is the
    invariant mass of the pair. Each object is given by its mass,
    transverse momentum, pseudorapidity ``eta`` and azimuth ``phi``.
    Inputs may be scalars or NumPy arrays.
    """
    # pz = pT*sinh(eta) for pseudorapidity; the sqrt(m^2 + pT^2)*sinh(.)
    # form is the rapidity relation and is wrong for massive objects.
    px1, py1, pz1 = pt1*np.cos(phi1), pt1*np.sin(phi1), pt1*np.sinh(eta1)
    e1 = np.sqrt(m1**2 + px1**2 + py1**2 + pz1**2)
    px2, py2, pz2 = pt2*np.cos(phi2), pt2*np.sin(phi2), pt2*np.sinh(eta2)
    e2 = np.sqrt(m2**2 + px2**2 + py2**2 + pz2**2)

    # Work with m12**2 directly: taking sqrt and squaring again returned NaN
    # whenever round-off made the radicand slightly negative.
    m12_squared = np.maximum(
        (e1+e2)**2 - (px1+px2)**2 - (py1+py2)**2 - (pz1+pz2)**2, 0.0)
    return np.sqrt(m12_squared + (px1+px2)**2 + (py1+py2)**2)

## 3. Analyze the Dark Quark Pair in the Parton and Truth Levels
This step can be skipped.

In [5]:
# Run the dark-quark-pair analysis on the generator particles for two status
# codes. NOTE(review): presumably status 23 is the parton level and status 71
# the truth level named in the section title -- confirm.
df_xdxdx_23_v3 = myAnal_v3.analyze_xdxdx(GP, status=23)
df_xdxdx_71_v3 = myAnal_v3.analyze_xdxdx(GP, status=71)

For status = 23, all events only include 2 particles.
For status = 71, all events only include 2 particles.


## 4. Jet Clustering

### 4-1. Select stable final-state particles, without and with filtering out the dark sector

In [5]:
# Select the stable final-state particles twice: once as-is and once with the
# PIDs listed in `filter` removed (the printed output calls them dark matter).
SFSP_v3, SFSP_filterDM_v3 = myAnal_v3.selectStableFinalStateParticle(
    GP, filter=[51, -51, 53, -53, 4900211, -4900211, 4900213, -4900213])

The PID of dark matter are [51, -51, 53, -53, 4900211, -4900211, 4900213, -4900213].
19373 events are stable final state.
19373 events are stable final state without DM.


### 4-2. Let's do the jet clustering!!

In [6]:
# Clustering settings shared by both calls below.
# NOTE(review): p = -1 presumably selects anti-kT inside jetClustering -- confirm.
R = 0.4
jetClusteringAlgorithm = -1
pTmin_pyjet = 0

# Cluster the full stable final state ...
PseudoJet_v3 = myAnal_v3.jetClustering(
    SFSP_v3, R=R, p=jetClusteringAlgorithm, pTmin=pTmin_pyjet)
# ... and the final state with the dark-sector PIDs filtered out.
PseudoJet_filterDM_v3 = myAnal_v3.jetClustering(
    SFSP_filterDM_v3, R=R, p=jetClusteringAlgorithm, pTmin=pTmin_pyjet)
print('Done new version')

Done new version


## 5. Analyze the Jets in the Truth Level

### 5-1. Preselection

In [None]:
def preselection(PseudoJet, num_jet_min, pT_min, eta_max):
    """Apply the jet preselection to every event.

    A jet is kept when ``jet.pt > pT_min`` and ``abs(jet.eta) < eta_max``.
    An event is kept when at least ``num_jet_min`` of its jets survive.

    Parameters
    ----------
    PseudoJet : sequence
        One entry per event; each entry is an iterable of jets exposing
        ``pt``, ``eta``, ``phi`` and ``mass`` attributes.
    num_jet_min : int
        Minimal number of surviving jets required to keep an event.
    pT_min : float
        Minimal jet pT (exclusive).
    eta_max : float
        Maximal jet |eta| (exclusive).

    Returns
    -------
    list of numpy.ndarray
        One structured array per surviving event, with fields
        ``pT``, ``eta``, ``phi`` and ``mass``; jets keep their input order.
        Events failing the jet-multiplicity cut are dropped, so positions
        in the returned list do not match the input event indices.
    """
    jet_dtype = [('pT', '<f8'), ('eta', '<f8'),
                 ('phi', '<f8'), ('mass', '<f8')]
    _presel_events = []
    for event in PseudoJet:
        # Keep only the jets that pass both kinematic cuts.
        kept = [(jet.pt, jet.eta, jet.phi, jet.mass) for jet in event
                if jet.pt > pT_min and abs(jet.eta) < eta_max]
        if len(kept) >= num_jet_min:
            _presel_events.append(np.array(kept, dtype=jet_dtype))
    return _presel_events

## test important

In [12]:
# Peek at the first six jets of event 21 (clustered from the DM-filtered particles).
PseudoJet_filterDM_v3[21][:6]

[PseudoJet(pt=754.721, eta=-0.269, phi=-0.352, mass=73.172),
 PseudoJet(pt=104.833, eta=-1.115, phi=1.143, mass=20.663),
 PseudoJet(pt=85.254, eta=0.918, phi=-2.531, mass=16.942),
 PseudoJet(pt=64.974, eta=-3.034, phi=1.609, mass=11.325),
 PseudoJet(pt=54.199, eta=0.230, phi=-2.593, mass=12.152),
 PseudoJet(pt=27.550, eta=-0.937, phi=0.655, mass=4.183)]

In [35]:
# List, for events 21 and 22, every jet whose pT exceeds 20.
for i in (21, 22):
    print("event {}".format(i))
    for j, jet in enumerate(PseudoJet_filterDM_v3[i]):
        if not (jet.pt > 20):
            continue  # below the pT threshold: nothing to show
        print(j, jet)
    print('-'*80)

event 21
0 PseudoJet(pt=754.721, eta=-0.269, phi=-0.352, mass=73.172)
1 PseudoJet(pt=104.833, eta=-1.115, phi=1.143, mass=20.663)
2 PseudoJet(pt=85.254, eta=0.918, phi=-2.531, mass=16.942)
3 PseudoJet(pt=64.974, eta=-3.034, phi=1.609, mass=11.325)
4 PseudoJet(pt=54.199, eta=0.230, phi=-2.593, mass=12.152)
5 PseudoJet(pt=27.550, eta=-0.937, phi=0.655, mass=4.183)
--------------------------------------------------------------------------------
event 22
0 PseudoJet(pt=135.815, eta=2.146, phi=-3.118, mass=28.584)
1 PseudoJet(pt=23.674, eta=2.146, phi=-2.674, mass=8.452)
2 PseudoJet(pt=23.255, eta=3.290, phi=0.439, mass=4.790)
--------------------------------------------------------------------------------


In [129]:
# Prototype of the pT preselection on events 21 and 22: keep jets with
# pT > 20 and store each event as a structured array (pT, eta, phi, mass).
_presel_events_pt = []
# NOTE(review): despite the names, these lists hold the number of *jets* per
# event; `_num_events_after_eta` is never filled in this cell.
_num_events_bef, _num_events_after_pt, _num_events_after_eta = [], [], []
# NOTE(review): these four lists are not used anywhere in this cell.
_pt, _eta, _phi, m = [], [], [], []

for i in range(21,23):
    print("event {}".format(i))
    # Jet count of the event before any cut.
    _num_events_bef.append(len(PseudoJet_filterDM_v3[i]))
    _event_i = []
    for j, jet in enumerate(PseudoJet_filterDM_v3[i]):
        if jet.pt > 20:
            print(j, jet)
            # print(j, jet, [jet.pt, jet.eta])  # [] = () in np.array()
            # print(j, jet, (jet.pt, jet.eta))
            # sum_test = np.sum(np.array([jet.pt, jet.eta, jet.phi, jet.mass]) -
                            #   np.array((jet.pt, jet.eta, jet.phi, jet.mass)))
            # print(sum_test)
            # One tuple per jet so np.array can build a structured array below.
            _event_i.append((jet.pt, jet.eta, jet.phi, jet.mass))
    print('*'*20 + 'NumPy Array')
    # print(_event_i)
    # Convert the surviving jets of this event to a structured array.
    arr_event_i = np.array(_event_i, dtype=[('pT', '<f8'), ('eta', '<f8'),
                                            ('phi', '<f8'), ('mass', '<f8')])
    print(arr_event_i.shape, arr_event_i)
    # Jet count of the event after the pT cut.
    _num_events_after_pt.append(len(arr_event_i))
    _presel_events_pt.append(arr_event_i)
    print('-'*80)
    
print("Number of events before preselection = {}".format(_num_events_bef))
print("Number of events after pT preselection = {}".format(_num_events_after_pt))
print("Number of events after eta preselection = {}".format(_num_events_after_eta))
print('-'*80)
# Show the structured array built for the second processed event (event 22).
_presel_events_pt[1]

event 21
0 PseudoJet(pt=754.721, eta=-0.269, phi=-0.352, mass=73.172)
1 PseudoJet(pt=104.833, eta=-1.115, phi=1.143, mass=20.663)
2 PseudoJet(pt=85.254, eta=0.918, phi=-2.531, mass=16.942)
3 PseudoJet(pt=64.974, eta=-3.034, phi=1.609, mass=11.325)
4 PseudoJet(pt=54.199, eta=0.230, phi=-2.593, mass=12.152)
5 PseudoJet(pt=27.550, eta=-0.937, phi=0.655, mass=4.183)
********************NumPy Array
(6,) [(754.72107953, -0.26936706, -0.35238845, 73.17201984)
 (104.83282762, -1.11482256,  1.14303952, 20.66261802)
 ( 85.25366785,  0.91831199, -2.53096755, 16.94153709)
 ( 64.97417129, -3.03388869,  1.60866641, 11.32521937)
 ( 54.1991487 ,  0.23044176, -2.59317889, 12.15247457)
 ( 27.54988231, -0.93685402,  0.65517858,  4.1833831 )]
--------------------------------------------------------------------------------
event 22
0 PseudoJet(pt=135.815, eta=2.146, phi=-3.118, mass=28.584)
1 PseudoJet(pt=23.674, eta=2.146, phi=-2.674, mass=8.452)
2 PseudoJet(pt=23.255, eta=3.290, phi=0.439, mass=4.790)
**

array([(135.81475624, 2.14646234, -3.11762065, 28.58375223),
       ( 23.67438139, 2.14635282, -2.67445185,  8.45196396),
       ( 23.25535698, 3.28953198,  0.43855045,  4.79000333)],
      dtype=[('pT', '<f8'), ('eta', '<f8'), ('phi', '<f8'), ('mass', '<f8')])

In [28]:
# Number of clustered jets (before any pT cut) in events 21 and 22.
print(len(PseudoJet_filterDM_v3[21]))
print(len(PseudoJet_filterDM_v3[22]))

75
185


In [130]:
# Minimal structured-array example: records with two 'i4' fields, 'x' and 'y'.
a = np.array([(1, 2), (3, 4), (5, 6)], dtype=[('x', 'i4'), ('y', 'i4')])
a

array([(1, 2), (3, 4), (5, 6)], dtype=[('x', '<i4'), ('y', '<i4')])

In [131]:
# Re-wrapping a structured array with np.array keeps its structured dtype.
np.array(a)

array([(1, 2), (3, 4), (5, 6)], dtype=[('x', '<i4'), ('y', '<i4')])

### Example 1

In [17]:
# Number of events, then for the first three events the jet count and
# the first three jets.
print(len(PseudoJet_filterDM_v3))
print('-'*80)
for i in range(3):
    print("How many PseudoJet: {}".format(len(PseudoJet_filterDM_v3[i])))
    print(PseudoJet_filterDM_v3[i][:3])
    print('-'*80)

19373
--------------------------------------------------------------------------------
How many PseudoJet: 88
[PseudoJet(pt=144.818, eta=0.390, phi=-0.246, mass=29.794), PseudoJet(pt=32.644, eta=0.658, phi=-0.752, mass=8.309), PseudoJet(pt=12.051, eta=-0.031, phi=-0.040, mass=1.622)]
--------------------------------------------------------------------------------
How many PseudoJet: 154
[PseudoJet(pt=156.632, eta=-0.721, phi=1.453, mass=24.608), PseudoJet(pt=116.277, eta=-1.841, phi=-2.920, mass=21.577), PseudoJet(pt=100.767, eta=-2.305, phi=-2.087, mass=12.047)]
--------------------------------------------------------------------------------
How many PseudoJet: 163
[PseudoJet(pt=180.419, eta=-1.263, phi=2.764, mass=34.022), PseudoJet(pt=13.794, eta=0.471, phi=1.933, mass=2.814), PseudoJet(pt=6.642, eta=-4.642, phi=1.803, mass=1.408)]
--------------------------------------------------------------------------------


In [19]:
# First ten jets of event 21.
PseudoJet_filterDM_v3[21][:10]

[PseudoJet(pt=754.721, eta=-0.269, phi=-0.352, mass=73.172),
 PseudoJet(pt=104.833, eta=-1.115, phi=1.143, mass=20.663),
 PseudoJet(pt=85.254, eta=0.918, phi=-2.531, mass=16.942),
 PseudoJet(pt=64.974, eta=-3.034, phi=1.609, mass=11.325),
 PseudoJet(pt=54.199, eta=0.230, phi=-2.593, mass=12.152),
 PseudoJet(pt=27.550, eta=-0.937, phi=0.655, mass=4.183),
 PseudoJet(pt=19.967, eta=-0.716, phi=-2.708, mass=3.603),
 PseudoJet(pt=11.095, eta=-1.614, phi=-3.022, mass=2.247),
 PseudoJet(pt=9.129, eta=-0.150, phi=2.344, mass=1.500),
 PseudoJet(pt=6.189, eta=-0.029, phi=-0.764, mass=2.306)]

In [18]:
# Show the jets above the pT > 20 threshold for events 21 and 22.
for i in (21, 22):
    print("event {}".format(i))
    for j, jet in enumerate(PseudoJet_filterDM_v3[i]):
        if not (jet.pt > 20):
            continue  # skip jets that fail the pT cut
        print(j, jet)
    print('-'*80)

event 21
0 PseudoJet(pt=754.721, eta=-0.269, phi=-0.352, mass=73.172)
1 PseudoJet(pt=104.833, eta=-1.115, phi=1.143, mass=20.663)
2 PseudoJet(pt=85.254, eta=0.918, phi=-2.531, mass=16.942)
3 PseudoJet(pt=64.974, eta=-3.034, phi=1.609, mass=11.325)
4 PseudoJet(pt=54.199, eta=0.230, phi=-2.593, mass=12.152)
5 PseudoJet(pt=27.550, eta=-0.937, phi=0.655, mass=4.183)
--------------------------------------------------------------------------------
event 22
0 PseudoJet(pt=135.815, eta=2.146, phi=-3.118, mass=28.584)
1 PseudoJet(pt=23.674, eta=2.146, phi=-2.674, mass=8.452)
2 PseudoJet(pt=23.255, eta=3.290, phi=0.439, mass=4.790)
--------------------------------------------------------------------------------


In [13]:
# Print the jets of event 21 with pT > 20; the comments below are design
# notes for the preselection still to be written.
for j, jet in enumerate(PseudoJet_filterDM_v3[21]):
    if jet.pt > 20:
        print(j, jet)

        # TODO cuts to add:
        #   |eta| < 1.0 or 2.5
        #   N_jet
        #   pt1 > 440
        #   pt2 > 150

        # Candidate output layouts:
        #   [arr(event0), arr(event1), ...]
        #   event0 = [[pt], [eta], [phi], [mass]]
        #   event0 = [(pt1, eta1, phi1, mass1), (pt2, eta2, ), ...]  (better)

0 PseudoJet(pt=754.721, eta=-0.269, phi=-0.352, mass=73.172)
1 PseudoJet(pt=104.833, eta=-1.115, phi=1.143, mass=20.663)
2 PseudoJet(pt=85.254, eta=0.918, phi=-2.531, mass=16.942)
3 PseudoJet(pt=64.974, eta=-3.034, phi=1.609, mass=11.325)
4 PseudoJet(pt=54.199, eta=0.230, phi=-2.593, mass=12.152)
5 PseudoJet(pt=27.550, eta=-0.937, phi=0.655, mass=4.183)


In [11]:
# Print Jet.dataframelize(i) for the first ten events
# (columns Mass, PT, Eta, Phi according to the output below).
for i in range(10):
    print(Jet.dataframelize(i))
    print('-'*80)

           Mass          PT       Eta       Phi
0  5.453704e+01  241.723770  0.370880 -0.282046
1 -9.536743e-07   26.896011 -1.431609  2.805379
--------------------------------------------------------------------------------
        Mass          PT       Eta       Phi
0   6.678744  177.236328 -0.139621 -0.447590
1  17.477238  118.888374 -0.711579  1.418930
2  19.143206  113.456596 -2.323572 -2.034082
3  20.718964  111.807510 -1.892479 -2.930634
--------------------------------------------------------------------------------
        Mass          PT       Eta       Phi
0  37.228954  188.137466 -1.268446  2.796516
1   3.147427   68.941101 -2.162593 -1.792415
2   1.937583   36.007828  1.437399  0.033540
3   5.678205   25.617086  2.068637 -0.297460
--------------------------------------------------------------------------------
        Mass          PT       Eta       Phi
0  20.538086  132.913483  1.756381 -0.577489
-------------------------------------------------------------------------

## Test 1: Constituents

In [52]:
# Inspect the available attributes at each nesting level:
# the list of events, one event, and a single jet.
print(dir(PseudoJet_filterDM_v3))
print('-'*80)
print(dir(PseudoJet_filterDM_v3[21]))
print('-'*80)
print(dir(PseudoJet_filterDM_v3[21][0]))

['__add__', '__class__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__imul__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__rmul__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'append', 'clear', 'copy', 'count', 'extend', 'index', 'insert', 'pop', 'remove', 'reverse', 'sort']
--------------------------------------------------------------------------------
['__add__', '__class__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__imul__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__

In [53]:
# Show jet 5 of event 21, then its constituents as a structured array.
print(PseudoJet_filterDM_v3[21][5])
print('-'*80)
PseudoJet_filterDM_v3[21][5].constituents_array()

PseudoJet(pt=27.550, eta=-0.937, phi=0.655, mass=4.183)
--------------------------------------------------------------------------------


array([(0.99460959, -1.1102134 , 0.67404842,  1.39569998e-01),
       (6.0019908 , -1.01715386, 0.54768401,  9.39570010e-01),
       (0.98741752, -0.776838  , 0.57899618,  1.39569998e-01),
       (1.61963058, -0.94204098, 0.55713117,  0.00000000e+00),
       (4.14160728, -0.93889606, 0.60915875, -8.42936970e-08),
       (7.85252857, -0.89518613, 0.74074847,  1.39569998e-01),
       (6.04252958, -0.88757044, 0.71782219,  9.38269973e-01)],
      dtype=[('pT', '<f8'), ('eta', '<f8'), ('phi', '<f8'), ('mass', '<f8')])

In [56]:
# Access a single field ('pT') of the constituents structured array by name.
aa = PseudoJet_filterDM_v3[21][5].constituents_array()
aa['pT']

array([0.99460959, 6.0019908 , 0.98741752, 1.61963058, 4.14160728,
       7.85252857, 6.04252958])

In [64]:
# len() of each of the first six jets of event 21.
# NOTE(review): this looks like the number of constituents (the last value
# matches the (7,) shape of jet 5's constituents_array) -- confirm.
for j in range(6):
    print(len(PseudoJet_filterDM_v3[21][j]))

87
26
33
21
23
7


In [69]:
# Constituent arrays of jets 4 and 5 of event 21; their lengths differ.
# Note: `a` rebinds the name used by the structured-array example earlier.
a = PseudoJet_filterDM_v3[21][4].constituents_array()
b = PseudoJet_filterDM_v3[21][5].constituents_array()
print(a.shape)
print(b.shape)

(23,)
(7,)


In [80]:
# `a` and `b` have different lengths, so they cannot form a regular 2-D array.
# np.array([a, b]) on such ragged input raises ValueError on NumPy >= 1.24
# (it was a deprecation warning before), so build the 1-D object array
# explicitly and store each constituent array as one element.
c = np.empty(2, dtype=object)
c[0], c[1] = a, b
c[1]

array([(0.99460959, -1.1102134 , 0.67404842,  1.39569998e-01),
       (6.0019908 , -1.01715386, 0.54768401,  9.39570010e-01),
       (0.98741752, -0.776838  , 0.57899618,  1.39569998e-01),
       (1.61963058, -0.94204098, 0.55713117,  0.00000000e+00),
       (4.14160728, -0.93889606, 0.60915875, -8.42936970e-08),
       (7.85252857, -0.89518613, 0.74074847,  1.39569998e-01),
       (6.04252958, -0.88757044, 0.71782219,  9.38269973e-01)],
      dtype=[('pT', '<f8'), ('eta', '<f8'), ('phi', '<f8'), ('mass', '<f8')])

In [81]:
# Display `b` for comparison with `c[1]` shown in the previous cell.
b

array([(0.99460959, -1.1102134 , 0.67404842,  1.39569998e-01),
       (6.0019908 , -1.01715386, 0.54768401,  9.39570010e-01),
       (0.98741752, -0.776838  , 0.57899618,  1.39569998e-01),
       (1.61963058, -0.94204098, 0.55713117,  0.00000000e+00),
       (4.14160728, -0.93889606, 0.60915875, -8.42936970e-08),
       (7.85252857, -0.89518613, 0.74074847,  1.39569998e-01),
       (6.04252958, -0.88757044, 0.71782219,  9.38269973e-01)],
      dtype=[('pT', '<f8'), ('eta', '<f8'), ('phi', '<f8'), ('mass', '<f8')])