# Test 9: Analysis Script Version 1
Test my analysis `analysis_script_v1.py` by using SVJ with CKKW-L and without decay data.

## 1. Import Packages

In [1]:
# The Python Standard Library
import os
import sys
import time
import datetime
import glob
import multiprocessing as mp

# The Third-Party Library
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
import prettytable
import uproot
import pyjet
import importlib

# My Packages
import myhep.particle_information_v2 as mypInfo_v2
import myhep.analytical_function_v2 as myaFun_v2
import myhep.analysis_v3 as myAnal_v3
# import myhep.particleinfo_v1 as mypiv1
# import myhep.particlefun_v1 as myafv1

# increase figure showing resolution
%config InlineBackend.figure_format = 'retina'

## 2. Import .root File and Load the Data via class
Skip:  
2-1. Check the number of events for each branch  
2-2. Define mass quantities

In [2]:
INPUT_FILE = '/youwei_u3/svj_data_master/scheme_1/root/ckkwl_wo.root'

DATA = uproot.open(INPUT_FILE)['Delphes;1']
GP = mypInfo_v2.classGenParticle(DATA)
Jet = mypInfo_v2.classJet(DATA)
Event = mypInfo_v2.classEvent(DATA)

## 3. Analyze the Dark Quark Pair in the Parton and Truth Levels

In [3]:
df_xdxdx_23_v3 = myAnal_v3.analyze_xdxdx(GP, status=23)
df_xdxdx_71_v3 = myAnal_v3.analyze_xdxdx(GP, status=71)

For status = 23, all events only include 2 particles.
For status = 71, all events only include 2 particles.


## 4. Jet Clustering

### 4-1. Select stable final state particles without/with filtering out dark sector

In [4]:
SFSP_v3, SFSP_filterDM_v3 = myAnal_v3.selectStableFinalStateParticle(
    GP, filter=[51, -51, 53, -53, 4900211, -4900211, 4900213, -4900213])

The PID of dark matter are [51, -51, 53, -53, 4900211, -4900211, 4900213, -4900213].
19373 events are stable final state.
19373 events are stable final state without DM.


### 4-2. Let's do the jet clustering!!

In [5]:
R, jetClusteringAlgorithm, pTmin_pyjet = 0.4, -1, 0

PseudoJet_v3 = myAnal_v3.jetClustering_v1(SFSP_v3, R=R,
                                       p=jetClusteringAlgorithm,
                                       pTmin=pTmin_pyjet)
PseudoJet_filterDM_v3 = myAnal_v3.jetClustering_v1(SFSP_filterDM_v3, R=R,
                                                p=jetClusteringAlgorithm,
                                                pTmin=pTmin_pyjet)
print('Done new version')

Done new version


## 5. Analyze the Jet in the Truth Level

### 5-1. Preselection version 1

In [6]:
presel_bef, presel_pt, presel_pt_eta, presel_idx = myAnal_v3.preselection_v1(PseudoJet_filterDM_v3, pT_min=20, eta_max=2.5)

19373 events before preselection
19373 events after pT preselection
19373 events after pT & eta preselections
--------------------------------------------------------------------------------
0 events without PseudoJet before preselection
357 events without PseudoJet after pT preselection
519 events without PseudoJet after pT & eta preselections


### 5-2. MET

In [7]:
arr_MET, df_MET = myAnal_v3.MET_visParticles_v1(SFSP_filterDM_v3)

19373 events in MET data.


### 5-3. Analyze truth jet with scheme 1 and version 2

In [8]:
N_jet_v2, jj_v2, jjj_v2 = myAnal_v3.analyze_truthJet_scheme1_v2(presel_pt_eta)

19373 events in the array of number of jets.
16752 selected events and 10 observables in dijet.
12771 selected events and 10 observables in trijet.


### 5-4. Analyze truth jet and MET with scheme 1 and version 1

In [9]:
df_jj_MET, df_jjj_MET = myAnal_v3.analyze_truthJet_MET_scheme1_v1(presel_pt_eta, arr_MET)

## 6. Event Weight

In [10]:
# method 1
weight_1 = np.array(Event.Weight)

# method 2
_weight = []
for i in range(Event.length):
    _weight.append(Event.Weight[i][0])

weight_2 = np.array(_weight)
weight_2

array([2.7949634e-14, 2.7949634e-14, 4.1366087e-14, ..., 3.5438452e-14,
       2.7949634e-14, 2.7949634e-14], dtype=float32)

## 7. Check Output Data
I want to check my output data from analysis script is consistent with these.

### 7-1. Prepare output data

In [37]:
path_data = '/youwei_u3/svj_data_master/scheme_1/analysis_script_v1/'

status23 = pd.read_csv(path_data + 'status23_rinv3_Lambdad5_ckkwl_wo.csv')
status71 = pd.read_csv(path_data + 'status71_rinv3_Lambdad5_ckkwl_wo.csv')
MET = pd.read_csv(path_data + 'met_rinv3_Lambdad5_ckkwl_wo.csv')
N_jet = pd.read_csv(path_data + 'n_jet_rinv3_Lambdad5_ckkwl_wo.csv')
jj = pd.read_csv(path_data + 'jj_rinv3_Lambdad5_ckkwl_wo.csv')
jjj = pd.read_csv(path_data + 'jjj_rinv3_Lambdad5_ckkwl_wo.csv')
jj_MET = pd.read_csv(path_data + 'jj_met_rinv3_Lambdad5_ckkwl_wo.csv')
jjj_MET = pd.read_csv(path_data + 'jjj_met_rinv3_Lambdad5_ckkwl_wo.csv')
weight = np.load(path_data + 'weight_rinv3_Lambdad5_ckkwl_wo.npz', allow_pickle=True)

In [41]:
# check status = 23 & 71
check_23 = np.sum(status23.to_numpy() - df_xdxdx_23_v3.to_numpy())
check_71 = np.sum(status71.to_numpy() - df_xdxdx_71_v3.to_numpy())
# check MET and N_jet
check_met = np.sum(MET.to_numpy() - df_MET.to_numpy())
check_n_jet = np.sum(N_jet.to_numpy() - N_jet_v2.to_numpy())
# check jj and jjj
check_jj = np.sum(jj.to_numpy() - jj_v2.to_numpy())
check_jjj = np.sum(jjj.to_numpy() - jjj_v2.to_numpy())
# check jj and jjj with MET
check_jj_met = np.sum(jj_MET.to_numpy() - df_jj_MET.to_numpy())
check_jjj_met = np.sum(jjj_MET.to_numpy() - df_jjj_MET.to_numpy())
# check weight
check_weight_1 = np.sum(weight['weight_1'] - weight_1)
check_weight_2 = np.sum(weight['weight_2'] - weight_2)

print(f"Status = 23: {check_23}")
print(f"Status = 71: {check_71}")
print(f"MET        : {check_met}")
print(f"N_jet      : {check_n_jet}")
print(f"jj         : {check_jj}")
print(f"jjj        : {check_jjj}")
print(f"jj & MET   : {check_jj_met}")
print(f"jjj & MET  : {check_jjj_met}")
print(f"weight 1   : {check_weight_1}")
print(f"weight 2   : {check_weight_2}")

Status = 23: 4.302854866709187e-11
Status = 71: 4.963466696840651e-11
MET        : 7.382606235050089e-12
N_jet      : 0
jj         : 6.712607531454695e-11
jjj        : 6.183584169065868e-11
jj & MET   : 2.0752002855239883e-11
jjj & MET  : -3.544679395112941e-12
weight 1   : [0.]
weight 2   : 0.0


##### Conclusion: I think the output data is very close to these data.

In [46]:
df_MET.dtypes

MET     float64
phi     float64
METx    float64
METy    float64
dtype: object

In [47]:
MET.dtypes

MET     float64
phi     float64
METx    float64
METy    float64
dtype: object