# Test 8: Event Weight
Test the event-weight handling using SVJ data generated with CKKW-L and without decay.

## 1. Import Packages

In [1]:
# The Python Standard Library
import os
import sys
import time
import datetime
import glob
import multiprocessing as mp

# The Third-Party Library
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
import prettytable
import uproot
import pyjet
import importlib

# My Packages
import myhep.particle_information_v2 as mypInfo_v2
import myhep.analytical_function_v2 as myaFun_v2
import myhep.analysis_v3 as myAnal_v3
# import myhep.particleinfo_v1 as mypiv1
# import myhep.particlefun_v1 as myafv1

# increase figure showing resolution
%config InlineBackend.figure_format = 'retina'

## 2. Import .root File and Load the Data via class
Skip:  
2-1. Check the number of events for each branch  
2-2. Define mass quantities

In [2]:
# Path of the Delphes ROOT file analysed in this notebook.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
INPUT_FILE = '/youwei_u3/svj_data_master/scheme_1/root/ckkwl_wo.root'

# Open the file and pick the 'Delphes;1' object (presumably the Delphes tree — confirm).
DATA = uproot.open(INPUT_FILE)['Delphes;1']
# Wrap the data in the myhep helper classes; only `Event` is used in the sections shown below.
GP = mypInfo_v2.classGenParticle(DATA)
Jet = mypInfo_v2.classJet(DATA)
Event = mypInfo_v2.classEvent(DATA)

## 3. Analyze the Dark Quark Pair in the Parton and Truth Levels
Skip

## 4. Jet Clustering
Skip

## 5. Analyze the Jet and MET in the Truth Level
Skip

## 6. Event Weight

In [3]:
# Echo the wrapper object itself; the cell output is its default repr.
Event

<myhep.particle_information_v2.classEvent at 0x7f6130d0be80>

In [4]:
# Render entry 0 as a DataFrame; the output below has a single `Weight` column.
# NOTE(review): the argument is presumably the event index — confirm against myhep.
Event.dataframelize(0)

Unnamed: 0,Weight
0,2.794963e-14


In [5]:
# Number of events, followed by the raw per-event weight container.
print(Event.length)
Event.Weight

19373


<JaggedArray [[2.7949634e-14] [2.7949634e-14] [4.1366087e-14] ... [3.5438452e-14] [2.7949634e-14] [2.7949634e-14]] at 0x7f6130d0b910>

In [6]:
# Weight entry of the first event: str() form via print, then repr() via the cell echo.
print(Event.Weight[0])
Event.Weight[0]

[2.7949634e-14]


array([2.7949634e-14], dtype=float32)

In [7]:
# dtype of the first event's weight array (printed, then echoed as a dtype object).
print(Event.Weight[0].dtype)
Event.Weight[0].dtype

float32


dtype('float32')

In [8]:
# Gather the index of every event whose weight array is not stored as float32,
# report each offending index, and then print a one-line summary.
non_float32 = [idx for idx in range(Event.length)
               if Event.Weight[idx].dtype != 'float32']
for idx in non_float32:
    print(idx)
test = len(non_float32)

print("All events are float32." if test == 0
      else "{} events are not float32.".format(test))

All events are float32.


##### Conclusion: All events are `float32`.

### 6-1. Method 1
Old way: directly apply `np.array()` to `Event.Weight`.

In [9]:
# Method 1 ("old way"): hand the jagged weight column straight to NumPy.
weight_1 = np.array(Event.Weight)
# Print the str() form and a separator line, then let the cell echo the repr().
print(weight_1, '-' * 80, sep='\n')
weight_1

[array([2.7949634e-14], dtype=float32)
 array([2.7949634e-14], dtype=float32)
 array([4.1366087e-14], dtype=float32) ...
 array([3.5438452e-14], dtype=float32)
 array([2.7949634e-14], dtype=float32)
 array([2.7949634e-14], dtype=float32)]
--------------------------------------------------------------------------------


array([array([2.7949634e-14], dtype=float32),
       array([2.7949634e-14], dtype=float32),
       array([4.1366087e-14], dtype=float32), ...,
       array([3.5438452e-14], dtype=float32),
       array([2.7949634e-14], dtype=float32),
       array([2.7949634e-14], dtype=float32)], dtype=object)

##### Conclusion 1: `weight_1` is an object-dtype `np.array` whose elements are many small `float32` `np.array`s.

In [10]:
# dtype of the outer array produced by np.array(Event.Weight).
weight_1.dtype

dtype('O')

In [11]:
# dtype of the first element stored inside weight_1.
weight_1[0].dtype

dtype('float32')

In [12]:
# Total weight via Method 1. The sum comes back as a length-1 array (see the
# cell output), hence the [0] to print the bare scalar.
print(np.sum(weight_1)[0])
np.sum(weight_1)

5.7769295e-10


array([5.7769295e-10], dtype=float32)

##### Conclusion 2: The summed weight does not need to be divided by the number of events.

### 6-2. Method 2
New way: iterate over all events and collect each scalar weight into a flat `np.array()`.

In [13]:
# Method 2 ("new way"): take the scalar at position 0 of each per-event weight
# array so that NumPy builds a flat numeric array instead of an object array.
_weight = [Event.Weight[idx][0] for idx in range(Event.length)]

weight_2 = np.array(_weight)
weight_2

array([2.7949634e-14, 2.7949634e-14, 4.1366087e-14, ..., 3.5438452e-14,
       2.7949634e-14, 2.7949634e-14], dtype=float32)

In [14]:
# dtype of the flat array built by Method 2.
weight_2.dtype

dtype('float32')

In [15]:
# First weight, now a bare scalar rather than a length-1 array.
weight_2[0]

2.7949634e-14

In [16]:
# First three weights of the flat array.
weight_2[:3]

array([2.7949634e-14, 2.7949634e-14, 4.1366087e-14], dtype=float32)

In [17]:
# Total weight via Method 2 (sum over the flat weight_2 array).
np.sum(weight_2)

5.777784e-10

In [18]:
# Difference between the Method 2 total and the Method 1 total.
np.sum(weight_2) - np.sum(weight_1)[0]

8.543166e-14

##### Conclusion 1: The cross-sections (summed weights) from `weight_1` and `weight_2` are slightly different.

##### Conclusion 2: `weight_2` is better, since its sum is closer to the PYTHIA output than that of `weight_1`.

In [19]:
# Element-wise difference between the two weight arrays, followed by its sum.
print(weight_1 - weight_2)
np.sum(weight_1 - weight_2)

[array([0.], dtype=float32) array([0.], dtype=float32)
 array([0.], dtype=float32) ... array([0.], dtype=float32)
 array([0.], dtype=float32) array([0.], dtype=float32)]


array([0.], dtype=float32)

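##### Conclusion 3: But `weight_1 - weight_2` is 0 element by element, thus `weight_1 = weight_2`.
Since the two arrays hold identical values, the small difference between their sums is most likely a `float32` rounding effect: the object array and the flat `float32` array are summed through different NumPy code paths, which accumulate rounding error differently. Summing in double precision, e.g. `np.sum(weight_2, dtype=np.float64)`, should minimise this rounding error.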

## 7. Save in .csv and .npz Files

### 7-1. [將 Pandas DataFrame 寫入 CSV](https://www.delftstack.com/zh-tw/howto/python-pandas/write-a-pandas-dataframe-to-csv/)
[Official `DataFrame.to_csv` documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html#pandas.DataFrame.to_csv): it also describes how to save into a compressed `.zip` file.

In [23]:
# Toy grade table: one column of student names plus three columns of marks.
mid_term_marks = {
    "Student": ["Kamal", "Arun", "David", "Thomas", "Steven"],
    "Economics": [10, 8, 6, 5, 8],
    "Fine Arts": [7, 8, 5, 9, 6],
    "Mathematics": [7, 3, 5, 8, 5],
}
# Each dict key becomes a column (from_dict's default orientation).
mid_term_marks_df = pd.DataFrame.from_dict(mid_term_marks)
mid_term_marks_df

Unnamed: 0,Student,Economics,Fine Arts,Mathematics
0,Kamal,10,7,7
1,Arun,8,8,3
2,David,6,5,5
3,Thomas,5,9,8
4,Steven,8,6,5


In [24]:
# Write the table to midterm.csv in the current working directory, without the index column.
mid_term_marks_df.to_csv("midterm.csv", index=False)

### 7-2. [numpy.savez_compressed](https://numpy.org/doc/stable/reference/generated/numpy.savez_compressed.html)

In [26]:
# Fix the seed so the random test data is reproducible across kernel restarts.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

test_array = np.random.rand(3, 2)  # 3x2 matrix of uniform samples
test_vector = np.random.rand(4)    # length-4 vector of uniform samples

# Store both arrays in one compressed archive under the keys 'a' and 'b'.
# The file is read back below as './123.npz'.
np.savez_compressed('./123', a=test_array, b=test_vector)
# `loaded` is kept open on purpose: later cells read `loaded.files` and
# `loaded['a']` / `loaded['b']`. Call `loaded.close()` once it is no longer needed.
loaded = np.load('./123.npz')

In [27]:
# Show the in-memory arrays that were written to the archive.
print(test_array)
test_vector

[[0.9219718  0.89438997]
 [0.14363616 0.34647482]
 [0.2074924  0.17260254]]


array([0.88807297, 0.21049467, 0.92179359, 0.32138046])

In [28]:
# Names of the arrays stored in the loaded archive.
loaded.files

['a', 'b']

In [29]:
# Round-trip check for the 2-D array: overall equality first, then element-wise.
print(np.array_equal(test_array, loaded['a']),
      test_array == loaded['a'],
      sep='\n')

True
[[ True  True]
 [ True  True]
 [ True  True]]


In [30]:
# Round-trip check for the 1-D vector: overall equality first, then element-wise.
print(np.array_equal(test_vector, loaded['b']),
      test_vector == loaded['b'],
      sep='\n')

True
[ True  True  True  True]
