# Exploring econdata 

In [1]:
import uproot
import awkward as ak
import numpy as np
import pandas as pd

In [2]:
# Input ntuple and the ROOT directory that holds the HGCalTriggerNtuple tree.
fname = "ntuple.root"
# Named ntuple_dir rather than `dir` so the Python builtin dir() stays usable in the kernel.
ntuple_dir = "FloatingpointAutoEncoderStrideDummyHistomaxGenmatchGenclustersntuple"
# ev_dict is the object looked up in the file; later cells call .show() and .arrays() on it.
ev_dict = uproot.open(fname)[f"{ntuple_dir}/HGCalTriggerNtuple"]

In [3]:
# List every branch with its C++ type name and uproot interpretation.
ev_dict.show()

name                 | typename                 | interpretation                
---------------------+--------------------------+-------------------------------
run                  | int32_t                  | AsDtype('>i4')
event                | int32_t                  | AsDtype('>i4')
lumi                 | int32_t                  | AsDtype('>i4')
gen_n                | int32_t                  | AsDtype('>i4')
gen_PUNumInt         | int32_t                  | AsDtype('>i4')
gen_TrueNumInt       | float                    | AsDtype('>f4')
vtx_x                | float                    | AsDtype('>f4')
vtx_y                | float                    | AsDtype('>f4')
vtx_z                | float                    | AsDtype('>f4')
gen_eta              | std::vector<float>       | AsJagged(AsDtype('>f4'), he...
gen_phi              | std::vector<float>       | AsJagged(AsDtype('>f4'), he...
gen_pt               | std::vector<float>       | AsJagged(AsDtype('>f4'), he...
gen_energy

In [36]:
# Branches to load from the ntuple, grouped by collection prefix.
arrays_toread = [
    "econ_index", "econ_data",
    "econ_subdet", "econ_zside", "econ_layer", "econ_waferu", "econ_waferv", "econ_wafertype",
    "tc_simenergy",
    "tc_subdet", "tc_zside", "tc_layer", "tc_waferu", "tc_waferv", "tc_wafertype",
    "gen_pt", "gen_energy", "gen_eta", "gen_phi",
    "genpart_pt", "genpart_energy",
]
events = ev_dict.arrays(arrays_toread)


def _zip_branches(prefix, fields):
    """Zip the ``<prefix>_<field>`` branches of ``events`` into one record array.

    The resulting records are keyed by the bare field names, in the order given.
    """
    return ak.zip({field: events[f"{prefix}_{field}"] for field in fields})


# Fields shared by the econ and tc collections that identify a wafer.
wafer_id_fields = ["subdet", "zside", "layer", "waferu", "waferv"]

econ = _zip_branches("econ", ["index", "data"] + wafer_id_fields)
tc = _zip_branches("tc", ["simenergy"] + wafer_id_fields)
gen = _zip_branches("gen", ["pt", "energy", "eta", "phi"])

In [38]:
# Flatten each jagged record array into a DataFrame so the wafers to save can be
# selected. This is needed because the wafer count inferred from trigger cells
# (number of trigger cells / 48) does not match the one inferred from ECON data
# (number of econ_data values / 16).
# ak.to_pandas was renamed ak.to_dataframe in Awkward Array 2.x; use whichever
# the installed version provides so the cell runs on both.
_to_frame = getattr(ak, "to_dataframe", None) or ak.to_pandas
df_tc = _to_frame(tc)
df_econ = _to_frame(econ)
df_gen = _to_frame(gen)

In [15]:
# Trigger-cell frame: one row per (entry, subentry).
df_tc

Unnamed: 0_level_0,Unnamed: 1_level_0,simenergy,subdet,zside,layer,waferu,waferv
entry,subentry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,0.0,2,1,40,-2,-4
0,1,0.0,2,-1,35,-5,0
0,2,0.0,2,1,35,-6,-5
0,3,0.0,2,1,32,7,2
0,4,0.0,2,1,31,5,4
...,...,...,...,...,...,...,...
9,2927,0.0,1,-1,9,-1,-4
9,2928,0.0,1,-1,9,-1,-4
9,2929,0.0,1,-1,13,0,-4
9,2930,0.0,2,1,30,-3,1


In [16]:
# Sum the trigger-cell simenergy per wafer, i.e. per unique
# (entry, subdet, zside, layer, waferu, waferv) combination.
tc_by_wafer = df_tc.groupby(['entry', 'subdet', 'zside', 'layer', 'waferu', 'waferv'])
df_simtotal = tc_by_wafer["simenergy"].sum()

In [17]:
# Print the (truncated) per-wafer simenergy totals. The commented-out variant
# below prints every row instead; it is kept for reference only.
#with pd.option_context('display.max_rows', None,
#                       'display.max_columns', None,
#                       'display.precision', 3,
#                       ):
#    print(df_simtotal)
print(df_simtotal)

entry  subdet  zside  layer  waferu  waferv
0      1       -1     1      -6      -4        0.0
                             -5      -1        0.0
                             -4      -4        0.0
                                     -3        0.0
                                     -2        0.0
                                              ... 
9      2        1     40      4       2        0.0
                      41     -3      -5        0.0
                             -2      -5        0.0
                      43     -4      -2        0.0
                              3       4        0.0
Name: simenergy, Length: 10880, dtype: float32


In [18]:
# ECON frame before re-indexing: one row per (entry, subentry).
df_econ

Unnamed: 0_level_0,Unnamed: 1_level_0,index,data,subdet,zside,layer,waferu,waferv
entry,subentry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0,0,320,2,1,40,-2,-4
0,1,1,0,2,1,40,-2,-4
0,2,2,0,2,1,40,-2,-4
0,3,3,1088,2,1,40,-2,-4
0,4,4,832,2,1,40,-2,-4
...,...,...,...,...,...,...,...,...
9,17691,11,1024,1,-1,13,-3,-1
9,17692,12,304,1,-1,13,-3,-1
9,17693,13,640,1,-1,13,-3,-1
9,17694,14,48,1,-1,13,-3,-1


In [19]:
# Check the index level names before re-indexing the frame by wafer.
df_econ.index.names

FrozenList(['entry', 'subentry'])

In [20]:
# Move the (entry, subentry) index levels into ordinary columns.
# Rebinding instead of inplace=True keeps the step explicit and chainable.
df_econ = df_econ.reset_index()

In [21]:
# Index the ECON rows by wafer, using the same keys df_simtotal is grouped by,
# so the two can be aligned on their index.
df_econ = df_econ.set_index(['entry', 'subdet', 'zside', 'layer', 'waferu', 'waferv'])

In [22]:
# Attach the per-wafer simenergy sum to every ECON row. The assignment aligns
# df_simtotal on the shared (entry, subdet, zside, layer, waferu, waferv) index,
# so all rows belonging to one wafer receive the same value.
df_econ['simenergy'] = df_simtotal

In [23]:
# The per-event running counter is no longer needed once rows are keyed by wafer.
# errors='ignore' makes the cell safe to re-run: without it a second execution
# raises KeyError because 'subentry' has already been removed.
df_econ = df_econ.drop(columns='subentry', errors='ignore')

In [24]:
# Inspect the wafer-indexed ECON frame with the attached simenergy column.
print(df_econ)

                                        index  data  simenergy
entry subdet zside layer waferu waferv                        
0     2       1    40    -2     -4          0   320        0.0
                                -4          1     0        0.0
                                -4          2     0        0.0
                                -4          3  1088        0.0
                                -4          4   832        0.0
...                                       ...   ...        ...
9     1      -1    13    -3     -1         11  1024        0.0
                                -1         12   304        0.0
                                -1         13   640        0.0
                                -1         14    48        0.0
                                -1         15   752        0.0

[174080 rows x 3 columns]


In [25]:
# Show only the rows whose wafer has a non-zero summed simenergy.
sim_energy = df_econ['simenergy']
print(sim_energy[sim_energy > 0])

entry  subdet  zside  layer  waferu  waferv
0      2       1      30     1       4         0.639610
                                     4         0.639610
                                     4         0.639610
                                     4         0.639610
                                     4         0.639610
                                                 ...   
9      1       1      25     1       3         0.262666
                                     3         0.262666
                                     3         0.262666
                                     3         0.262666
                                     3         0.262666
Name: simenergy, Length: 14576, dtype: float32


In [26]:
# Keep only the ECON rows of wafers that received simulated energy.
has_sim_energy = df_econ["simenergy"] > 0
df_econ_wsimenergy = df_econ[has_sim_energy]

In [27]:
# Give the columns self-describing names before the frame is saved.
output_column_names = {
    "index": "econ_index",
    "data": "econ_data",
    "simenergy": "wafer_energy",
}
df_econ_wsimenergy = df_econ_wsimenergy.rename(columns=output_column_names)

In [28]:
# One row per selected wafer: the row whose econ_index is 0.
is_index_zero = df_econ_wsimenergy.econ_index == 0
df_econ_wsimenergy[is_index_zero]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,econ_index,econ_data,wafer_energy
entry,subdet,zside,layer,waferu,waferv,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2,1,30,1,4,0,704,0.639610
0,2,1,29,0,2,0,384,0.325268
0,2,1,30,2,4,0,0,0.384041
0,2,1,29,0,3,0,256,0.177588
0,2,1,29,1,2,0,448,0.594947
...,...,...,...,...,...,...,...,...
9,1,1,1,1,3,0,384,0.314042
9,1,1,3,2,3,0,1024,0.061016
9,1,1,9,1,4,0,394,0.249215
9,1,1,3,1,2,0,832,0.058836


In [30]:
# Econ data: 9-bit values from each node?
# Wafer energy: in GeV

In [29]:
# Full selection: every ECON row of the wafers with simenergy > 0.
df_econ_wsimenergy

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,econ_index,econ_data,wafer_energy
entry,subdet,zside,layer,waferu,waferv,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2,1,30,1,4,0,704,0.639610
0,2,1,30,1,4,1,0,0.639610
0,2,1,30,1,4,2,0,0.639610
0,2,1,30,1,4,3,704,0.639610
0,2,1,30,1,4,4,768,0.639610
...,...,...,...,...,...,...,...,...
9,1,1,25,1,3,11,256,0.262666
9,1,1,25,1,3,12,320,0.262666
9,1,1,25,1,3,13,448,0.262666
9,1,1,25,1,3,14,512,0.262666


In [45]:
# Turn the wafer index levels back into columns so they can be used in filters.
# Rebinding instead of inplace=True keeps the step explicit and chainable.
df_econ_wsimenergy = df_econ_wsimenergy.reset_index()

In [46]:
# Index by event only; the wafer coordinates stay available as regular columns.
df_econ_wsimenergy = df_econ_wsimenergy.set_index(['entry'])

In [55]:
# Print, without row truncation, the econ_data value at econ_index 0 for every
# selected wafer in layer 30.
layer30_index_zero = (df_econ_wsimenergy.layer == 30) & (df_econ_wsimenergy.econ_index == 0)
display_options = (
    'display.max_rows', None,
    'display.max_columns', None,
    'display.precision', 3,
)
with pd.option_context(*display_options):
    print(df_econ_wsimenergy.econ_data[layer30_index_zero])

entry
0     704
0       0
0     192
1     768
1     576
1    1024
1     768
1     576
2     768
2     768
3     896
4    1280
4     384
4     704
4     704
5     512
5    1024
6     448
7     640
7     320
7     832
7     448
Name: econ_data, dtype: uint32


In [57]:
# Write the selected ECON rows to HDF5 under key 'df'; mode='w' replaces any
# existing file of the same name.
# NOTE(review): this cell comes before the one that adds the gen_energy column,
# so a top-to-bottom run saves the frame without it — confirm this is intended.
df_econ_wsimenergy.to_hdf('econ_data_electron_eta2.7_phi1.5_Nov22.h5', key='df', mode='w')

In [31]:
# Move the (entry, subentry) index levels of the gen frame into columns.
# Rebinding instead of inplace=True keeps the step explicit and chainable.
df_gen = df_gen.reset_index()

In [32]:
# Index the gen frame by event so it can be aligned with the per-event ECON frame.
df_gen = df_gen.set_index('entry')

In [39]:
# Inspect the generator-level particles.
# NOTE(review): the saved output below still shows an (entry, subentry) index,
# which does not match the re-indexing by 'entry' done in the cells above; the
# execution counts suggest df_gen was rebuilt before this cell ran — re-run the
# notebook top to bottom to confirm.
df_gen

Unnamed: 0_level_0,Unnamed: 1_level_0,pt,energy,eta,phi
entry,subentry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,80.423019,599.289551,2.697059,1.492554
1,0,124.385948,927.308289,2.697515,1.498797
2,0,52.817207,391.153015,2.690819,1.491963
3,0,33.214794,246.043961,2.691075,1.491199
4,0,190.976349,1420.463135,2.695186,1.49827
5,0,35.699162,265.670898,2.695733,1.498154
6,0,145.696686,1083.321899,2.694855,1.498915
7,0,138.498276,1028.103638,2.693192,1.490191
8,0,39.580746,294.223419,2.694588,1.497054
9,0,25.126141,187.417267,2.698052,1.491331


In [None]:
# Attach the generated energy of each event to all of its ECON rows.
# The energy is first reduced to exactly one value per entry, so that the
# index-aligned assignment works whether df_gen is indexed by 'entry' alone or
# by (entry, subentry), and does not fail on duplicate entry labels.
# NOTE(review): if an event holds several gen particles the first one is used —
# confirm that this is the particle of interest.
gen_energy_per_entry = df_gen.groupby('entry')['energy'].first()
df_econ_wsimenergy['gen_energy'] = gen_energy_per_entry

In [None]:
# Inspect the final frame, which now includes the gen_energy column.
df_econ_wsimenergy