In [1]:
import pandas as pd
import numpy as np

from root_pandas import read_root

Welcome to JupyROOT 6.12/06


In [2]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

#### Declare branches to be used

In [3]:
# Per-hit branches; each holds one array per event.
hits_branches = [
    'rechit_skiroc',
    'rechit_channel',
    'rechit_x',
    'rechit_y',
    'rechit_energy',
    'rechit_layer',
]

# Event-level branches: run/event identifiers and the PI position.
info_branches = [
    'run',
    'event',
    'PI_positionX',
    'PI_positionY',
]

# Track-related branches: track count and impact coordinates on HGCal layer 1.
tracks_branches = [
    'ntracks',
    'impactX_HGCal_layer_1',
    'impactY_HGCal_layer_1',
]

In [4]:
# Input ROOT ntuple; the read_root calls below load all their trees from this file.
fname = "calo_setup/ntuple_1222.root"

In [5]:
# Hits and event info are read from the same tree with different column sets;
# the track impact points come from a separate tree.
df_hits = read_root(fname, key="rechitntupler/hits", columns=hits_branches)
df_info = read_root(fname, key="rechitntupler/hits", columns=info_branches)
df_tracks = read_root(
    fname,
    key="trackimpactntupler/impactPoints",
    columns=tracks_branches,
)

#### Merge dataframes into one

In [6]:
# Column-wise concatenation of the three frames, aligned on the hits index.
# `join_axes` was deprecated in pandas 0.25 and removed in 1.0; reindexing the
# concatenated frame onto df_hits.index is the documented replacement and also
# works on older pandas versions.
df_all = pd.concat([df_info, df_hits, df_tracks], axis=1).reindex(df_hits.index)

In [7]:
# Preview the first two rows of the merged frame.
df_all.head(2)

Unnamed: 0,PI_positionY,PI_positionX,run,event,rechit_x,rechit_y,rechit_channel,rechit_energy,rechit_skiroc,rechit_layer,impactY_HGCal_layer_1,ntracks,impactX_HGCal_layer_1
0,-999.0,-999.0,1222,1,"[1.000202e-05, -1.9488935, 1.000202e-05, -0.97...","[1.12519, 1.0003498e-05, 1.12519, -5.06339, 1....","[36, 18, 34, 20, 26, 24, 18, 30, 28, 22, 34, 3...","[0.35155967, 0.39493895, 0.49695483, 1.9753718...","[3, 0, 5, 7, 4, 4, 7, 4, 4, 4, 4, 7, 7, 7, 7, ...","[1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",[-1.2472391],1,[-5.3245564]
1,-999.0,-999.0,1222,2,"[9.989902e-06, 2.9233453, 0.97444177, 1e-05, -...","[-5.62599, -2.81299, 0.56259, 1e-05, -2.81299,...","[8, 44, 38, 36, 56, 26, 18, 28, 32, 44, 36, 38...","[0.5169128, 1.070182, 0.35918677, 1.1010299, 1...","[1, 1, 2, 6, 7, 4, 7, 4, 7, 5, 11, 11, 11, 11,...","[1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, ...",[2.4550781],1,[0.63384724]


#### Calculate number of hits in a layer

In [8]:
def rh_filter(rh_row, layer=1, min_energy=0.3):
    """Count the rechits of one event that lie in `layer` above an energy cut.

    Parameters
    ----------
    rh_row : row-like object
        Must expose `rechit_layer` and `rechit_energy` as equally long
        array-likes (a DataFrame row as passed by `df.apply(..., axis=1)` works).
    layer : int, optional
        Layer number a hit has to match. Defaults to 1.
    min_energy : float, optional
        Exclusive lower bound on `rechit_energy`. Defaults to 0.3, the value
        that used to be hard-coded in the body.

    Returns
    -------
    int
        Number of hits that satisfy both conditions.
    """
    # np.asarray leaves ndarrays untouched and lets plain lists work as well.
    in_layer = np.asarray(rh_row.rechit_layer) == layer
    above_cut = np.asarray(rh_row.rechit_energy) > min_energy
    # Count the mask directly instead of materialising a filtered copy of rechit_x.
    return int(np.count_nonzero(in_layer & above_cut))

In [9]:
# Row-wise pass: rh_filter (with its default arguments) yields one hit count per event.
hit_counts = df_all.apply(rh_filter, axis=1)
df_all['nhits'] = hit_counts

In [10]:
# Preview the first two rows, now including the nhits column.
df_all.head(2)

Unnamed: 0,PI_positionY,PI_positionX,run,event,rechit_x,rechit_y,rechit_channel,rechit_energy,rechit_skiroc,rechit_layer,impactY_HGCal_layer_1,ntracks,impactX_HGCal_layer_1,nhits
0,-999.0,-999.0,1222,1,"[1.000202e-05, -1.9488935, 1.000202e-05, -0.97...","[1.12519, 1.0003498e-05, 1.12519, -5.06339, 1....","[36, 18, 34, 20, 26, 24, 18, 30, 28, 22, 34, 3...","[0.35155967, 0.39493895, 0.49695483, 1.9753718...","[3, 0, 5, 7, 4, 4, 7, 4, 4, 4, 4, 7, 7, 7, 7, ...","[1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",[-1.2472391],1,[-5.3245564],2
1,-999.0,-999.0,1222,2,"[9.989902e-06, 2.9233453, 0.97444177, 1e-05, -...","[-5.62599, -2.81299, 0.56259, 1e-05, -2.81299,...","[8, 44, 38, 36, 56, 26, 18, 28, 32, 44, 36, 38...","[0.5169128, 1.070182, 0.35918677, 1.1010299, 1...","[1, 1, 2, 6, 7, 4, 7, 4, 7, 5, 11, 11, 11, 11,...","[1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, ...",[2.4550781],1,[0.63384724],3


### Select clean events (ntracks == 1 and nhits == 1) before creating new branches

In [11]:
# Boolean event mask: exactly one track and exactly one counted hit.
single_track = df_all['ntracks'] == 1
single_hit = df_all['nhits'] == 1
selection = single_track & single_hit

In [12]:
# Take an explicit copy of the selected rows: the following cells add and
# overwrite columns on df_all, which on a bare boolean-mask slice triggers
# pandas' SettingWithCopyWarning.
df_all = df_all[selection].copy()

### Define global impact position from the single track

In [13]:
## Convert the impact arrays to scalars (take the first element, which for
## ntracks == 1 is the only one). The lambda is applied to the single column
## rather than row-wise over the whole frame: this avoids building a Series per
## row and still returns a Series when the frame is empty.
impact_x = df_all['impactX_HGCal_layer_1'].apply(lambda impacts: impacts[0])
impact_y = df_all['impactY_HGCal_layer_1'].apply(lambda impacts: impacts[0])

In [14]:
# Attach the scalar impact coordinates as new columns, aligned on their own index.
for column, values in (('impact_x', impact_x), ('impact_y', impact_y)):
    df_all.loc[values.index, column] = values

In [15]:
# Fix PI position: put to 0 if not available.
# -999 is the "not available" marker visible in the raw rows. Only values at or
# below it are zeroed; the previous max(value, 0) also wiped out every genuine
# negative position. The mask is vectorised, so no row-wise apply is needed.
for pi_column in ('PI_positionX', 'PI_positionY'):
    df_all[pi_column] = df_all[pi_column].mask(df_all[pi_column] <= -999, 0)

In [16]:
# Combined impact = track impact + PI position, per coordinate.
df_all['impact_x'] = df_all['impact_x'].add(df_all['PI_positionX'])
df_all['impact_y'] = df_all['impact_y'].add(df_all['PI_positionY'])

In [17]:
# Scale to cm: divide both impact coordinates by 10.
# NOTE(review): presumably the inputs are in mm — confirm against the ntuple producer.
df_all['impact_x'] = df_all['impact_x'].div(10)
df_all['impact_y'] = df_all['impact_y'].div(10)

In [18]:
# Preview the first two selected rows with the new impact columns.
df_all.head(2)

Unnamed: 0,PI_positionY,PI_positionX,run,event,rechit_x,rechit_y,rechit_channel,rechit_energy,rechit_skiroc,rechit_layer,impactY_HGCal_layer_1,ntracks,impactX_HGCal_layer_1,nhits,impact_x,impact_y
5,0,0,1222,6,"[-2.9233453, 9.997981e-06, -0.97444177, -1.948...","[-1.6877899, -1.12519, 1.6877899, -5.62599, 3....","[40, 36, 26, 18, 30, 28, 32, 48, 36, 32, 30, 2...","[0.3699954, 4.102713, 7.0221953, 4.0269313, 1....","[0, 7, 4, 7, 4, 4, 4, 4, 4, 7, 7, 7, 7, 7, 11,...","[1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, ...",[1.6950089],1,[-2.7425716],1,-0.274257,0.169501
13,0,0,1222,14,"[-0.97444177, -3.897797, 1e-05, 9.997981e-06, ...","[-0.56259, 1.00069965e-05, 1e-05, -1.12519, 1....","[24, 2, 36, 36, 26, 28, 32, 34, 32, 30, 22, 34...","[0.83746463, 0.29761407, 1.0459461, 1.8773766,...","[0, 0, 6, 7, 4, 4, 7, 6, 6, 7, 7, 7, 11, 11, 9...","[1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, ...",[1.1340532],1,[4.8897667],1,0.488977,0.113405


### Remove unneeded branches like PI position, etc.

In [19]:
# Inputs of the impact calculation that are no longer needed downstream.
drop_columns = ['PI_positionY', 'PI_positionX', 'impactY_HGCal_layer_1', 'impactX_HGCal_layer_1']
# `columns=` already names the axis; the former extra `axis=0` contradicted it
# and was silently ignored by pandas, so it has been removed.
df_all = df_all.drop(columns=drop_columns)

In [20]:
# Preview the first two rows after dropping the helper columns.
df_all.head(2)

Unnamed: 0,run,event,rechit_x,rechit_y,rechit_channel,rechit_energy,rechit_skiroc,rechit_layer,ntracks,nhits,impact_x,impact_y
5,1222,6,"[-2.9233453, 9.997981e-06, -0.97444177, -1.948...","[-1.6877899, -1.12519, 1.6877899, -5.62599, 3....","[40, 36, 26, 18, 30, 28, 32, 48, 36, 32, 30, 2...","[0.3699954, 4.102713, 7.0221953, 4.0269313, 1....","[0, 7, 4, 7, 4, 4, 4, 4, 4, 7, 7, 7, 7, 7, 11,...","[1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, ...",1,1,-0.274257,0.169501
13,1222,14,"[-0.97444177, -3.897797, 1e-05, 9.997981e-06, ...","[-0.56259, 1.00069965e-05, 1e-05, -1.12519, 1....","[24, 2, 36, 36, 26, 28, 32, 34, 32, 30, 22, 34...","[0.83746463, 0.29761407, 1.0459461, 1.8773766,...","[0, 0, 6, 7, 4, 4, 7, 6, 6, 7, 7, 7, 11, 11, 9...","[1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, ...",1,1,0.488977,0.113405


### Clean rechits: Series -> values (select only first rechit)

In [23]:
def _selected_hit_position(layers, energies, layer=1, min_energy=0.3):
    """Return the position of the first hit in `layer` with energy above `min_energy`.

    The defaults mirror the cut rh_filter applies when computing `nhits`; keep
    them in sync. Falls back to position 0 (the previous behaviour) when no hit
    passes the cut.
    """
    passing = np.flatnonzero(
        (np.asarray(layers) == layer) & (np.asarray(energies) > min_energy)
    )
    return int(passing[0]) if passing.size else 0


# Events were kept on nhits == 1, i.e. exactly one hit passes the layer/energy
# cut, but that hit is not guaranteed to be stored first in the arrays. Locate
# it once, before the loop below overwrites rechit_layer and rechit_energy with
# scalars.
hit_positions = [
    _selected_hit_position(layers, energies)
    for layers, energies in zip(df_all['rechit_layer'], df_all['rechit_energy'])
]

# Reduce every per-hit array column to the scalar of the selected hit.
for branch in hits_branches:
    df_all[branch] = [hits[pos] for hits, pos in zip(df_all[branch], hit_positions)]

In [24]:
# Preview the first two rows with scalar rechit columns.
df_all.head(2)

Unnamed: 0,run,event,rechit_x,rechit_y,rechit_channel,rechit_energy,rechit_skiroc,rechit_layer,ntracks,nhits,impact_x,impact_y
5,1222,6,-2.923345,-1.68779,40,0.369995,0,1,1,1,-0.274257,0.169501
13,1222,14,-0.974442,-0.56259,24,0.837465,0,1,1,1,0.488977,0.113405


In [25]:
# Write the final frame to out.root under the key "tree".
df_all.to_root("out.root", key="tree")