In [None]:
import ROOT
import narf
import pandas as pd
import h5py 
import hist
import hdf5plugin
import boost_histogram as bh
import numpy as np
import matplotlib.pyplot as plt

Welcome to JupyROOT 6.28/02


In [None]:
# --- Load the inputs ---------------------------------------------------------
# NOTE(review): neither file handle is closed in this cell. `results` is queried
# again with .get() in a later cell, so `f` presumably has to stay open (confirm
# whether narf hands back lazy proxies); close both files when the notebook is done.
# pickle_load_h5py presumably unpickles objects stored in the file: only open trusted files.
f = h5py.File("templatesTest2.hdf5","r")
results = narf.ioutils.pickle_load_h5py(f["results"])
H = results['ZmumuPostVFP']['output']['signalTemplates_nominal'].get() #boost histogram values
t = h5py.File('templatesFit.hdf5','r')


# --- Axis information taken from the histogram itself --------------------------
yBinsC      = H.axes['Zrap'].centers
qtBinsC     = H.axes['Zpt'].centers
chargeBinsC = H.axes['charge'].centers
helicities  = list(H.axes['helicities'])  # expected to be ['L','I','T','A','P','UL'] -- TODO confirm for this histogram
nY, nQt, nCharge, nHel = len(yBinsC), len(qtBinsC), len(chargeBinsC), len(helicities)


# --- Unpacking the data --------------------------------------------------------
# First unroll the two muon axes (eta, pt) into a single axis:
#   (nY, nQt, nEta, nPt, nCharge, nHel) -> (nY, nQt, nEta*nPt, nCharge, nHel)
# e.g. (6,8,48,60,2,6) -> (6,8,2880,2,6). The sizes come from H.axes instead of being hard-coded.
unrolled = H.to_numpy()[0].reshape((nY, nQt, -1, nCharge, nHel))
nTemplateBins = unrolled.shape[2]
# Next, swap the unrolled eta/pt axis with the last (helicity) axis:
#   (nY, nQt, nEta*nPt, nCharge, nHel) -> (nY, nQt, nHel, nCharge, nEta*nPt)
# Note that helicity now comes BEFORE charge, which is why the MultiIndex below is
# ordered (rapidity, qt, hel, charge).
# Lastly, flatten to 2D: one row per (rapidity, qt, hel, charge), i.e. one unrolled
# eta/pt distribution (template) per row, e.g. (576, 2880).
a = np.swapaxes(unrolled , 2 , -1).reshape(-1, nTemplateBins)


# --- Building the pandas dataframe ---------------------------------------------
# Row labels follow the same order as the flattened leading axes of `a`.
iterables = [yBinsC, qtBinsC, helicities, chargeBinsC]
multi = pd.MultiIndex.from_product(iterables , names = ['rapidity', 'qt' , 'hel','charge'])
df = pd.DataFrame(a , index = multi)


# --- Adding cross section information to our dataframe by merging --------------
qtBins = np.array([0., 3., 6., 9.62315204,12.36966732,16.01207711,21.35210602,29.50001253,60.,200.]) #these have to be like this for now
yBins = np.array([0., 0.4, 0.8, 1.2, 1.6, 2.0, 2.4, 3.0, 10.0])

# Keep only the fit-file bins below |y| = 2.4 and qt = 60.
threshold_y = np.digitize(2.4,yBins)-1
threshold_qt = np.digitize(60.,qtBins)-1

T = t['helicity'][:threshold_y,:threshold_qt,:] #cross sections
# NOTE(review): this assumes the last axis of t['helicity'] is ordered like `helicities` -- confirm.
processes = [yBinsC , qtBinsC , helicities]
multi2 = pd.MultiIndex.from_product(processes , names = ['rapidity', 'qt' , 'hel'])
s = pd.Series(T.ravel(), index = multi2 , name='xsec')

# The cross sections carry no charge label, so the same values are attached to every
# charge: stack one copy of `s` per charge and label each copy with its charge.
charges = np.repeat(chargeBinsC, len(s)).tolist()
xsec_df = pd.concat([s]*nCharge ,axis=0).reset_index()
xsec_df['charge'] = charges

df = df.merge(xsec_df ,left_on=['rapidity','qt','hel','charge'], right_on=['rapidity','qt','hel','charge'])
# The join keys are floats; an inner merge would silently drop rows on any mismatch.
assert len(df) == len(a), "merge with cross sections dropped or duplicated template rows"

# Collapse the per-bin columns into a single 'data' column holding one array per row.
df['data'] = pd.Series(list(df.loc[:, 0:nTemplateBins-1].to_numpy()), index=df.index)
df = df.loc[:,['rapidity' , 'qt','hel','charge','data','xsec']]


# --- Setting process as index --------------------------------------------------
# Bin centres rounded to one decimal are used in the labels, e.g. 'y_0.2_qt_1.5'.
yLabel  = df['rapidity'].map(lambda v: str(round(v, 1)))
qtLabel = df['qt'].map(lambda v: str(round(v, 1)))
df['helgroups'] = 'y_' + yLabel + '_qt_' + qtLabel
# The process name does not include the charge, so each index label appears once per charge.
df = df.set_index('helXsec_' + df['hel'] + '_' + df['helgroups'])
df = df.drop(columns=['rapidity','qt','hel'])

In [14]:
# Inspect the column labels of the assembled template table.
df.columns

Index(['rapidity', 'qt', 'hel', 'charge', 'data', 'xsec'], dtype='object')

In [13]:
# NOTE(review): the `df.columns` output in this notebook shows flat string labels, while
# MultiIndex.from_tuples expects one tuple per column (here two levels, 'Caps' and 'Lower').
# This assignment looks like a leftover experiment that would presumably fail on a fresh
# run -- confirm the intended two-level labels or drop the line.
df.columns = pd.MultiIndex.from_tuples(df.columns, names=['Caps','Lower'])
# Show the first rows of the table.
df.head()

Unnamed: 0,rapidity,qt,hel,charge,data,xsec
helXsec_L_y_0.2_qt_1.5,0.2,1.5,L,-1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1979.366479
helXsec_L_y_0.2_qt_1.5,0.2,1.5,L,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1979.366479
helXsec_I_y_0.2_qt_1.5,0.2,1.5,I,-1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",223.724364
helXsec_I_y_0.2_qt_1.5,0.2,1.5,I,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",223.724364
helXsec_T_y_0.2_qt_1.5,0.2,1.5,T,-1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",-349.398681


In [14]:
# Fetch the 'signalTemplates_mass' histogram of the ZtautauPostVFP sample and display its summary.
results['ZtautauPostVFP']['output']['signalTemplates_mass'].get()

Hist(
  Variable([0, 0.4, 0.8, 1.2, 1.6, 2, 2.4], name='Zrap'),
  Variable([0, 3, 6, 9.62315, 12.3697, 16.0121, 21.3521, 29.5, 60], name='Zpt'),
  Regular(48, -2.4, 2.4, name='mueta'),
  Regular(60, 25, 55, name='mupt'),
  Regular(2, -2, 2, underflow=False, overflow=False, name='charge'),
  StrCategory(['L', 'I', 'T', 'A', 'P', 'UL'], name='helicities'),
  StrCategory(['massShift100MeVDown', 'massShift90MeVDown', 'massShift80MeVDown', 'massShift70MeVDown', 'massShift60MeVDown', 'massShift50MeVDown', 'massShift40MeVDown', 'massShift30MeVDown', 'massShift20MeVDown', 'massShift10MeVDown', 'massShift0MeV', 'massShift10MeVUp', 'massShift20MeVUp', 'massShift30MeVUp', 'massShift40MeVUp', 'massShift50MeVUp', 'massShift60MeVUp', 'massShift70MeVUp', 'massShift80MeVUp', 'massShift90MeVUp', 'massShift100MeVUp', 'massShift2p1MeVDown', 'massShift2p1MeVUp'], name='massShift'),
  storage=Weight()) # Sum: WeightedSum(value=953776, variance=1.11049e+06)

In [3]:
# Summarise df: index range, number of columns, dtypes and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 576 entries, helXsec_L_y_0.2_qt_1.5 to helXsec_UL_y_2.2_qt_44.8
Columns: 2885 entries, rapidity to xsec
dtypes: float64(2884), object(1)
memory usage: 12.7+ MB


In [29]:
# Memory used by the index and by each column of df, in bytes.
df.memory_usage()

Index     21160
charge     4608
0          4608
1          4608
2          4608
          ...  
2876       4608
2877       4608
2878       4608
2879       4608
xsec       4608
Length: 2883, dtype: int64