In [1]:
# Import useful packages
import uproot
import h5py
import pandas as pd
import awkward as ak
import numpy as np
import vector
import itertools
import tk as tkinter
import matplotlib
#matplotlib.use('Agg')  # need for not displaying plots when running batch jobs
matplotlib.use('GTK3Agg')   # need this one if you want plots displayed
from matplotlib import pyplot as plt
from matplotlib import colors
from Plotter import *

  self.kernel.do_one_iteration()


In [None]:
def readInData(reco_method,filename,eventnumbers=None):
    """
    Reads data into a dataframe from a root file. For likelihood-based models, events that are not
    part of the test dataset are removed (if a list of test event numbers is given).

    Which trees are read depends on `reco_method`:
        - contains 'TRecNet' but not 'sys':  trees 'parton' (prefixed 'truth_') and 'reco' (prefixed 'reco_')
        - contains 'TRecNet' and 'sys':      tree 'reco', columns prefixed with the systematic type
        - contains 'sys' but not 'TRecNet':  tree named after the systematic type, columns unchanged
        - anything else (likelihood-based):  trees 'truth' and 'reco', columns unchanged
    The systematic type is the part of `reco_method` before the first underscore.

        Parameters:
            reco_method (str): Name of the reconstruction model (e.g.'KLFitter', 'TRecNet', etc.)
            filename (str): Name, including path, of root file to read data from.
            eventnumbers (array-like, optional): Event numbers for the test dataset. Only used for
                likelihood-based models; if None (default), no events are removed.

        Returns:
            df (pd.DataFrame): Dataframe full of data from the given file.
    """

    print('Reading in data for '+reco_method+' ...')

    is_trecnet = 'TRecNet' in reco_method
    is_sys = 'sys' in reco_method

    # Open root file and its trees (the context manager closes the file even if reading fails)
    # NOTE: ak.to_pandas is the Awkward 1.x name; Awkward 2.x calls it ak.to_dataframe.
    with uproot.open(filename) as f:

        # For TRecNet-based models ...
        if is_trecnet and not is_sys:

            # Load the truth and reco data
            truth_arr = f['parton'].arrays()
            reco_arr = f['reco'].arrays()

            # Create dataframe with truth and reco data, as well as event numbers
            truth_df = ak.to_pandas(truth_arr).add_prefix('truth_')
            reco_df = ak.to_pandas(reco_arr).add_prefix('reco_')
            df = pd.concat([truth_df,reco_df], axis=1)
            df['eventNumber'] = truth_arr['eventNumber']

        # Systematics (TRecNet and non-TRecNet models)
        elif is_sys:

            # Get whether it's up or down systematics
            sys_type = reco_method.split('_')[0]

            # NOTE: the event-number selection is deliberately not applied to systematics. The original
            # author observed it cutting all events and disabled it; the cause is not yet understood.

            if is_trecnet:

                # Create dataframe with reco data, as well as event numbers
                df = ak.to_pandas(f['reco'].arrays())

                # Bit of a hack for now:
                # Add the sysUP or sysDOWN prefix to match the other files, but keep the bookkeeping columns unprefixed
                df = df.add_prefix(sys_type+'_')
                df = df.rename(columns={sys_type+'_eventNumber':'eventNumber',sys_type+'_jet_n':'jet_n',sys_type+'_logLikelihood':'logLikelihood'})

            else:

                # The tree is named after the systematic type; columns are kept as they are
                df = ak.to_pandas(f[sys_type].arrays())

        # For likelihood-based models ...
        else:

            # Load truth and reco data
            truth_arr = f['truth'].arrays()
            reco_arr = f['reco'].arrays()

            # Only take events with the same event numbers as the test data (if any were given)
            # NOTE: the original author observed this selection cutting all events for some inputs;
            # the cause is not yet understood.
            if eventnumbers is not None:
                sel = np.isin(truth_arr['eventNumber'],eventnumbers)
                truth_arr = truth_arr[sel]
                reco_arr = reco_arr[sel]

            # Create dataframe with truth and reco data, as well as event numbers
            truth_df = ak.to_pandas(truth_arr).drop('index',axis=1)
            reco_df = ak.to_pandas(reco_arr).drop('index',axis=1)
            df = pd.concat([truth_df,reco_df], axis=1)

    return df

In [3]:
# Define all the variables and their ranges we want
# (LaTeX labels are raw strings, so their backslashes are never interpreted as string escape sequences)
scale = '[GeV]'
pt = Variable('pt', 'p_T', np.arange(0,550,50), unit=scale)
#px = Variable('px','p_x',np.arange(-250,250,50),unit=scale)
#py = Variable('py','p_y',np.arange(-250,250,50),unit=scale)
eta = Variable('eta', r'\eta', np.arange(-6,7.5,1.5))
y = Variable('y', 'y', np.arange(-2.5,3,0.5))
phi = Variable('phi', r'\phi', np.arange(-3,3.75,0.75))
m_t = Variable('m','m', np.arange(100,260,20),unit=scale)
E_t = Variable('E','E', np.arange(100,1600,150),unit=scale)
#pout_t = Variable('pout','p_{out}',range(-275,325,50), unit=scale,alt_names=['Pout'])
m_tt = Variable('m','m', np.arange(200,1700,150),unit=scale)
E_tt = Variable('E','E', np.arange(200,2700,250),unit=scale)
dphi_tt = Variable('dphi',r'|\Delta\phi|', np.arange(0,3.6,0.45),alt_names=['deltaPhi'])
deta_tt = Variable('deta',r'|\Delta\eta|', np.arange(0,10,1),alt_names=['deltaEta'])
Ht_tt = Variable('Ht','H_T',np.arange(0,1200,120),unit=scale,alt_names=['HT'])
yboost_tt = Variable('yboost','y_{boost}',np.arange(-3,3.75,0.75),alt_names=['y_boost'])
#ystar_tt = Variable('ystar','y_{star}',np.arange(-2.5,3,0.5),alt_names=['y_star'])
chi_tt = Variable('chi',r'\chi',np.arange(0,25,2.5),alt_names=['chi_tt'])

# Define the particles we want
# (the two single tops share one variable list; the ttbar system has its own mass/energy ranges plus extra observables)
top_had = Particle('th','t,had',[pt,eta,y,phi,m_t,E_t],alt_names=['thad','topHad','top_had'])
top_lep = Particle('tl','t,lep',[pt,eta,y,phi,m_t,E_t],alt_names=['tlep','topLep','top_lep'])
top_antitop = Particle('ttbar',r't\overline{t}',[pt,eta,y,phi,m_tt,E_tt,dphi_tt,deta_tt,Ht_tt,yboost_tt,chi_tt])

In [4]:
# Count the events in the test dataset.
# The context manager closes the file even if the read fails, and len() on the h5py dataset
# gives the number of entries without first copying the whole column into memory.
with h5py.File('/mnt/xrootdg/jchishol/mntuples_08_01_22/variables_ttbar_ljets_jetMatch04_6jets_test.h5','r') as f_test:
    total_test_events = len(f_test['eventNumber'])
print(total_test_events)

3942027


In [5]:
# Load the TRecNet truth + reco results.
# readInData does not filter TRecNet results by event number, so None is passed for `eventnumbers`
# (the argument has to be supplied because the function takes three positional parameters).
TRecNet_df = readInData('TRecNet','Model_Custom_full_Results.root',None)
print(TRecNet_df)

Reading in data for TRecNet
         truth_th_E  truth_th_eta  truth_th_m  truth_th_phi  truth_th_pout  \
entry                                                                        
0        475.930594      2.848772  178.917484     -1.357432      37.856434   
1        327.143937     -0.584280  171.508766      1.144990      78.873909   
2        349.204125      0.321736  171.637531     -2.026933     278.365265   
3        202.464562      0.113129  173.226797      2.691848     104.010834   
4        495.988844     -1.754646  169.047375      1.572338     -27.068096   
...             ...           ...         ...           ...            ...   
3942022  531.905875      1.095136  173.057641      0.457400     -43.665470   
3942023  478.572156     -1.903769  169.438344      1.051035     -33.535278   
3942024  400.924094      2.865699  173.170141     -2.421465      38.690205   
3942025  315.061000      0.923655  172.234328     -0.440234     -88.464722   
3942026  215.092172     -0.449053  1

In [6]:
# Event numbers of the loaded TRecNet events, copied out of the dataframe as a plain numpy array
eventnumbers = np.array(TRecNet_df.loc[:, 'eventNumber'])

In [7]:
# Load the up and down systematic variations (read in that order), one dataframe per file.
# NOTE(review): the method name 'sys' does not contain 'TRecNet', so readInData takes its
# non-TRecNet systematics branch and reads a tree called 'sys' -- confirm this is intended,
# given these variables hold TRecNet systematics.
TRecNet_sysUP_df, TRecNet_sysDOWN_df = (
    readInData('sys', sys_filename, eventnumbers)
    for sys_filename in ('Model_Custom_sysUP_Results.root', 'Model_Custom_sysDOWN_Results.root')
)

Reading in data for sys
Reading in data for sys


In [14]:
# Inspect the up-variation systematics dataframe (the notebook shows a truncated view of rows and columns)
TRecNet_sysUP_df

Unnamed: 0_level_0,reco_th_pt,reco_th_eta,reco_th_phi,reco_th_m,reco_wh_pt,reco_wh_eta,reco_wh_phi,reco_wh_m,reco_tl_pt,reco_tl_eta,...,reco_ttbar_y,reco_th_pout,reco_tl_pout,reco_ttbar_dphi,reco_ttbar_Ht,reco_ttbar_yboost,reco_ttbar_ystar,reco_ttbar_chi,eventNumber,reco_jet_n
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,56.007612,2.803485,-1.406549,172.545077,72.560645,1.988934,-2.614635,80.315198,239.564798,1.165700,...,1.254428,44.902975,-192.066251,2.211409,295.572410,1.330721,0.330554,1.936937,7040187,5
1,224.964173,-0.731637,1.223032,172.501100,120.558001,-0.645841,1.712799,80.392679,137.813899,-1.035931,...,-0.645894,89.274136,-54.689672,2.733524,362.778072,-0.652740,0.055150,1.116612,7038749,5
2,253.254712,0.452742,-2.119167,172.584292,231.602098,0.545698,-2.075665,80.272725,100.041782,0.237216,...,0.276125,240.919360,-95.169017,1.257402,353.296494,0.248934,0.129044,1.294452,7038156,7
3,114.810984,0.192742,-1.633826,172.547446,121.150500,0.288891,-1.101118,80.306324,65.888820,-1.990288,...,-0.449385,-18.006932,10.333990,2.984103,180.699804,-0.479795,0.587024,3.235062,7038765,5
4,156.971806,-1.815973,1.495471,172.494306,169.436825,-1.559299,1.426795,80.379330,133.086504,-0.728016,...,-0.974278,-35.024732,29.695263,2.916571,290.058311,-0.958799,-0.491025,2.669925,7038065,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3790040,87.551166,-0.675625,-2.378740,172.532398,74.082743,-1.099378,3.128217,80.222044,77.171664,-3.015429,...,-1.219298,-6.544913,5.768991,3.066768,164.722830,-1.227809,0.903969,6.097863,256311264,4
3790041,69.540357,-1.107974,1.023084,172.455993,48.702831,-1.046604,-0.287613,80.475552,65.940302,-0.043553,...,-0.251164,2.734847,-2.593266,3.102255,135.480658,-0.250389,-0.234836,1.599470,256311672,4
3790042,112.393316,-2.062362,1.879896,172.504166,14.669272,-2.989260,-1.171711,80.450546,219.287454,-1.682344,...,-1.474681,-25.349622,49.458938,2.914091,331.680770,-1.477028,-0.015557,1.031604,256311943,5
3790043,156.981885,0.285517,-0.647589,172.489973,121.934064,0.688352,-0.822405,80.306503,136.974965,2.358977,...,1.025850,42.526695,-37.106782,2.867263,293.956850,1.045632,-0.852047,5.496402,256310852,5


In [21]:
# Positions in `eventnumbers` whose event number does not appear in the up-variation systematics dataframe
np.nonzero(np.isin(eventnumbers, TRecNet_sysUP_df['eventNumber'], invert=True))

(array([    104,     472,     816, ..., 3941635, 3941746, 3941989]),)

In [22]:
# Event number at position 104, the first position the np.isin check in the previous cell reported as missing
eventnumbers[104]

7045444

In [40]:
# Read every branch of the 'reco' tree from the KLFitter results file.
# The context manager closes the file once the arrays have been read into memory.
with uproot.open('/mnt/xrootdg/jchishol/mntuples_08_01_22/KLF6_Results.root') as klf_file:
    tree = klf_file['reco'].arrays()

In [63]:
# Keep only the entries whose event number appears in `eventnumbers`.
# `sel` is a boolean mask with one entry per event in `tree`; `tree` is rebound to the filtered array,
# so re-running this cell filters the already-filtered array again.
sel = np.isin(tree['eventNumber'],eventnumbers)
tree = tree[sel]

In [64]:
# x-component of the 'th' momentum: px = pt * cos(phi), stored as new fields on `tree`.
# The truth pt is divided by 1000 before use (presumably MeV -> GeV to match reco -- confirm).
tree['reco_th_px'] = np.multiply(tree['reco_th_pt'], np.cos(tree['reco_th_phi']))
tree['truth_th_px'] = np.multiply(tree['truth_th_pt'] / 1000, np.cos(tree['truth_th_phi']))

In [65]:
# Overlay the reco and truth 'th' p_x distributions with identical binning.
# The legend and axis labels make the two step histograms distinguishable when the figure is read on its own.
fig, ax = plt.subplots()
hist_style = dict(histtype='step', bins=30, range=(-200,200))
ax.hist(tree['reco_th_px'], label='reco', **hist_style)
ax.hist(tree['truth_th_px'], label='truth', **hist_style)
ax.set_xlabel(r'$p_x^{t,had}$')   # units not shown: presumably GeV after the /1000 on truth -- confirm before adding
ax.set_ylabel('Events')
ax.legend()
plt.show()