In [1]:
import uproot as uproot
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mplhep as hep
import awkward as ak

In [2]:
inputs_root = "/eos/cms/store/group/phys_btag/ParticleTransformer/merged/ntuple_merged_342.root:deepntuplizer/tree"

file_ = uproot.open(inputs_root)
columns_ = file_.keys()

In [4]:
global_branches = ['jet_pt', 'jet_eta','nCpfcand',
                   'nNpfcand','nsv','npv',
                   'TagVarCSV_trackSumJetEtRatio','TagVarCSV_trackSumJetDeltaR','TagVarCSV_vertexCategory',
                   'TagVarCSV_trackSip2dValAboveCharm','TagVarCSV_trackSip2dSigAboveCharm','TagVarCSV_trackSip3dValAboveCharm',
                   'TagVarCSV_trackSip3dSigAboveCharm','TagVarCSV_jetNSelectedTracks','TagVarCSV_jetNTracksEtaRel']
                
cpf_branches = ['Cpfcan_BtagPf_trackEtaRel','Cpfcan_BtagPf_trackPtRel','Cpfcan_BtagPf_trackPPar',
                'Cpfcan_BtagPf_trackDeltaR','Cpfcan_BtagPf_trackPParRatio','Cpfcan_BtagPf_trackSip2dVal',
                'Cpfcan_BtagPf_trackSip2dSig','Cpfcan_BtagPf_trackSip3dVal','Cpfcan_BtagPf_trackSip3dSig',
                'Cpfcan_BtagPf_trackJetDistVal','Cpfcan_ptrel','Cpfcan_drminsv',
                'Cpfcan_VTX_ass','Cpfcan_puppiw','Cpfcan_chi2',
                'Cpfcan_quality']

npf_branches = ['Npfcan_ptrel','Npfcan_deltaR','Npfcan_isGamma',
                'Npfcan_HadFrac', 'Npfcan_drminsv', 'Npfcan_puppiw']
        
vtx_branches = ['sv_pt','sv_deltaR','sv_mass',
                'sv_ntracks','sv_chi2','sv_normchi2',
                'sv_dxy','sv_dxysig','sv_d3d',
                'sv_d3dsig','sv_costhetasvpv','sv_enratio']

In [5]:
df_glob = file_.arrays(global_branches, library="ak")
df_cpf = file_.arrays(cpf_branches, library="ak")
df_npf = file_.arrays(npf_branches, library="ak")
df_vtx = file_.arrays(vtx_branches, library="ak")

In [6]:
hflav = file_.arrays('jet_hflav', library="ak")['jet_hflav']

In [7]:
df_cpf_clip = ak.pad_none(df_cpf,25,clip=True)
df_npf_clip = ak.pad_none(df_npf,25,clip=True)
df_vtx_clip = ak.pad_none(df_vtx,5,clip=True)

In [8]:
#glob_np = ak.to_numpy(df_glob)

In [9]:
def quantile_min_max(feature,group='glob',candidate=None):
    if group=='glob':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_glob[feature])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        mini, maxi = np.quantile(array_np,0.01),np.quantile(array_np,0.99)
        mini_, maxi_ = np.quantile(array_np,0.2),np.quantile(array_np,0.8)
        return [mini, maxi], np.std(array_np[(array_np >= mini_) & (array_np <= maxi_)])
    elif group=='cpf':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_cpf_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        mini, maxi = np.quantile(array_np,0.01),np.quantile(array_np,0.99)
        mini_, maxi_ = np.quantile(array_np,0.2),np.quantile(array_np,0.8)
        return [mini, maxi], np.std(array_np[(array_np >= mini_) & (array_np <= maxi_)])
    elif group=='npf':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_npf_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        mini, maxi = np.quantile(array_np,0.01),np.quantile(array_np,0.99)
        mini_, maxi_ = np.quantile(array_np,0.2),np.quantile(array_np,0.8)
        return [mini, maxi], np.std(array_np[(array_np >= mini_) & (array_np <= maxi_)])
    elif group=='vtx':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_vtx_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        mini, maxi = np.quantile(array_np,0.01),np.quantile(array_np,0.99)
        mini_, maxi_ = np.quantile(array_np,0.2),np.quantile(array_np,0.8)
        return [mini, maxi], np.std(array_np[(array_np >= mini_) & (array_np <= maxi_)])
    elif group=='cpf_pts':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_cpf_pts_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        mini, maxi = np.quantile(array_np,0.01),np.quantile(array_np,0.99)
        mini_, maxi_ = np.quantile(array_np,0.2),np.quantile(array_np,0.8)
        return [mini, maxi], np.std(array_np[(array_np >= mini_) & (array_np <= maxi_)])
    elif group=='npf_pts':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_npf_pts_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        mini, maxi = np.quantile(array_np,0.01),np.quantile(array_np,0.99)
        mini_, maxi_ = np.quantile(array_np,0.2),np.quantile(array_np,0.8)
        return [mini, maxi], np.std(array_np[(array_np >= mini_) & (array_np <= maxi_)])
    elif group=='vtx_pts':
        print(feature,group,candidate)
        array_np = ak.to_numpy(df_vtx_pts_clip[feature][:,candidate])
        array_np = np.where(array_np == -999, 0, array_np)
        array_np = np.where(array_np ==   -1, 0, array_np)
        mini, maxi = np.quantile(array_np,0.01),np.quantile(array_np,0.99)
        mini_, maxi_ = np.quantile(array_np,0.2),np.quantile(array_np,0.8)
        return [mini, maxi], np.std(array_np[(array_np >= mini_) & (array_np <= maxi_)])

In [10]:
print(quantile_min_max('jet_pt','glob'))

jet_pt glob None
([15.444969825744629, 1535.6858300781278], 69.261475)


In [11]:
global_epsilons = np.zeros(len(global_branches))

global_standardized_epsilons = np.zeros(len(global_branches))

global_ranges = np.zeros((len(global_branches),2))

for (i,key) in enumerate(global_branches):
    range_inputs, standardized_epsilon = quantile_min_max(key,'glob')
    scale_epsilon = (range_inputs[1] - range_inputs[0])/2
    global_epsilons[i] = scale_epsilon
    global_standardized_epsilons[i] = standardized_epsilon
    global_ranges[i] = range_inputs
    print(range_inputs, scale_epsilon, standardized_epsilon)

jet_pt glob None
[15.444969825744629, 1535.6858300781278] 760.1204301261915 69.261475
jet_eta glob None
[-2.3910172843933104, 2.3943071389198303] 2.39266221165657 0.62393546
nCpfcand glob None
[1.0, 36.0] 17.5 2.8966672
nNpfcand glob None
[0.0, 20.0] 10.0 2.1228364
nsv glob None
[0.0, 4.0] 2.0 0.48071712
npv glob None
[9.0, 48.0] 19.5 4.1257358
TagVarCSV_trackSumJetEtRatio glob None
[0.0, 0.9856463485956194] 0.4928231742978097 0.10858201
TagVarCSV_trackSumJetDeltaR glob None
[0.0, 2.6554229521751425] 1.3277114760875712 0.015905594
TagVarCSV_vertexCategory glob None
[0.0, 2.0] 1.0 0.92754763
TagVarCSV_trackSip2dValAboveCharm glob None
[-0.014902344904839993, 0.036601562052965164] 0.02575195347890258 0.00093595526
TagVarCSV_trackSip2dSigAboveCharm glob None
[-2.80285924911499, 12.758635606765749] 7.780747427940369 0.35713342
TagVarCSV_trackSip3dValAboveCharm glob None
[-0.029931544382125138, 0.06389331802725805] 0.046912431204691594 0.0019247151
TagVarCSV_trackSip3dSigAboveCharm glob Non

In [13]:
cpf_epsilons = np.zeros((len(cpf_branches),25))
cpf_standardized_epsilons = np.zeros((len(cpf_branches),25))
cpf_ranges = np.zeros((len(cpf_branches),25, 2))
for (i,key) in enumerate(cpf_branches):
    for cand in range(25):
        range_inputs, standardized_epsilon = quantile_min_max(key,'cpf',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        cpf_epsilons[i,cand] = scale_epsilon
        cpf_standardized_epsilons[i,cand] = standardized_epsilon
        cpf_ranges[i,cand] = range_inputs
        print(range_inputs, scale_epsilon, standardized_epsilon)

Cpfcan_BtagPf_trackEtaRel cpf 0
[1.617732959985733, 6.018733162879944] 2.2005001014471057 0.44647524
Cpfcan_BtagPf_trackEtaRel cpf 1
[0.0, 6.157034592628479] 3.0785172963142395 0.4810523
Cpfcan_BtagPf_trackEtaRel cpf 2
[0.0, 6.215461888313295] 3.1077309441566476 0.51979274
Cpfcan_BtagPf_trackEtaRel cpf 3
[0.0, 6.238176612854007] 3.1190883064270034 0.57858634
Cpfcan_BtagPf_trackEtaRel cpf 4
[0.0, 6.228526821136475] 3.1142634105682374 0.9895551
Cpfcan_BtagPf_trackEtaRel cpf 5
[0.0, 6.210646243095399] 3.1053231215476993 1.3429825
Cpfcan_BtagPf_trackEtaRel cpf 6
[0.0, 6.171495952606201] 3.0857479763031006 1.4590695
Cpfcan_BtagPf_trackEtaRel cpf 7
[0.0, 6.107757148742677] 3.0538785743713386 1.3531426
Cpfcan_BtagPf_trackEtaRel cpf 8
[0.0, 6.033162083625794] 3.016581041812897 1.2337493
Cpfcan_BtagPf_trackEtaRel cpf 9
[0.0, 5.941444015502931] 2.9707220077514656 1.0340155
Cpfcan_BtagPf_trackEtaRel cpf 10
[0.0, 6.133085017204286] 3.066542508602143 1.3804989
Cpfcan_BtagPf_trackEtaRel cpf 11
[0.0,

In [14]:
npf_epsilons = np.zeros((len(npf_branches),25))
npf_standardized_epsilons = np.zeros((len(npf_branches),25))
npf_ranges = np.zeros((len(npf_branches),25, 2))
for (i,key) in enumerate(npf_branches):
    for cand in range(25):
        range_inputs, standardized_epsilon = quantile_min_max(key,'npf',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        npf_epsilons[i,cand] = scale_epsilon
        npf_standardized_epsilons[i,cand] = standardized_epsilon
        npf_ranges[i,cand] = range_inputs
        print(range_inputs, scale_epsilon, standardized_epsilon)

Npfcan_ptrel npf 0
[-0.996482680439949, 6.301948460639793e-40] 0.4982413402199745 0.068871714
Npfcan_ptrel npf 1
[-0.9978387385606766, 1.4778955123065394e-39] 0.4989193692803383 0.038604733
Npfcan_ptrel npf 2
[-0.9984986788034439, 1.9080751532600672e-39] 0.49924933940172195 0.36749643
Npfcan_ptrel npf 3
[-0.9986718302965164, 2.3901067330033425e-39] 0.4993359151482582 0.44315353
Npfcan_ptrel npf 4
[-0.9987648129463196, 2.995222316243545e-39] 0.4993824064731598 0.48076004
Npfcan_ptrel npf 5
[-0.9988119608163833, 3.055681408551762e-39] 0.4994059804081917 0.4653427
Npfcan_ptrel npf 6
[-0.9988495117425918, 3.07510885631833e-39] 0.4994247558712959 0.4344789
Npfcan_ptrel npf 7
[-0.998893501162529, 3.07510885631833e-39] 0.4994467505812645 0.43580008
Npfcan_ptrel npf 8
[-0.9989380246400833, 3.07510885631833e-39] 0.49946901232004165 0.43675894
Npfcan_ptrel npf 9
[-0.998963058590889, 3.07510885631833e-39] 0.4994815292954445 0.43729192
Npfcan_ptrel npf 10
[-0.99898004591465, 3.07510885631833e-39] 

In [15]:
vtx_epsilons = np.zeros((len(vtx_branches),5))
vtx_standardized_epsilons = np.zeros((len(vtx_branches),5))
vtx_ranges = np.zeros((len(vtx_branches),5, 2))
for (i,key) in enumerate(vtx_branches):
    for cand in range(5):
        range_inputs, standardized_epsilon = quantile_min_max(key,'vtx',cand)
        scale_epsilon = (range_inputs[1] - range_inputs[0])/2
        vtx_epsilons[i,cand] = scale_epsilon
        vtx_standardized_epsilons[i,cand] = standardized_epsilon
        vtx_ranges[i,cand] = range_inputs
        print(range_inputs, scale_epsilon, standardized_epsilon)

sv_pt vtx 0
[0.0, 269.856730957032] 134.928365478516 5.4534616
sv_pt vtx 1
[0.0, 281.2003521728516] 140.6001760864258 5.6200776
sv_pt vtx 2
[0.0, 286.32098419189475] 143.16049209594738 5.6988254
sv_pt vtx 3
[0.0, 287.81987030029313] 143.90993515014657 5.7361717
sv_pt vtx 4
[0.0, 288.587514038086] 144.293757019043 5.742994
sv_deltaR vtx 0
[-0.4964105808734894, 0.9960784316062927] 0.746244506239891 0.18949272
sv_deltaR vtx 1
[-0.49653978377580643, 0.9960784316062927] 0.7463091076910495 0.18948218
sv_deltaR vtx 2
[-0.49655191361904144, 0.9960784316062927] 0.7463151726126671 0.18931955
sv_deltaR vtx 3
[-0.49655530124902725, 0.9960784316062927] 0.74631686642766 0.18927951
sv_deltaR vtx 4
[-0.49655975311994555, 0.9960784316062927] 0.7463190923631191 0.18926302
sv_mass vtx 0
[0.0, 9.330854740142827] 4.665427370071414 0.37618878
sv_mass vtx 1
[0.0, 10.927909107208267] 5.463954553604133 0.403379
sv_mass vtx 2
[0.0, 11.51501242637637] 5.757506213188185 0.41291994
sv_mass vtx 3
[0.0, 11.724386806

In [None]:
np.save('/afs/cern.ch/user/h/heschone/private/DeepJet/epsilons/NEW_global_epsilons.npy',global_epsilons)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/global_standardized_epsilons.npy',global_standardized_epsilons)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/global_ranges.npy',global_ranges)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/cpf_epsilons.npy',cpf_epsilons)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/cpf_standardized_epsilons.npy',cpf_standardized_epsilons)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/cpf_ranges.npy',cpf_ranges)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/npf_epsilons.npy',npf_epsilons)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/npf_standardized_epsilons.npy',npf_standardized_epsilons)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/npf_ranges.npy',npf_ranges)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/vtx_epsilons.npy',vtx_epsilons)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/vtx_standardized_epsilons.npy',vtx_standardized_epsilons)
np.save('/eos/user/a/anstein/public/DeepJet/Train_ParT/auxiliary/vtx_ranges.npy',vtx_ranges)