In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import awkward as ak
import uproot
from tqdm.auto import tqdm

In [4]:
features = ['trackBTag_DeltaR',
            'trackBTag_Eta',
            'trackBTag_EtaRel',
            'trackBTag_JetDistVal',
            'trackBTag_Momentum',
            'trackBTag_PPar',
            'trackBTag_PParRatio',
            'trackBTag_PtRatio',
            'trackBTag_PtRel',
            'trackBTag_Sip2dSig',
            'trackBTag_Sip2dVal',
            'trackBTag_Sip3dSig',
            'trackBTag_Sip3dVal',
            'track_charge',
            'track_deltaR',
            'track_drminsv',
            'track_drsubjet1',
            'track_drsubjet2',
            'track_dxy',
            'track_dxysig',
            'track_dz',
            'track_dzsig',
            'track_erel',
            'track_etarel',
            'track_mass',
            'track_phirel',
            'track_pt',
            'track_ptrel']

In [144]:
def return_sorted_3D_array(path, num_of_tracks = 10, sort_by = 'trackBTag_Momentum'):
    
    file = uproot.open(path)
    events = file['deepntuplizer/tree']
    max_length  = np.max(events['n_tracks'])
    arrays = events.arrays(features, library='np')
    padded_momenta = np.array([np.pad(v, (0, max_length - len(v)), 'constant') for v in arrays[sort_by]])
    indices = np.flip(np.argsort(padded_momenta),axis=1)
    row_idx = np.repeat(np.arange(indices.shape[0]),indices.shape[1]).reshape(indices.shape)
    
    sorted_arrays = {}

    for key, arr in tqdm(arrays.items()):
        padded = np.array([np.pad(v, (0, max_length - len(v)), 'constant') for v in arr])
        sort = padded[row_idx,indices][:,:num_of_tracks]
        sorted_arrays[key] = sort
    
    stacked_3d_array = np.stack(list(sorted_arrays.values()),axis=1)
    
    return stacked_3d_array

In [8]:
file = uproot.open("root_files/ntuple_merged_0.root")
events = file['deepntuplizer/tree']

In [9]:
max_length  = np.max(events['n_tracks'])

In [10]:
arrays = events.arrays(features, library='np')

In [11]:
padded = np.array([np.pad(v, (0, max_length - len(v)), 'constant') for v in arrays['trackBTag_Momentum']])

In [12]:
indices = np.flip(np.argsort(padded),axis=1)

In [13]:
row_idx = np.repeat(np.arange(indices.shape[0]),indices.shape[1]).reshape(indices.shape)
sort = padded[row_idx,indices]

In [14]:
sorted_arrays = {}

for key, arr in tqdm(arrays.items()):
    padded = np.array([np.pad(v, (0, max_length - len(v)), 'constant') for v in arr])
    sort = padded[row_idx,indices][:,:10]
    sorted_arrays[key] = sort

  0%|          | 0/28 [00:00<?, ?it/s]

In [15]:
X_train = np.stack(list(sorted_arrays.values()),axis=1)

In [121]:
X_train[54,4,:]

array([177.1975   , 176.45265  ,  71.62764  ,  67.66158  ,  46.1704   ,
        39.87363  ,  33.209717 ,  26.0824   ,  21.016676 ,  19.867414 ,
        19.121513 ,  18.676933 ,  18.667404 ,  16.606451 ,  14.34936  ,
         5.9331737,   5.1374288,   4.3219485,   3.9893363,   2.3932638],
      dtype=float32)

In [24]:
sorted_arrays['trackBTag_Momentum'][54]

array([177.1975  , 176.45265 ,  71.62764 ,  67.66158 ,  46.1704  ,
        39.87363 ,  33.209717,  26.0824  ,  21.016676,  19.867414],
      dtype=float32)

In [26]:
np.sort(arrays['trackBTag_Momentum'][54])[::-1]

array([177.1975   , 176.45265  ,  71.62764  ,  67.66158  ,  46.1704   ,
        39.87363  ,  33.209717 ,  26.0824   ,  21.016676 ,  19.867414 ,
        19.121513 ,  18.676933 ,  18.667404 ,  16.606451 ,  14.34936  ,
         5.9331737,   5.1374288,   4.3219485,   3.9893363,   2.3932638,
         2.1719003,   1.7924364,   1.6737804,   1.4612821,   1.4068244,
         1.4035587,   1.2309704,   1.0898428,   0.9976875], dtype=float32)

In [27]:
X_train[:,2,:][54]

array([5.831503 , 7.641682 , 4.215587 , 5.4569993, 5.468007 , 3.8082256,
       4.04375  , 5.124672 , 4.5758867, 3.2280526], dtype=float32)

In [40]:
arrays[features[2]][54][16]

3.2280526

In [29]:
indices[54]

array([  6,  24,  11,   9,   5,  19,  15,  23,   8,  16,  10,  22,  17,
        27,  25,  14,  13,  28,  12,  18,  21,   3,   4,   2,  20,  26,
         7,   0,   1,  45,  47,  48,  49,  50,  51,  52,  53,  54,  55,
        56,  57,  58,  59,  60,  61,  46,  31,  44,  35,  29,  30,  63,
        32,  33,  34,  36,  43,  37,  38,  39,  40,  41,  42,  62, 128,
       127,  65,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
       109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
       122, 123, 124, 125, 126,  97,  96,  95,  79,  66,  67,  68,  69,
        70,  71,  72,  73,  74,  75,  76,  77,  78,  80,  94,  81,  82,
        83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  64])

In [46]:
%%time 
T3darray = return_sorted_3D_array("root_files/ntuple_merged_0.root")

  0%|          | 0/28 [00:00<?, ?it/s]

CPU times: user 55.1 s, sys: 1.76 s, total: 56.9 s
Wall time: 56.9 s


In [120]:
T3darray[54,2,:]

array([5.831503 , 7.641682 , 4.215587 , 5.4569993, 5.468007 , 3.8082256,
       4.04375  , 5.124672 , 4.5758867, 3.2280526], dtype=float32)

In [69]:
np.sum(T3darray==0)

797548

In [72]:
np.prod(T3darray.shape)

56000000

In [85]:
T3darray[846]==0

array([[False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, 

In [143]:
counter = 0
for num,i in enumerate(T3darray):
    if np.all(i==0) == True:
        counter +=1
        print(num)
        
print(counter)

2060
4512
23663
27692
30109
34938
37344
52464
57695
101570
107996
117810
117857
139298
160531
175012
178422
182459
197147
19


In [115]:
for num,i in enumerate(events['ntracks'].array()):
    if i==0:
        print(num)

2060
4512
23663
27692
30109
34938
37344
52464
57695
101570
107996
117810
117857
139298
160531
175012
178422
182459
197147


In [145]:
%%time 
T3darray = return_sorted_3D_array("root_files/ntuple_merged_0.root")

  0%|          | 0/28 [00:00<?, ?it/s]

CPU times: user 55.2 s, sys: 3.08 s, total: 58.3 s
Wall time: 59.2 s
