In [27]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
import uproot as ur
import awkward as ak
import time as t
import copy
print("Awkward Array Version: "+str(ak.__version__))
print("Uproot Version: "+str(ur.__version__))

import energyflow as ef
from energyflow.archs import EFN
from energyflow.archs import PFN
# from energyflow.datasets import qg_jets
from energyflow.utils import data_split, to_categorical

from sklearn.metrics import roc_auc_score, roc_curve

Awkward Array Version: 1.7.0
Uproot Version: 4.1.9


In [28]:
import sys
path = '/Work/EPE/ML4pi'
sys.path.append(path)
from util import graph_util as gu

In [29]:
def find_sampling_events(_ak_cluster_cell_ID, _cellGeoID, _cellGeoSampling, _filter):
    ''' Inputs:
    _ak_cluster_cell_ID: awkward array with nested arrays of all the cell geometry IDs
    _cellGeoID: list of the cell IDs to find the index from, expects numpy format
    _cellGeoSampling: same as _cellGeoID but the sampling layer
    _filter: the numbered index of calorimetry layers
    Returns:
    an array of event indices with clusters containing barrel events
    '''

    # create unique list of sampling layers for every event
    _event_layers = []
    # list of event indices with one or more clusters in EMB1-3 or TileBar0-2
    _barrel_events = []
    
    for _evt_idx in range(len(_ak_cluster_cell_ID)):
        _cluster_cell_ID = ak_cluster_cell_ID["cluster_cell_ID"][_evt_idx]

        # create unique list of sampling layers for every cluster
        _event_layers = []

        for j in range(len(_cluster_cell_ID)):
            ''' loop through each cluster of the event '''

            for l in range(len(_cluster_cell_ID[j])):
                ''' loop through the cluster elements searching for new layer numbers'''
                # grab the first cell_geo_ID, l-th element of the j-th cluster
                _cellGeoTag = _cluster_cell_ID[j][l]

                # find the index of the cell_geo_ID, use [0][0] becuase of np.where return
                _cellGeo_idx = np.argwhere(np.asarray(_cellGeoID == _cellGeoTag))

                # find the sampling layer based on the index
                _this_layer = _cellGeoSampling[_cellGeo_idx]

                if not np.isin(_this_layer, _event_layers):
                    _event_layers.append(_this_layer)      

        # add event to list if one is in the barrel layer
        if np.any(np.isin(_event_layers, _filter)):
            _barrel_events.append(_evt_idx)

    return np.array(_barrel_events)


def center_cluster():
    ''' Expects a single cluster with a single cluster center '''
    return None

def center_clusters():
    ''' Expects multiple clusters and creates an imshow using the average of the cluster centers'''
    return None

def cluster_to_imshow():
    ''' Takes a cluster and converts to a python imshow '''
    return None

In [30]:
cPionEvents = path + '/atlas_images/pipm/'
nPionEvents = path + '/atlas_images/pi0/'

In [31]:
cPion_502 = ur.open(cPionEvents+'user.angerami.24559744.OutputStream._000502.root')
nPion_502 = ur.open(nPionEvents+'user.angerami.24559740.OutputStream._000502.root')

In [32]:
cPion_502.items()

[('EventTree;1', <TTree 'EventTree' (125 branches) at 0x02025f72b670>),
 ('CellGeo;1', <TTree 'CellGeo' (9 branches) at 0x020443749a60>)]

In [33]:
cPionEvents = cPion_502["EventTree"]
nPionEvents = nPion_502["EventTree"]

In [34]:
cellGeoTree = cPion_502["CellGeo"]

Load&Show Tree

In [35]:
# Show keys
cPionEvents.show(name_width=42,
                interpretation_width=60)

## Event information
## Charged Pions
ak_cluster_nCells = cPionEvents.arrays(filter_name="cluster_nCells")
ak_cluster_cell_ID = cPionEvents.arrays(filter_name="cluster_cell_ID")
ak_cluster_cell_E = cPionEvents.arrays(filter_name="cluster_cell_E")
nCluster = cPionEvents.arrays(library='np')["nCluster"]
eventNumber = cPionEvents.arrays(library='np')["eventNumber"]
nTrack = cPionEvents.arrays(library='np')["nTrack"]
cPionPart = cPionEvents.arrays(library='np')["nTruthPart"]
cPionId = cPionEvents.arrays(library='np')["truthPartPdgId"]
ak_cluster_Eta = cPionEvents.arrays(filter_name="cluster_Eta")
ak_cluster_Phi = cPionEvents.arrays(filter_name="cluster_Phi")

name                                       | typename                 | interpretation                                              
-------------------------------------------+--------------------------+-------------------------------------------------------------
runNumber                                  | int32_t                  | AsDtype('>i4')
eventNumber                                | int64_t                  | AsDtype('>i8')
lumiBlock                                  | int32_t                  | AsDtype('>i4')
coreFlags                                  | uint32_t                 | AsDtype('>u4')
mcEventNumber                              | int32_t                  | AsDtype('>i4')
mcChannelNumber                            | int32_t                  | AsDtype('>i4')
mcEventWeight                              | float                    | AsDtype('>f4')
nTruthPart                                 | int32_t                  | AsDtype('>i4')
G4PreCalo_n_EM                        

In [36]:
print(len(ak_cluster_cell_ID["cluster_cell_ID"]))

20000


In [37]:
## Neutral Pions
nPionPart = nPionEvents.arrays(library='np')["nTruthPart"]
nPionId = nPionEvents.arrays(library='np')["truthPartPdgId"]

print("If single particle gun, then this should all be ones")
print("Charged Pions: ")
print(cPionPart[0:50])
print("Neutral Pions: ")
print(nPionPart[0:50]);print('.. pew pew ..'); print()

print("Check for particle data group identification of pi plus or pi minus")
print(cPionId[0:5]);print()
print("Check for particle data group identification of pi0")
print(nPionId[0:5])

If single particle gun, then this should all be ones
Charged Pions: 
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1]
Neutral Pions: 
[3 3 3 3 3 3 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3]
.. pew pew ..

Check for particle data group identification of pi plus or pi minus
[array([-211]) array([211]) array([211]) array([211]) array([-211])]

Check for particle data group identification of pi0
[array([111,  22,  22]) array([111,  22,  22]) array([111,  22,  22])
 array([111,  22,  22]) array([111,  22,  22])]


In [38]:
# cell geometry
cellGeoTree.show(name_width=42,
                interpretation_width=50)

name                                       | typename                 | interpretation                                    
-------------------------------------------+--------------------------+---------------------------------------------------
cell_geo_ID                                | std::vector<uint64_t>    | AsJagged(AsDtype('>u8'), header_bytes=10)
cell_geo_sampling                          | std::vector<uint16_t>    | AsJagged(AsDtype('>u2'), header_bytes=10)
cell_geo_eta                               | std::vector<float>       | AsJagged(AsDtype('>f4'), header_bytes=10)
cell_geo_phi                               | std::vector<float>       | AsJagged(AsDtype('>f4'), header_bytes=10)
cell_geo_rPerp                             | std::vector<float>       | AsJagged(AsDtype('>f4'), header_bytes=10)
cell_geo_deta                              | std::vector<float>       | AsJagged(AsDtype('>f4'), header_bytes=10)
cell_geo_dphi                              | std::vector<float>       

In [39]:
cell_geo_ID = cellGeoTree.arrays(filter_name='cell_geo_ID')[0]["cell_geo_ID"].to_numpy()
cell_geo_eta = cellGeoTree.arrays(filter_name='cell_geo_eta')[0]["cell_geo_eta"].to_numpy()
cell_geo_phi = cellGeoTree.arrays(filter_name='cell_geo_phi')[0]["cell_geo_phi"].to_numpy()
cell_geo_rPerp = cellGeoTree.arrays(filter_name='cell_geo_rPerp')[0]["cell_geo_rPerp"].to_numpy()
cell_geo_deta = cellGeoTree.arrays(filter_name='cell_geo_deta')[0]["cell_geo_deta"].to_numpy()
cell_geo_dphi = cellGeoTree.arrays(filter_name='cell_geo_dphi')[0]["cell_geo_dphi"].to_numpy()
cell_geo_sampling = cellGeoTree.arrays(filter_name='cell_geo_sampling')[0]["cell_geo_sampling"].to_numpy()

In [40]:
## Find these events for neutral pions
barrel_filter = np.array([1,2,3,12,13,14])

barrel_events = find_sampling_events(_ak_cluster_cell_ID = ak_cluster_cell_ID,
                                     _cellGeoID = cell_geo_ID, _cellGeoSampling = cell_geo_sampling,
                                     _filter = barrel_filter)
print(barrel_events[0:10])

[ 0  2  3  6  7 11 14 20 22 26]


In [41]:
max_cluster_num = 0

total_events = len(barrel_events)
my_event = 0
print_progress = False
ts = t.time()

# search through all the barrel events
for _evt in barrel_events:
    _clusters = ak_cluster_cell_ID["cluster_cell_ID"][_evt]
    
    my_event += 1
    _completion = my_event/total_events
    if print_progress == False and _completion >= .1:
        print_progress = True
        t_10 = t.time()
        print("10% complete in "+str(t_10 - ts)+" (s)"); print()
    
    for _clust_idx in range(nCluster[_evt]):
        _num_in_clust = len(_clusters[_clust_idx])
        if _num_in_clust > max_cluster_num:
            max_cluster_num = _num_in_clust

print('Maximum number of cells in a cluster: '+str(max_cluster_num))

10% complete in 0.23941946029663086 (s)

Maximum number of cells in a cluster: 726


In [42]:
num_central_clusters = 0

for _evt in barrel_events:
    
    for _clust_idx in range(nCluster[_evt]):
        _clust_Eta = ak_cluster_Eta["cluster_Eta"][_evt][_clust_idx]
        
        if np.abs(_clust_Eta) < .7:
            num_central_clusters += 1
        
print(num_central_clusters)

8909


In [43]:
## Just as in the Pflow example we have a three dimensional numpy arrays with
## empty values set as a flag
X = np.zeros( (num_central_clusters, max_cluster_num, 4) )

In [44]:
def find_central_clusters(_numClusters, _ak_cluster_Eta, _events):
    ''' Goal: return an array of indices for awkward array where cells have cluster centers 
    with an eta < .7
    Inputs:
    _numClusters: a numpy array with the number of clusters in event order
    _ak_cluster_Eta: an awkward array holding the cluster eta information
    _events: a refined list of events to search though (can be all events)
    Returns:
    _central_clusters: a list of tuples containing the awkward array coordinates of central
        clusters
    '''
    
    # Empty list to concatenate array coordinates to as tuples
    _central_clusters = []
    
    for _evt_idx in _events:
        for _clust_idx in range(_numClusters[_evt]):
#             print("event "+str(_evt_idx))
#             print("number of clusters: "+str(_numClusters[_evt]))
            
            _clust_Eta = _ak_cluster_Eta["cluster_Eta"][_evt][_clust_idx]
            
            if _clust_Eta > -.7 and _clust_Eta < .7:
                _central_clusters.append((_evt_idx, _clust_idx))

    return _central_clusters

central_clusters = find_central_clusters(_numClusters=nCluster, _ak_cluster_Eta=ak_cluster_Eta,
                                         _events=barrel_events)
print(central_clusters[0:10])

[(0, 0), (2, 0), (3, 0), (6, 0), (7, 0), (11, 0), (14, 0), (20, 0), (22, 0), (26, 0)]


In [45]:
# TO DO: Seems strange that seemingly none of the barrel events have multiple central clusters with abs(eta) < .7
# investigate this further
## Still working on taking values from events and slicing them into X :-(
for i in range(len(central_clusters)):
    # unpack variables for n'th event m'th cluster
    n, m = central_clusters[i]
    _cluster_cell_ID = ak_cluster_cell_ID["cluster_cell_ID"][n][m]
    
    for j in range(len(_cluster_cell_ID)):
        # j'th element of the m'th cluster
        _cellGeoTag = _cluster_cell_ID[j]

        # find the index of the cell_geo_ID, use [0][0] becuase of np.where return
        _cellGeo_idx = np.argwhere(np.asarray(cell_geo_ID == _cellGeoTag))
        _Eta = cell_geo_eta[_cellGeo_idx][0][0]
        _Phi = cell_geo_phi[_cellGeo_idx][0][0]
        _E = ak_cluster_cell_E["cluster_cell_E"][n][m][j]
        PID = 1 ## Fix this to proper PDGID
        
        _array_vals = np.array([_Eta, _Phi, _E, PID])
        
        X[i,j,:] = _array_vals

In [46]:
print(X[0,0:5,:])

[[-1.46141696 -2.92829227  0.19028817  1.        ]
 [-1.4614526  -2.95282793  0.00604178  1.        ]
 [-1.43639922 -2.92829227  0.02199198  1.        ]
 [-1.43643463 -2.95282793  0.02094988  1.        ]
 [-1.46131432 -2.89142489  0.13298044  1.        ]]


In [47]:
X.shape

(8909, 726, 4)

In [48]:
import tensorflow as tf

In [49]:
print('TensorFlow version: '+str(tf.__version__));print()
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)

gpu_list = ["/gpu:4"]
strategy = tf.distribute.MirroredStrategy(devices=gpu_list)
ngpu = strategy.num_replicas_in_sync
print ('Number of devices: {}'.format(ngpu))

TensorFlow version: 2.6.0

Name: /physical_device:GPU:0   Type: GPU
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:4',)
Number of devices: 1
