In [None]:
#%pip install numpy uproot awkward matplotlib

In [None]:
import awkward as ak
import numpy as np
import uproot as uproot

In [None]:
# Open the input ROOT file (path relative to the notebook's working directory)
# and list the keys of the objects it contains.
# The handle is kept in `file` because the next cell reads the tree from it.
file = uproot.open('merged.root')
file.keys()

In [None]:
# Select the tree whose branches (features and truth labels) are read in the cells below,
# then print a summary of its branches.
events = file['lCToTSTsAssoc/lCTo3simTS_tree']
events.show()

## Input Features

 - `event`: the index of the events in the file
 
Each event contains a varying number of points. For each point:
 - `pos_x`, `pos_y`, `pos_z`: (x,y,z) coordinates of the point
 - `time`: time of the point (not all points have timing information -- a value of -99 indicates no timing information)
 - `eta`, `phi`: [pseudorapidity](https://en.wikipedia.org/wiki/Pseudorapidity), azimuth angle of the point
 - `layer`: layer number of the point
 - `energy`: energy of the point

**[Note]** `simTst_idx` and `enFraction` are the truth labels.

These features are jagged [Awkward Arrays](https://awkward-array.org/) (the `JaggedArray` of [awkward-0.x](https://github.com/scikit-hep/awkward-0.x)). A jagged array can be thought of as an array of arrays, where the first level of indices corresponds to the event indices, and the second level corresponds to the points (layer clusters, LCs) within each event.

In [None]:
# To get the total number of events in the file: every event is one entry of the tree,
# so the entry count is read from the tree itself instead of loading the whole
# 'event' branch only to measure its length
totEvents = events.num_entries
totEvents

In [None]:
# Read, for all events in the file, the per-point branches used by the rest of the notebook.
# Each result is jagged: first index = event, second index = point within the event.
# NOTE: `id` shadows the Python builtin of the same name; the name is kept because
# later cells refer to it.
(id, pos_x, pos_y, pos_z, energy, eta, phi) = (
    events[branch_name].array()
    for branch_name in ('id', 'pos_x', 'pos_y', 'pos_z', 'energy', 'eta', 'phi')
)


In [None]:
# To access the energy of all points from the first event: indexing with an event number
# returns that event's values as a flat 1D Awkward Array
# (convert with ak.to_numpy, as the plotting cell does, when a NumPy array is needed)
energy[0]

In [None]:
# To get the number of points in each event
# (ak.num counts the entries of every inner list, giving one count per event)
nPoints_perEv = ak.num(energy)
nPoints_perEv

In [None]:
# To pad/truncate every event to exactly 2 points, i.e. a regular 2D layout:
# events with fewer points are padded with None and, because clip=True,
# events with more points are cut after the second one
ak.pad_none(energy, 2, clip=True)

In [None]:
# To flatten the jagged array to a 1d array (the per-event grouping is dropped,
# the points of all events are concatenated)
ak.flatten(energy)

In [None]:
# To get the total number of points summed over all the events
len(ak.flatten(energy))

### Truth definition

The target is to assign each point to up to 3 particle showers - aka Tracksters - sorted in decreasing order of the fraction of the point's energy they contain. The truth information is as follows:

 - `simTst_idx`: index of the (up to 3) showers associated with the point at hand; if the point is associated with fewer than 3 showers, the remaining array elements are -1;
 - `enFraction`: the fraction of the point's energy that should be assigned to each of the (up to 3) showers.

In [None]:
# Truth shower indices of every point in every event (-1 marks an unused slot);
# display those of the first event
simTst_idx = events['simTst_idx'].array()
simTst_idx[0]

In [None]:
# Truth energy fractions matching the slots of simTst_idx;
# display those of the first event
enFraction = events['enFraction'].array()
enFraction[0]

## Pattern Recognition

An example of pattern recognition is provided. 
To a first approximation, points belonging to the same particle appear to have very similar eta and phi coordinates. 
Maybe we can just create a big 2D tiling structure, and cluster together all those points that lie in the same tile?

In [None]:
# Build one array per event with one row per point; the columns are, in order:
# id, pos_x, pos_y, pos_z, eta, phi, energy
points = []

for evt in range(totEvents):
    columns = (id[evt], pos_x[evt], pos_y[evt], pos_z[evt], eta[evt], phi[evt], energy[evt])
    rows = list(zip(*columns))
    points.append(np.array(rows))
    


In [None]:
from math import pi

# --- eta axis of the tiling ---
# points are never smaller than 1.5 and never exceed 3.2 in eta
minEta, maxEta = 1.5, 3.2
etaRange = maxEta - minEta
nEtaBins = 20
etaBinSize = etaRange/nEtaBins

# --- phi axis of the tiling ---
# phi goes from -pi to pi, so the phi range is 2*pi
nPhiBins = 64
phiBinSize = 2*pi/nPhiBins

# the 2-D (eta, phi) tiling is linearized into a single bin index
nBins = nEtaBins * nPhiBins

In [None]:
def clamp(num, min_value, max_value):
    """Limit num to the interval [min_value, max_value] and truncate the result to an int."""
    # first cap from above, then from below (same precedence as max(min(...)))
    capped = max_value if max_value < num else num
    bounded = min_value if min_value > capped else capped
    return int(bounded)

def etaBin(eta):
    """Return the eta tile index of a point, limited to 0 .. nEtaBins-1."""
    # distance from the lower eta edge, expressed in units of one tile
    return clamp((eta - minEta) / etaBinSize, 0, nEtaBins-1)
    

def phiBin(phi):
    """Return the phi tile index of a point, limited to 0 .. nPhiBins-1."""
    # shift phi by pi so that the lowest tile starts at phi = -pi
    shifted_phi = phi + pi
    return clamp(shifted_phi / phiBinSize, 0, nPhiBins-1)


def globalBin(eta, phi):
    """Return the linearized tile index: each eta row holds nPhiBins consecutive phi tiles."""
    phi_column = phiBin(phi)
    eta_row = etaBin(eta)
    return eta_row * nPhiBins + phi_column

# Tile-based pattern recognition: all points of an event that fall in the same
# (eta, phi) tile are grouped into one trackster.
#   output_idx[evt][point]  -> up to three trackster indices per point (-1 = unused slot)
#   output_enFr[evt][point] -> energy fraction assigned to each of those tracksters
#   allTracksters[evt]      -> list of tracksters, each a list of point ids
output_idx  = list()
output_enFr = list()
allTracksters = list()

# Only the first 20 events are processed. The bound is capped by the number of
# events actually loaded in `points`, so a shorter input does not raise an IndexError.
# totEvents is reassigned on purpose: the cells below loop over the same events.
totEvents = min(20, len(points))
for evt in range(totEvents):
    # the histogram contains a list of point ids per tile
    histogram = [list() for _ in range(nBins)]
    for p in points[evt]:
        # columns of p: 0 = id, 4 = eta, 5 = phi
        histogram[globalBin(p[4], p[5])].append(int(p[0]))

    # every non-empty tile becomes one trackster holding the ids of its points
    tracksters = [list(tile) for tile in histogram if len(tile) > 0]

    # create output lists
    # for every point we want as a result the three top tracksters it contributed to
    # and the fraction of the energy that was contributed to each of them
    nPointsInEvent = len(points[evt])
    output_idx_tmp = [[-1, -1, -1] for _ in range(nPointsInEvent)]
    output_enFr_tmp = [[0.0, 0.0, 0.0] for _ in range(nPointsInEvent)]

    for tracksterId, trackster in enumerate(tracksters):
        # a point lies in exactly one tile, so it gets one trackster with fraction 1.0
        # NOTE(review): the point id is used as the position of the point inside
        # the event here - confirm that `id` is the per-event index
        for lcId in trackster:
            output_idx_tmp[lcId][0] = tracksterId
            output_enFr_tmp[lcId][0] = 1.0

    # Adding my results to my global output for validation
    output_idx.append(output_idx_tmp)
    output_enFr.append(output_enFr_tmp)
    allTracksters.append(tracksters)

# number of events processed
print(len(allTracksters))
          


In [None]:
# print (output_idx[0])

In [None]:
import matplotlib.pyplot as plt
from math import ceil

# 3D scatter plot of the point positions of a single event
eventToVisualize = 0
# matplotlib needs plain NumPy arrays, so the per-event slices are converted
xs = ak.to_numpy(pos_x[eventToVisualize])
ys = ak.to_numpy(pos_y[eventToVisualize])
zs = ak.to_numpy(pos_z[eventToVisualize])

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter(xs, ys, zs)
# label the axes with the branch names so the figure can be read on its own
ax.set_xlabel('pos_x')
ax.set_ylabel('pos_y')
ax.set_zlabel('pos_z')
ax.set_title(f'Points of event {eventToVisualize}')
plt.show()

In [None]:

## Group the layer clusters (LCs) of every processed event by the SimTrackster
## they belong to, using the truth branches simTst_idx / enFraction.
##   simTracksters[evt][k]          -> LC indices belonging to SimTrackster k
##   simTracksters_lcenergy[evt][k] -> energy each of those LCs deposits in SimTrackster k

# highest SimTrackster index found in the processed events
# (unused slots are -1, so they never raise the maximum)
maxElem = 0
for i_evt in range(totEvents):
    evtMax = ak.max(simTst_idx[i_evt])
    # ak.max yields None for an event without any point
    if evtMax is not None and evtMax > maxElem:
        maxElem = int(evtMax)

## placeholder lists, one (empty) entry per SimTrackster index:
## lc_idx_from_sim[0]: LCs corresponding to SimTrackster 0,
## lc_idx_from_sim[1]: LCs corresponding to SimTrackster 1, etc...
## They are created once (not once per event) and are not filled in this cell.
lc_idx_from_sim = [list() for _ in range(maxElem + 1)]
lc_enFrac_from_sim = [list() for _ in range(maxElem + 1)]

# At least 2 slots (the value that used to be hard-coded), but large enough for
# every shower index present in the data so the indexing below cannot overflow.
numSimTracksters = max(2, maxElem + 1)

simTracksters = list()
simTracksters_lcenergy = list()

for i_evt in range(totEvents):

    simTracksters_tmp = [list() for _ in range(numSimTracksters)]
    simTracksters_lcenergy_tmp = [list() for _ in range(numSimTracksters)]

    # NOTE: rebound for every event, so after the loop this variable only holds
    # the totals of the LAST processed event.
    simTracksterTotalEnergy = [0]*numSimTracksters

    eventId = i_evt
    numberOfLayerClustersInEvent = len(points[eventId])

    for lcId in range(numberOfLayerClustersInEvent):
        simTrackstersInLC = simTst_idx[eventId][lcId]
        energyFractionsOfLCinSimTrackster = enFraction[eventId][lcId]
        totalEnergyOfLayerCluster = energy[eventId][lcId]
        # each LC carries up to 3 (shower index, energy fraction) pairs
        for i in range(3):
            simIdx = simTrackstersInLC[i]
            if simIdx != -1:
                lcEnergyInSim = energyFractionsOfLCinSimTrackster[i]*totalEnergyOfLayerCluster
                simTracksters_tmp[simIdx].append(lcId)
                simTracksters_lcenergy_tmp[simIdx].append(lcEnergyInSim)
                simTracksterTotalEnergy[simIdx] += lcEnergyInSim

    simTracksters.append(simTracksters_tmp)
    simTracksters_lcenergy.append(simTracksters_lcenergy_tmp)

# number of SimTrackster slots of the first event (0 when no event was processed)
print(len(simTracksters[0]) if simTracksters else 0)


In [None]:
## compare SIM and RECO Tracksters

def matchLCToSimTrackster(sim_tracksters,lc_from_reco_trackster):
    """Tell whether a layer cluster belongs to a SimTrackster.

    sim_tracksters: iterable with the layer-cluster ids of one SimTrackster.
    lc_from_reco_trackster: the layer-cluster id to look for.

    Returns True when the id is found and False otherwise (an explicit bool,
    instead of falling off the end and returning None on a miss).
    """
    return any(lc_from_reco_trackster == lc_from_sim_trackster
               for lc_from_sim_trackster in sim_tracksters)


# Compare the reconstructed tracksters with the SimTracksters, event by event.
#
# Definitions used below (shared energy = energy that the LCs of a reco trackster
# deposit in a given SimTrackster, i.e. LC energy times that SimTrackster's fraction):
#   - a reco trackster is "efficient" when, for at least one SimTrackster, the
#     shared energy exceeds 50% of that SimTrackster's total energy IN THE SAME EVENT;
#   - the purity of a reco trackster is the shared energy with its best-matching
#     SimTrackster divided by the total energy of the reco trackster.
# NOTE(review): the purity definition above is the conventional one and is an
# assumption about the intended metric - please confirm.
eff_num_0p50 = 0
tot_tracksters = 0
pur_total = [0 for i in range(totEvents)]

## loop over the events
for i_evt in range(totEvents):

    recoTracksters = allTracksters[i_evt]
    tot_tracksters += len(recoTracksters)
    eff_num_0p50_trackster = 0

    if not recoTracksters:
        # nothing was reconstructed in this event: report zeros instead of dividing by zero
        print ("Mean trackster efficiency / evt = " , 0.0 )
        print ("Mean trackster purity / evt     = " , 0.0 )
        continue

    # point energies of this event as a NumPy array (one conversion per event)
    evtEnergy = ak.to_numpy(energy[i_evt])

    # truth of this event: for each SimTrackster, a map LC -> energy deposited in it,
    # and the SimTrackster's total energy
    simLcEnergy = []
    simTotalEnergy = []
    for simLcs, simEnergies in zip(simTracksters[i_evt], simTracksters_lcenergy[i_evt]):
        lcEnergyMap = {}
        for simLc, simEn in zip(simLcs, simEnergies):
            lcEnergyMap[simLc] = lcEnergyMap.get(simLc, 0.0) + float(simEn)
        simLcEnergy.append(lcEnergyMap)
        simTotalEnergy.append(sum(lcEnergyMap.values()))

    ## loop over the tracksters of the event
    purities = []
    for i_reco_trackster in recoTracksters:

        sumEn_reco = 0.0
        sumEn_reco_match = [0.0 for i in range(len(simLcEnergy))]

        # NOTE(review): as in the clustering cell, the LC id is used as the
        # position of the LC inside the event - confirm against the input file
        for i_lc_reco in i_reco_trackster:
            sumEn_reco += float(evtEnergy[i_lc_reco])
            for i_sim_trackster, lcEnergyMap in enumerate(simLcEnergy):
                sumEn_reco_match[i_sim_trackster] += lcEnergyMap.get(i_lc_reco, 0.0)

        if any(shared > 0.5*total for shared, total in zip(sumEn_reco_match, simTotalEnergy)):
            eff_num_0p50 += 1
            eff_num_0p50_trackster += 1

        bestShared = max(sumEn_reco_match, default=0.0)
        purities.append(bestShared/sumEn_reco if sumEn_reco > 0 else 0.0)

    pur_total[i_evt] = sum(purities)/len(purities)
    print ("Mean trackster efficiency / evt = " , eff_num_0p50_trackster/len(recoTracksters) )
    print ("Mean trackster purity / evt     = " , pur_total[i_evt] )

print (" ")
print ("Reco trackster efficiency = " , eff_num_0p50/tot_tracksters if tot_tracksters else 0.0 )
# average over the events that were actually processed (not a hard-coded count)
print ("Reco trackster purity     = " , sum(pur_total)/len(pur_total) if pur_total else 0.0 )
    

