In [1]:
import uproot
import numpy as np

In [2]:
#############################
# Let's read the file
#############################

# NOTE: absolute, machine-specific path; edit `fileName` to point at your own
# copy of the ntuple before running.
fileName = '/Users/isobel/Desktop/DUNE/2024/Hierarchy/HigherTier/files/ccnutree_0'

inputFileName = fileName + '.root'

# Open the ROOT file in a context manager so the file handle is released as
# soon as everything has been read, instead of staying open for the lifetime
# of the kernel.
with uproot.open(inputFileName) as treeFile:
    tree = treeFile['ccnuselection/ccnusel']
    # Read every branch into memory now. `branches` stays usable after the
    # file is closed; `tree` must not be read from outside this block.
    branches = tree.arrays()

In [3]:
#############################
# Get event-level stuff
#
# These are converted to NumPy arrays straight away. The padding cell further
# down leaves them untouched and they are written to the .npz file as they are.
# (Presumably one value per tree entry -- TODO confirm.)
#############################
event = np.array(branches['Event'])
subrun = np.array(branches['SubRun'])
run = np.array(branches['Run'])

# Reconstructed neutrino vertex position, one array per coordinate.
recoNuVertexX = np.array(branches['RecoNuVtxX'])
recoNuVertexY = np.array(branches['RecoNuVtxY'])
recoNuVertexZ = np.array(branches['RecoNuVtxZ'])

#############################
# True information.. (for cheating)
#
# From here on the `*_main` variables are kept exactly as returned by indexing
# `branches` (no np.array conversion yet). The padding cell later iterates over
# each of them entry by entry, pads every entry to a common length and only
# then converts the result to NumPy.
#
# pfpTruePDG_main is also the array used to compute particle_max_length and
# the particle mask.
#############################
pfpTruePDG_main = branches['RecoPFPTruePDG']
pfpTrueMomX_main = branches['RecoPFPTrueMomX']
pfpTrueMomY_main = branches['RecoPFPTrueMomY']
pfpTrueMomZ_main = branches['RecoPFPTrueMomZ']
    
#############################
# Get GNN node stuff
#
# Node-level features: each of these is later padded to particle_max_length.
#############################
trackShowerScore_main = branches['RecoPFPTrackShowerScore']
nHits_main = branches['RecoPFPRecoNHits']
charge_main = branches['RecoPFPRecoCharge']
vertexX_main = branches['RecoPFPRecoVertexX']
vertexY_main = branches['RecoPFPRecoVertexY']
vertexZ_main = branches['RecoPFPRecoVertexZ']
trackEndX_main = branches['RecoTrackRecoEndX']
trackEndY_main = branches['RecoTrackRecoEndY']
trackEndZ_main = branches['RecoTrackRecoEndZ']
showerDirX_main = branches['RecoShowerRecoDirX']  # not the best direction estimate, placeholder
showerDirY_main = branches['RecoShowerRecoDirY']
showerDirZ_main = branches['RecoShowerRecoDirZ']
ivysaurusMuon_main = branches['RecoPFPIvysaurusMuon']
ivysaurusProton_main = branches['RecoPFPIvysaurusProton']
ivysaurusPion_main = branches['RecoPFPIvysaurusPion']
ivysaurusElectron_main = branches['RecoPFPIvysaurusElectron']
ivysaurusPhoton_main = branches['RecoPFPIvysaurusPhoton']
trackLength_main = branches['RecoTrackLength']
displacement_main = branches['RecoShowerPandrizzleDisplacement']
dca_main = branches['RecoShowerPandrizzleDCA']
nuVertexEnergyAsymmetry_main = branches['RecoShowerPandrizzleMaxPostShowerStartNuVertexEnergyAsymmetry']
nuVertexEnergyWeightedMeanRadialDistance_main = branches['RecoShowerPandrizzleMaxPostShowerStartNuVertexEnergyWeightedMeanRadialDistance']

#############################
# Get higher tier link stuff
#
# Link-level features: each of these is later padded to link_max_length
# (not particle_max_length).
# NOTE(review): ParentPFPIndex / ChildPFPIndex presumably index into the
# node-level lists above -- confirm against the ntuple producer.
#############################
parentPFPIndices_main = branches['ParentPFPIndex']
childPFPIndices_main = branches['ChildPFPIndex']
parentTrackScore_main = branches['ParentTrackScore']
parentNuVertexSeparation_main = branches['ParentNuVertexSeparation']
childNuVertexSeparation_main = branches['ChildNuVertexSeparation']
parentEndRegionNHits_main = branches['ParentEndRegionNHits']
parentEndRegionNParticles_main = branches['ParentEndRegionNParticles']
parentEndRegionRToWall_main = branches['ParentEndRegionRToWall']
vertexSeparation_main = branches['VertexSeparation']
separation3D_main = branches['Separation3D']
chargeRatio_main = branches['ChargeRatio']
pidLinkType_main = branches['PIDLinkType']
openingAngle_main = branches['OpeningAngle']
trackShowerLinkType_main = branches['TrackShowerLinkType']

#############################
# Get node truth
#
# Node-level truth: later padded to particle_max_length.
#############################
trueTrackID_main = branches['RecoPFPTrueTrackID']
trueVisibleParentTrackID_main = branches['RecoPFPTrueVisibleParentTrackID']
trueVisibleGeneration_main = branches['RecoPFPTrueVisibleGeneration']

#############################
# Get link truth
#
# Link-level truth: later padded to link_max_length. This array is also the
# one used to compute link_max_length and the link mask.
#############################
trueParentChildLink_main = branches['TrueParentChildLink']

In [4]:
########################################################
# Functions to pad our vectors
########################################################

def get_max_length(input_array):
    """Return the length of the longest entry in ``input_array``.

    Parameters
    ----------
    input_array : iterable of sized objects
        Every element must support ``len()``.

    Returns
    -------
    int
        The largest ``len(entry)`` found, or ``0`` when ``input_array`` has no
        entries at all (so that nothing needs padding).
    """
    # `default=0` makes an empty input return 0; reducing a zero-size array
    # with np.max would raise a ValueError instead.
    return max((len(entry) for entry in input_array), default=0)

def create_mask(input_array, max_len):
    """Build the boolean padding mask for a single entry.

    The mask holds one ``True`` per element of ``input_array`` followed by one
    ``False`` per slot needed to reach ``max_len``. When the entry is already
    at least ``max_len`` long nothing is appended, so the mask then has
    ``len(input_array)`` elements.

    Parameters
    ----------
    input_array : sized object
        The unpadded entry.
    max_len : int
        Target length of the padded entry.

    Returns
    -------
    list of bool
    """
    n_real = len(input_array)
    # A zero or negative repeat count yields an empty list.
    n_padding = max_len - n_real

    return [True] * n_real + [False] * n_padding

def pad_array(input_array, max_len, pad_value=0):
    """Return ``input_array`` as a list, right-padded up to ``max_len``.

    Parameters
    ----------
    input_array : sized iterable
        The unpadded entry.
    max_len : int
        Target length. An entry that is already at least this long is returned
        unchanged (as a list); it is never truncated.
    pad_value : optional
        Value written into the padded slots. Defaults to ``0``, the value that
        was previously hard-coded. A different sentinel (e.g. ``-1``) is useful
        for index-like data where ``0`` is itself a valid value.

    Returns
    -------
    list
        The elements of ``input_array`` followed by the padding.
    """
    # A zero or negative repeat count yields an empty list, so long entries
    # simply get no padding.
    padding = [pad_value] * (max_len - len(input_array))

    return list(input_array) + padding

In [5]:
####################
# Build the padding masks (True = real element, False = padding)
####################

# Nodes: one mask row per entry of pfpTruePDG_main, as wide as its longest entry
particle_max_length = get_max_length(pfpTruePDG_main)
particle_mask_main = np.array(
    [create_mask(pfps, particle_max_length) for pfps in pfpTruePDG_main]
)

print('particle_max_length:', particle_max_length)

# Links: same recipe, driven by trueParentChildLink_main
link_max_length = get_max_length(trueParentChildLink_main)
link_mask_main = np.array(
    [create_mask(links, link_max_length) for links in trueParentChildLink_main]
)

print('link_max_length:', link_max_length)

particle_max_length: 50
link_max_length: 2107


In [6]:
##############################
# Pad vectors
##############################

def _pad_to_numpy(jagged, max_len):
    """Pad every entry of ``jagged`` to ``max_len`` with pad_array and stack the rows into a NumPy array."""
    return np.array([pad_array(row, max_len) for row in jagged])

#############################
# Event-level stuff
#############################
# Nothing to do here: the event-level arrays are not padded.

#############################
# True information.. (for cheating) -> padded to particle_max_length
#############################
pfpTruePDG_main = _pad_to_numpy(pfpTruePDG_main, particle_max_length)
pfpTrueMomX_main = _pad_to_numpy(pfpTrueMomX_main, particle_max_length)
pfpTrueMomY_main = _pad_to_numpy(pfpTrueMomY_main, particle_max_length)
pfpTrueMomZ_main = _pad_to_numpy(pfpTrueMomZ_main, particle_max_length)

#############################
# GNN node stuff -> padded to particle_max_length
#############################
trackShowerScore_main = _pad_to_numpy(trackShowerScore_main, particle_max_length)
nHits_main = _pad_to_numpy(nHits_main, particle_max_length)
charge_main = _pad_to_numpy(charge_main, particle_max_length)

vertexX_main = _pad_to_numpy(vertexX_main, particle_max_length)
vertexY_main = _pad_to_numpy(vertexY_main, particle_max_length)
vertexZ_main = _pad_to_numpy(vertexZ_main, particle_max_length)

trackEndX_main = _pad_to_numpy(trackEndX_main, particle_max_length)
trackEndY_main = _pad_to_numpy(trackEndY_main, particle_max_length)
trackEndZ_main = _pad_to_numpy(trackEndZ_main, particle_max_length)

showerDirX_main = _pad_to_numpy(showerDirX_main, particle_max_length)
showerDirY_main = _pad_to_numpy(showerDirY_main, particle_max_length)
showerDirZ_main = _pad_to_numpy(showerDirZ_main, particle_max_length)

ivysaurusMuon_main = _pad_to_numpy(ivysaurusMuon_main, particle_max_length)
ivysaurusProton_main = _pad_to_numpy(ivysaurusProton_main, particle_max_length)
ivysaurusPion_main = _pad_to_numpy(ivysaurusPion_main, particle_max_length)
ivysaurusElectron_main = _pad_to_numpy(ivysaurusElectron_main, particle_max_length)
ivysaurusPhoton_main = _pad_to_numpy(ivysaurusPhoton_main, particle_max_length)

trackLength_main = _pad_to_numpy(trackLength_main, particle_max_length)
displacement_main = _pad_to_numpy(displacement_main, particle_max_length)
dca_main = _pad_to_numpy(dca_main, particle_max_length)
nuVertexEnergyAsymmetry_main = _pad_to_numpy(nuVertexEnergyAsymmetry_main, particle_max_length)
nuVertexEnergyWeightedMeanRadialDistance_main = _pad_to_numpy(nuVertexEnergyWeightedMeanRadialDistance_main, particle_max_length)

#############################
# Higher tier link stuff -> padded to link_max_length
#############################
parentPFPIndices_main = _pad_to_numpy(parentPFPIndices_main, link_max_length)
childPFPIndices_main = _pad_to_numpy(childPFPIndices_main, link_max_length)

parentTrackScore_main = _pad_to_numpy(parentTrackScore_main, link_max_length)
parentNuVertexSeparation_main = _pad_to_numpy(parentNuVertexSeparation_main, link_max_length)
childNuVertexSeparation_main = _pad_to_numpy(childNuVertexSeparation_main, link_max_length)

parentEndRegionNHits_main = _pad_to_numpy(parentEndRegionNHits_main, link_max_length)
parentEndRegionNParticles_main = _pad_to_numpy(parentEndRegionNParticles_main, link_max_length)
parentEndRegionRToWall_main = _pad_to_numpy(parentEndRegionRToWall_main, link_max_length)

vertexSeparation_main = _pad_to_numpy(vertexSeparation_main, link_max_length)
separation3D_main = _pad_to_numpy(separation3D_main, link_max_length)
chargeRatio_main = _pad_to_numpy(chargeRatio_main, link_max_length)
pidLinkType_main = _pad_to_numpy(pidLinkType_main, link_max_length)
openingAngle_main = _pad_to_numpy(openingAngle_main, link_max_length)
trackShowerLinkType_main = _pad_to_numpy(trackShowerLinkType_main, link_max_length)

#############################
# Node truth -> padded to particle_max_length
#############################
trueTrackID_main = _pad_to_numpy(trueTrackID_main, particle_max_length)
trueVisibleParentTrackID_main = _pad_to_numpy(trueVisibleParentTrackID_main, particle_max_length)
trueVisibleGeneration_main = _pad_to_numpy(trueVisibleGeneration_main, particle_max_length)

#############################
# Link truth -> padded to link_max_length
#############################
trueParentChildLink_main = _pad_to_numpy(trueParentChildLink_main, link_max_length)

In [7]:
# Output location: <files directory>/<file>.npz
file = 'stupidUPROOT_0'
outputFile = '/Users/isobel/Desktop/DUNE/2024/Hierarchy/HigherTier/files/' + file + '.npz'

# Name under which each array is stored in the archive -> array to store.
# Insertion order is the order in which the arrays are handed to np.savez.
arrays_to_save = {
    # Padding masks
    'particle_mask': particle_mask_main,
    'link_mask': link_mask_main,
    # Event-level
    'event': event,
    'subrun': subrun,
    'run': run,
    'recoNuVertexX': recoNuVertexX,
    'recoNuVertexY': recoNuVertexY,
    'recoNuVertexZ': recoNuVertexZ,
    # True information (for cheating)
    'pfpTruePDG': pfpTruePDG_main,
    'pfpTrueMomX': pfpTrueMomX_main,
    'pfpTrueMomY': pfpTrueMomY_main,
    'pfpTrueMomZ': pfpTrueMomZ_main,
    # GNN node features
    'trackShowerScore': trackShowerScore_main,
    'nHits': nHits_main,
    'charge': charge_main,
    'vertexX': vertexX_main,
    'vertexY': vertexY_main,
    'vertexZ': vertexZ_main,
    'trackEndX': trackEndX_main,
    'trackEndY': trackEndY_main,
    'trackEndZ': trackEndZ_main,
    'showerDirX': showerDirX_main,
    'showerDirY': showerDirY_main,
    'showerDirZ': showerDirZ_main,
    'ivysaurusMuon': ivysaurusMuon_main,
    'ivysaurusProton': ivysaurusProton_main,
    'ivysaurusPion': ivysaurusPion_main,
    'ivysaurusElectron': ivysaurusElectron_main,
    'ivysaurusPhoton': ivysaurusPhoton_main,
    'trackLength': trackLength_main,
    'displacement': displacement_main,
    'dca': dca_main,
    'nuVertexEnergyAsymmetry': nuVertexEnergyAsymmetry_main,
    'nuVertexEnergyWeightedMeanRadialDistance': nuVertexEnergyWeightedMeanRadialDistance_main,
    # Higher tier link features
    'parentPFPIndices': parentPFPIndices_main,
    'childPFPIndices': childPFPIndices_main,
    'parentTrackScore': parentTrackScore_main,
    'parentNuVertexSeparation': parentNuVertexSeparation_main,
    'childNuVertexSeparation': childNuVertexSeparation_main,
    'parentEndRegionNHits': parentEndRegionNHits_main,
    'parentEndRegionNParticles': parentEndRegionNParticles_main,
    'parentEndRegionRToWall': parentEndRegionRToWall_main,
    'vertexSeparation': vertexSeparation_main,
    'separation3D': separation3D_main,
    'chargeRatio': chargeRatio_main,
    'pidLinkType': pidLinkType_main,
    'openingAngle': openingAngle_main,
    'trackShowerLinkType': trackShowerLinkType_main,
    # Node truth
    'trueTrackID': trueTrackID_main,
    'trueVisibleParentTrackID': trueVisibleParentTrackID_main,
    'trueVisibleGeneration': trueVisibleGeneration_main,
    # Link truth
    'trueParentChildLink': trueParentChildLink_main,
}

np.savez(outputFile, **arrays_to_save)