In [1]:
import uproot
import numpy as np
import torch

from tensorflow.keras.models import load_model

from HigherTier import HigherTierFileHelper
import GraphBuilder
import Models
import MPNN

In [2]:
#######################################
# Create our models
#######################################

############################
# Config 
############################
primaryFileSuffix = '_ParticleToNeutrinoTruth_EdgeAttr_SWAP'

# For primary GNN
NUM_LAYERS_GNN = 4
EMB_DIM_GNN = 16
INPUT_DIM_GNN = 14 # node features
EDGE_DIM_GNN = 5
GNN_MODEL_PATH = "/Users/isobel/Desktop/DUNE/2024/Hierarchy/models/gnn_" + primaryFileSuffix

# For primary edge classification model
PRIMARY_EDGE_CLASSIFIER_MODEL_PATH = "/Users/isobel/Desktop/DUNE/2024/Hierarchy/models/edge_classifier_" + primaryFileSuffix
OTHER_EDGE_CLASSIFIER_MODEL_PATH = '/Users/isobel/Desktop/DUNE/2024/Hierarchy/HigherTier/models/other_simple_model'


print('GNN_MODEL_PATH:', GNN_MODEL_PATH)

############################
# Setup the models for eval
############################

gnn_model = MPNN.MPNNModel(num_layers=NUM_LAYERS_GNN, emb_dim=EMB_DIM_GNN, input_dim=INPUT_DIM_GNN, edge_dim=EDGE_DIM_GNN)
primary_edge_classifier_model = Models.EdgeClassifier()

gnn_model.load_state_dict(torch.load(GNN_MODEL_PATH))
gnn_model.eval()

primary_edge_classifier_model.load_state_dict(torch.load(PRIMARY_EDGE_CLASSIFIER_MODEL_PATH))
primary_edge_classifier_model.eval()

other_edge_classifier_model = load_model(OTHER_EDGE_CLASSIFIER_MODEL_PATH)

GNN_MODEL_PATH: /Users/isobel/Desktop/DUNE/2024/Hierarchy/models/gnn__ParticleToNeutrinoTruth_EdgeAttr_SWAP


In [3]:
#############################
# Let's read the file
#############################

file = 'stupidUPROOT_0'
inputFileName = '/Users/isobel/Desktop/DUNE/2024/Hierarchy/HigherTier/files/without2DPFPs/' + file + '.npz'

In [4]:
data = np.load(inputFileName)

#############################
# Get file masks
#############################
particle_mask_main = data['particle_mask']
link_mask_main = data['link_mask']

#############################
# Get event-level stuff
#############################
recoNuVertexX = data['recoNuVertexX']
recoNuVertexY = data['recoNuVertexY']
recoNuVertexZ = data['recoNuVertexZ']

#############################
# True information.. (for cheating)
#############################
pfpTruePDG_main = data['pfpTruePDG']
pfpTrueMomX_main = data['pfpTrueMomX']
pfpTrueMomY_main = data['pfpTrueMomY']
pfpTrueMomZ_main = data['pfpTrueMomZ']
    
#############################
# Get GNN node stuff 
#############################
trackShowerScore_main = data['trackShowerScore']
nHits_main = data['nHits']
charge_main = data['charge']
vertexX_main = data['vertexX']
vertexY_main = data['vertexY']
vertexZ_main = data['vertexZ']
trackEndX_main = data['trackEndX']
trackEndY_main = data['trackEndY']
trackEndZ_main = data['trackEndZ']
showerDirX_main = data['showerDirX']  # not the best direction estimate, placeholder
showerDirY_main = data['showerDirY']
showerDirZ_main = data['showerDirZ']
ivysaurusMuon_main = data['ivysaurusMuon']
ivysaurusProton_main = data['ivysaurusProton']
ivysaurusPion_main = data['ivysaurusPion']
ivysaurusElectron_main = data['ivysaurusElectron']
ivysaurusPhoton_main = data['ivysaurusPhoton']
trackLength_main = data['trackLength']
displacement_main = data['displacement']
dca_main = data['dca']
nuVertexEnergyAsymmetry_main = data['nuVertexEnergyAsymmetry']
nuVertexEnergyWeightedMeanRadialDistance_main = data['nuVertexEnergyWeightedMeanRadialDistance']

#############################
# Get higher tier link stuff
#############################
parentPFPIndices_main = data['parentPFPIndices']
childPFPIndices_main = data['childPFPIndices']
parentTrackScore_main = data['parentTrackScore']
parentNuVertexSeparation_main = data['parentNuVertexSeparation']
childNuVertexSeparation_main = data['childNuVertexSeparation']
parentEndRegionNHits_main = data['parentEndRegionNHits']
parentEndRegionNParticles_main = data['parentEndRegionNParticles']
parentEndRegionRToWall_main = data['parentEndRegionRToWall']
vertexSeparation_main = data['vertexSeparation']
separation3D_main = data['separation3D']
chargeRatio_main = data['chargeRatio']
pidLinkType_main = data['pidLinkType']
openingAngle_main = data['openingAngle']
trackShowerLinkType_main = data['trackShowerLinkType']

#############################
# Get node truth
#############################
trueTrackID_main = data['trueTrackID']
trueVisibleParentTrackID_main = data['trueVisibleParentTrackID']
trueVisibleGeneration_main = data['trueVisibleGeneration']

#############################
# Get link truth
#############################
trueParentChildLink_main = data['trueParentChildLink']

#############################
# How many entries are we working with?
#############################
nEntries = particle_mask_main.shape[0]
print('We are working with:', nEntries, 'entries')
print(link_mask_main.shape[0])

#############################
# Network scores
#############################
trueTrackID_out = []
trueVisibleGeneration_out = []
trueVisibleParentTrackID_out = []

particleIndex_out = []
primaryScores_out = []

higherTierChildIndex_out = []
higherTierParentIndex_out = []
higherTierParentTrackID_out = []
higherTierScores_out = []

#############################
# GNN config
#############################

modeDict = {
    "ADD_NEUTRINO"                 : True,
    "CHEAT_DIRECTION"              : True,
    "CHEAT_PID"                    : True, 
    "MAKE_PARTICLE_PARTICLE_LINKS" : True,
    "EDGE_FRACTION"                : 0.8,
    "DO_NORMALISATION"             : True,
    "IS_PRIMARY_TRAINING"          : False,
    "IS_HIGHER_TIER_TRAINING"      : False,
    "MAX_NODE_CLASS"               : 4
}

We are working with: 98400 entries
98400


In [5]:
###############################################
# Find Primaries - Let's create our graphs!
###############################################

for iEvent in [26] : #range(nEntries) : 
    
    ########################################
    # Things to write out for this event
    ########################################
    trueTrackID_event = []
    trueVisibleGeneration_event = []
    trueVisibleParentTrackID_event = []

    particleIndex_event = []
    primaryScores_event = []

    higherTierChildIndex_event = []
    higherTierParentIndex_event = []
    higherTierScores_event = []
    
    ########################################
    # Read our input arrays
    ########################################
    particle_mask_event = particle_mask_main[iEvent]
    link_mask_event = link_mask_main[iEvent]

    # For higher tier network
    higherTierDict = {
        "parentTrackScore"           : parentTrackScore_main[iEvent][link_mask_event], \
        "parentNuVertexSeparation"   : parentNuVertexSeparation_main[iEvent][link_mask_event], \
        "childNuVertexSeparation"    : childNuVertexSeparation_main[iEvent][link_mask_event], \
        "parentEndRegionNHits"       : parentEndRegionNHits_main[iEvent][link_mask_event], \
        "parentEndRegionNParticles"  : parentEndRegionNParticles_main[iEvent][link_mask_event], \
        "parentEndRegionRToWall"     : parentEndRegionRToWall_main[iEvent][link_mask_event], \
        "vertexSeparation"           : vertexSeparation_main[iEvent][link_mask_event], \
        "separation3D"               : separation3D_main[iEvent][link_mask_event], \
        "chargeRatio"                : chargeRatio_main[iEvent][link_mask_event], \
        "pidLinkType"                : pidLinkType_main[iEvent][link_mask_event], \
        "openingAngle"               : openingAngle_main[iEvent][link_mask_event], \
        "trackShowerLinkType"        : trackShowerLinkType_main[iEvent][link_mask_event], \
        "trueParentChildLink"        : trueParentChildLink_main[iEvent][link_mask_event], \
        "parentPFPIndices"           : parentPFPIndices_main[iEvent][link_mask_event], \
        "childPFPIndices"            : childPFPIndices_main[iEvent][link_mask_event]
    }
        
    # For GNN
    eventDict = {
        "recoNuVertexX"                            : recoNuVertexX[iEvent], \
        "recoNuVertexY"                            : recoNuVertexY[iEvent], \
        "recoNuVertexZ"                            : recoNuVertexZ[iEvent], \
        "nParticles"                               : np.count_nonzero(particle_mask_event), \
        "trackShowerScore"                         : trackShowerScore_main[iEvent][particle_mask_event], \
        "nHits"                                    : nHits_main[iEvent][particle_mask_event], \
        "charge"                                   : charge_main[iEvent][particle_mask_event], \
        "vertexX"                                  : vertexX_main[iEvent][particle_mask_event], \
        "vertexY"                                  : vertexY_main[iEvent][particle_mask_event], \
        "vertexZ"                                  : vertexZ_main[iEvent][particle_mask_event], \
        "trackEndX"                                : trackEndX_main[iEvent][particle_mask_event], \
        "trackEndY"                                : trackEndY_main[iEvent][particle_mask_event], \
        "trackEndZ"                                : trackEndZ_main[iEvent][particle_mask_event], \
        "showerDirX"                               : showerDirX_main[iEvent][particle_mask_event], \
        "showerDirY"                               : showerDirY_main[iEvent][particle_mask_event], \
        "showerDirZ"                               : showerDirZ_main[iEvent][particle_mask_event], \
        "ivysaurusMuon"                            : ivysaurusMuon_main[iEvent][particle_mask_event], \
        "ivysaurusProton"                          : ivysaurusProton_main[iEvent][particle_mask_event], \
        "ivysaurusPion"                            : ivysaurusPion_main[iEvent][particle_mask_event], \
        "ivysaurusElectron"                        : ivysaurusElectron_main[iEvent][particle_mask_event], \
        "ivysaurusPhoton"                          : ivysaurusPhoton_main[iEvent][particle_mask_event], \
        "trackLength"                              : trackLength_main[iEvent][particle_mask_event], \
        "displacement"                             : displacement_main[iEvent][particle_mask_event], \
        "dca"                                      : dca_main[iEvent][particle_mask_event], \
        "isNeutrinoPDG"                            : np.zeros(pfpTruePDG_main[iEvent][particle_mask_event].shape), \
        "nuVertexEnergyAsymmetry"                  : nuVertexEnergyAsymmetry_main[iEvent][particle_mask_event], \
        "nuVertexEnergyWeightedMeanRadialDistance" : nuVertexEnergyWeightedMeanRadialDistance_main[iEvent][particle_mask_event], \
        "trueTrackID"                              : trueTrackID_main[iEvent][particle_mask_event], \
        "trueVisibleParentTrackID"                 : trueVisibleParentTrackID_main[iEvent][particle_mask_event], \
        "trueMomX"                                 : pfpTrueMomX_main[iEvent][particle_mask_event], \
        "trueMomY"                                 : pfpTrueMomY_main[iEvent][particle_mask_event], \
        "trueMomZ"                                 : pfpTrueMomZ_main[iEvent][particle_mask_event], \
        "truePDG"                                  : pfpTruePDG_main[iEvent][particle_mask_event], \
        "trueVisibleGeneration"                    : trueVisibleGeneration_main[iEvent][particle_mask_event]
    }
    
    ########################################
    # Get GNN input
    ########################################
    data_FC, pfp_index = GraphBuilder.InferenceGraphBuilder(eventDict, modeDict)
    
    ########################################
    # Get input for higher tiers network
    ########################################
    variables, y, parentPFPIndices, childPFPIndices = HigherTierFileHelper.readEvent(higherTierDict)

    ##################################################
    # Calculate Scores of neutrino -> particle edges
    ##################################################
    if (data_FC.num_nodes > 1) :
        pred = gnn_model(data_FC)

        # Assuming that the neutrino is the last node added (which it is in this config)
        nuEdgeMask = (data_FC.edge_index[0] == (data_FC.num_nodes - 1))
        target_index = np.reshape(data_FC.edge_index[0][nuEdgeMask].detach().numpy(), -1)
        source_index = np.reshape(data_FC.edge_index[1][nuEdgeMask].detach().numpy(), -1)
        edge_index = torch.tensor([target_index, source_index], dtype=torch.long)
        
        edge_pred = primary_edge_classifier_model(pred, edge_index)

    for iParticle in range(eventDict["nParticles"]) :
    
        # Move on if it is the neutrino
        if (eventDict['isNeutrinoPDG'][iParticle] == 1) :
            continue
            
        # Node is not in graph (inf ivy or 2D?)
        if (len(np.where(pfp_index == iParticle)[0]) == 0) :
            continue 
    
        primary_GNN_score = edge_pred[np.where(pfp_index == iParticle)[0][0]].item()
                
        trueTrackID_event.append(trueTrackID_main[iEvent][iParticle])
        trueVisibleGeneration_event.append(trueVisibleGeneration_main[iEvent][iParticle])
        trueVisibleParentTrackID_event.append(trueVisibleParentTrackID_main[iEvent][iParticle])
        particleIndex_event.append(iParticle)
        primaryScores_event.append(primary_GNN_score)

    ############################
    # Higher Tiers
    ############################    
    for iChild in range(eventDict["nParticles"]) :
            
        # We don't care about the neutrino here
        if (eventDict["isNeutrinoPDG"][iChild] == 1) :
            continue
            
        # Only look at 3D particles
        if (vertexX_main[iEvent][iChild] < -900) :
            continue
            
        for iParent in range(eventDict["nParticles"]) :
                
            if (eventDict["isNeutrinoPDG"][iParent] == 1) :
                continue
                
            # Only look at 3D particles
            if (vertexX_main[iEvent][iParent] < -900) :
                continue
                
            # Need to add more to the CCNuSelection
            linkIndex = HigherTierFileHelper.getLinkIndex(parentPFPIndices, childPFPIndices, iParent, iChild) 

            # If the particle-particle link hasn't been saved in the analyser
            if (linkIndex < 0) :
                print('ISOBEL SOMETHING HAS GONE WRONG - SAD')
                continue
                    
            linkVariables = variables[linkIndex].reshape(-1,12)
                                
            y_pred = other_edge_classifier_model.predict(linkVariables, verbose=2)
                        
            higherTierChildIndex_event.append(iChild)
            higherTierParentIndex_event.append(iParent)
            higherTierScores_event.append(y_pred[0][0])
            
            
#     print('//////////////////////////////////')
#     print('//////////// NEW EVENT ///////////')
#     print('//////////////////////////////////')
#     print('trueVisibleGeneration_event:', trueVisibleGeneration_event)
#     print('trueVisibleParentTrackID_event:', trueVisibleParentTrackID_event)
#     print('particleIndex_event:', particleIndex_event)
#     print('primaryScores_event:', primaryScores_event)
#     print('higherTierChildIndex_event:', higherTierChildIndex_event)
#     print('higherTierParentIndex_event:', higherTierParentIndex_event)
#     print('higherTierScores_event:', higherTierScores_event)
            

    trueTrackID_out.append(trueTrackID_event)
    trueVisibleGeneration_out.append(trueVisibleGeneration_event)
    trueVisibleParentTrackID_out.append(trueVisibleParentTrackID_event)
    particleIndex_out.append(particleIndex_event)
    primaryScores_out.append(primaryScores_event)
    higherTierChildIndex_out.append(higherTierChildIndex_event)
    higherTierParentIndex_out.append(higherTierParentIndex_event)
    higherTierScores_out.append(higherTierScores_event)
    
print('//////////////////////////////////')
print('//////////////////////////////////')
print('trueVisibleGeneration_out:', trueVisibleGeneration_out)
print('trueVisibleParentTrackID_out:', trueVisibleParentTrackID_out)
print('particleIndex_out:', particleIndex_out)
print('primaryScores_out:', primaryScores_out)
print('higherTierChildIndex_out:', higherTierChildIndex_out)
print('higherTierParentIndex_out:', higherTierParentIndex_out)
print('higherTierScores_out:', higherTierScores_out)

ISOBEL SOMETHING HAS GONE WRONG - SAD
ISOBEL SOMETHING HAS GONE WRONG - SAD
ISOBEL SOMETHING HAS GONE WRONG - SAD
1/1 - 0s - 66ms/epoch - 66ms/step
ISOBEL SOMETHING HAS GONE WRONG - SAD
1/1 - 0s - 6ms/epoch - 6ms/step
ISOBEL SOMETHING HAS GONE WRONG - SAD
ISOBEL SOMETHING HAS GONE WRONG - SAD
ISOBEL SOMETHING HAS GONE WRONG - SAD
//////////////////////////////////
//////////////////////////////////
trueVisibleGeneration_out: [[2, 3, 2]]
trueVisibleParentTrackID_out: [[0, 2, 0]]
particleIndex_out: [[0, 1, 2]]
primaryScores_out: [[0.9967396855354309, 0.0026916912756860256, 0.002705066232010722]]
higherTierChildIndex_out: [[1, 1]]
higherTierParentIndex_out: [[0, 2]]
higherTierScores_out: [[0.23875964, 0.7112097]]


  edge_index = torch.tensor([target_index, source_index], dtype=torch.long)


In [6]:
########################################################
# Python is annoying so we will have to pad our vectors
########################################################

def get_max_length(input_array) :
    
    lengths = [len(entry) for entry in input_array]
    lengths = np.array(lengths)
    
    return np.max(lengths)


def create_mask(input_array, max_len):
    
    file_mask = [True] * len(input_array)
    to_fill = [False] * (max_len - len(file_mask))
    file_mask = file_mask + to_fill
    
    return file_mask

def pad_array(input_array, max_len):
    
    pad = [0] * (max_len - len(input_array))
    input_array = input_array + pad
    
    return input_array

In [7]:
####################
# Create file mask
####################
particle_max_length = get_max_length(primaryScores_out)
particle_mask_out = [create_mask(entry, particle_max_length) for entry in primaryScores_out]
particle_mask_out = np.array(particle_mask_out)

link_max_length = get_max_length(higherTierScores_out)
link_mask_out = [create_mask(entry, link_max_length) for entry in higherTierScores_out]
link_mask_out = np.array(link_mask_out)

# print('--------- BEFORE ----------')
# print('trueTrackID_out:', trueTrackID_out)
# print('trueVisibleGeneration_out:', trueVisibleGeneration_out)
# print('trueVisibleParentTrackID_out:', trueVisibleParentTrackID_out)
# print('particleIndex_out:', particleIndex_out)
# print('primaryScores_out:', primaryScores_out)
# print('higherTierChildIndex_out:', higherTierChildIndex_out)
# print('higherTierParentIndex_out:', higherTierParentIndex_out)
# print('higherTierScores_out:', higherTierScores_out)
# print('---------------------------')

####################
# Pad vectors
####################
trueTrackID_out = [pad_array(entry, particle_max_length) for entry in trueTrackID_out]
trueTrackID_out = np.array(trueTrackID_out)

trueVisibleGeneration_out = [pad_array(entry, particle_max_length) for entry in trueVisibleGeneration_out]
trueVisibleGeneration_out = np.array(trueVisibleGeneration_out)

trueVisibleParentTrackID_out = [pad_array(entry, particle_max_length) for entry in trueVisibleParentTrackID_out]
trueVisibleParentTrackID_out = np.array(trueVisibleParentTrackID_out)

particleIndex_out = [pad_array(entry, particle_max_length) for entry in particleIndex_out]
particleIndex_out = np.array(particleIndex_out)

primaryScores_out = [pad_array(entry, particle_max_length) for entry in primaryScores_out]
primaryScores_out = np.array(primaryScores_out)

higherTierChildIndex_out = [pad_array(entry, link_max_length) for entry in higherTierChildIndex_out]
higherTierChildIndex_out = np.array(higherTierChildIndex_out)

higherTierParentIndex_out = [pad_array(entry, link_max_length) for entry in higherTierParentIndex_out]
higherTierParentIndex_out = np.array(higherTierParentIndex_out)

higherTierScores_out = [pad_array(entry, link_max_length) for entry in higherTierScores_out]
higherTierScores_out = np.array(higherTierScores_out)


# print('--------- AFTER ----------')
# print('trueTrackID_out:', trueTrackID_out)
# print('trueVisibleGeneration_out:', trueVisibleGeneration_out)
# print('trueVisibleParentTrackID_out:', trueVisibleParentTrackID_out)
# print('particleIndex_out:', particleIndex_out)
# print('primaryScores_out:', primaryScores_out)
# print('higherTierChildIndex_out:', higherTierChildIndex_out)
# print('higherTierParentIndex_out:', higherTierParentIndex_out)
# print('higherTierScores_out:', higherTierScores_out)
# print('---------------------------')

In [8]:
#############################
# Write out!
#############################

file = 'networkScores_TEST'
outputFile = '/Users/isobel/Desktop/DUNE/2024/Hierarchy/HigherTier/files/without2DPFPs/' + file + '.npz'
    
np.savez(outputFile, \
         particle_mask=particle_mask_out,\
         trueTrackID=trueTrackID_out, \
         trueVisibleGeneration=trueVisibleGeneration_out, \
         trueVisibleParentTrackID=trueVisibleParentTrackID_out, \
         particleIndex=particleIndex_out, \
         primaryScores=primaryScores_out, \
         link_mask=link_mask_out,\
         higherTierChildIndex=higherTierChildIndex_out, \
         higherTierParentIndex=higherTierParentIndex_out, \
         higherTierScores=higherTierScores_out)