In [None]:
# How many events to process in one batch (passed to LoadConfig as batch_size).
BATCH_SIZE = 5

# A leading "{name}" in the paths below is a placeholder: replace_prefix()
# (defined in the next cell) swaps it for the first candidate directory
# that exists on the current machine.

# updated geometry, big training
SUPERA_INPUT_FILE  = "{data-nd-lar-reco}/supera/geom-20210623/neutrino.0.larcv.root"
WEIGHTS_FILE = "{data-nd-lar-reco}/train/track+showergnn-380Kevs-15Kits-batch32/snapshot-1499.ckpt"
CONFIG_BASE  = "{personal-nd-lar-reco}/configs/config.inference.fullchain-singles.yaml"
# Alternative inputs -- overlay (!).  To use them, uncomment the three lines
# below so that they override the three assignments above.

#WEIGHTS_FILE = "{data-nd-lar-reco}/train/track+intergnn-1400evs-1000Kits-batch8/snapshot-49.ckpt"
#SUPERA_INPUT_FILE  = "{data-nd-lar-reco}/supera/geom-20210405-pileup/FHC.1000001.larcv.root"
#CONFIG_BASE  = "{personal-nd-lar-reco}/configs/config.inference.fullchain-pileup.yaml"


In [None]:
import os.path
import re

# Candidate directories for each "{name}" placeholder, in order of preference.
# replace_prefix() uses the first one that exists as a directory on this machine.
paths_to_try = {
    "data-nd-lar-reco": [
        "/media/hdd1/jwolcott/data/dune/nd/nd-lar-reco",
        "/gpfs/slac/staas/fs1/g/neutrino/jwolcott/data/dune/nd/nd-lar-reco",
    ],
    "personal-nd-lar-reco": [
        "/media/hdd1/jwolcott/app/dune-nd-lar-reco",
        "/gpfs/slac/staas/fs1/g/neutrino/jwolcott/app/dune-nd-lar-reco",
    ],
    "software-dir": [
        "/gpfs/slac/staas/fs1/g/neutrino/jwolcott/app",
        "/media/hdd1/jwolcott/app",
        "/dune/app/users/jwolcott/dunesoft",
    ]
}

# A leading "{name}" followed by the rest of the path.  The name is matched
# only up to the FIRST closing brace, so braces that appear later in the path
# are left untouched (a greedy ".*" would swallow everything up to the last one).
pattern = re.compile(r"^\{([^}]*)\}(.*)")
def replace_prefix(dirname):
    """
    Expand a leading "{name}" placeholder into a real directory on this machine.

    :param dirname: path string, optionally starting with "{name}" where name is a key of paths_to_try
    :return: the path with the placeholder replaced by the first existing candidate directory;
             if there is no leading placeholder, a notice is printed and the input is returned unaltered
    :raises AssertionError: if the placeholder name is unknown, or none of its candidate directories exists
    """
    matches = pattern.match(dirname)
    if not matches:
        print("Apparently no prefix in name:", dirname)
        print("Returning it unaltered!")
        return dirname

    prefix_string, remainder = matches.groups()
    assert prefix_string in paths_to_try, "Unrecognized prefix string: " + prefix_string

    # first candidate that actually exists here, or None if there isn't one
    prefix = next((d for d in paths_to_try[prefix_string] if os.path.isdir(d)), None)
    assert prefix, "Couldn't realize prefix directory for prefix string '%s'" % prefix_string

    return prefix + remainder

In [None]:
# Expand the "{...}" placeholders into real paths for this machine.
# Re-running this cell is harmless: a path without a leading placeholder is
# returned unaltered by replace_prefix() (which prints a notice about it).
SUPERA_INPUT_FILE  = replace_prefix(SUPERA_INPUT_FILE)
WEIGHTS_FILE = replace_prefix(WEIGHTS_FILE)
CONFIG_BASE  = replace_prefix(CONFIG_BASE)

# Fail early if the input or the config file is missing.
# NOTE(review): WEIGHTS_FILE is not checked here -- confirm whether that is intentional.
for f in (SUPERA_INPUT_FILE, CONFIG_BASE):
    assert os.path.isfile(f), "Can't find file: " + f

In [None]:
from larcv import larcv

# Map each larcv shape constant (larcv.kShape<Name>) to its readable name.
# Used further down for hover text and for per-shape particle counts.
labels = {}
for name in ['Michel','Track','Shower','LEScatter','Delta', 'Ghost', 'Unknown']:
    labels[getattr(larcv,'kShape%s' % name)] = name

In [None]:
import importlib
import os.path
import sys

# Show which Python interpreter this kernel is running
print(sys.executable)

modules_required = {
    # module name -> subdir path
    "mlreco": "{software-dir}/lartpc_mlreco3d",
    "larcv": "{software-dir}/larcv2/python",
}

for module_name, module_path in modules_required.items():
    software_dir = replace_prefix(module_path)

    # Put the directory at the front of sys.path so it is searched first,
    # then check that the module really can be imported from there.
    sys.path.insert(0, software_dir)
    try:
        importlib.import_module(module_name)
    except Exception as exc:
        # Best effort: report the problem and carry on with the next module.
        # The underlying reason is printed too, since an import can fail for
        # causes other than "not found" (e.g. a broken dependency).
        print("ERROR: couldn't find %s package" % module_name)
        print("  import from %s failed with %s: %s" % (software_dir, type(exc).__name__, exc))
    else:
        print("Setup of %s ok from:" % module_name, software_dir)

# add the dune-nd-lar-reco path manually since it's not a package
nd_lar_path=replace_prefix("{software-dir}/dune-nd-lar-reco")
sys.path.append(nd_lar_path)
print("dune-nd-lar-reco available from:", nd_lar_path)

In [None]:
import numpy as np
#import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=False)
import yaml

## Configuration

In [None]:
from load_helpers import LoadConfig
# Build the configuration from the base YAML file, plugging in the input
# file, weights file and batch size chosen in the first cell.
# NOTE(review): use_gpu is hard-coded to True -- presumably this requires a
# GPU to be available; confirm before running elsewhere.
cfg_dict = LoadConfig(CONFIG_BASE,
                      input_files=[SUPERA_INPUT_FILE],
                      model_file=WEIGHTS_FILE,
                      batch_size=BATCH_SIZE,
                      use_gpu=True)

## Prepare and run the network

In [None]:
import itertools
def convert_to_geom_coords(values, metadata, evnums=None):
    """
    Convert voxel-index coordinates to geometry coordinates, IN PLACE.

    Columns 0-2 of every selected array are multiplied by the voxel size and
    shifted by the volume minimum, both read from ``metadata[0]``.  Because
    the arrays are modified in place, calling this twice on the same arrays
    applies the transformation twice.

    :param values: sequence of 2D arrays (one per event) whose columns 0, 1, 2 are x, y, z
    :param metadata: sequence of objects providing size_voxel_{x,y,z}() and min_{x,y,z}();
                     only the first one is used
    :param evnums: optional collection of event indices to convert;
                   None or empty (the default) converts every event
    :return: None
    """
    metadata = metadata[0]  # they are all the same
    # for coord in ("x", "y", "z"):
    #     print("min", coord, "=", getattr(metadata, "min_%s" % coord)())
    #     print ("voxel size", coord, "=",  getattr(metadata, "size_voxel_%s" % coord)())
    if evnums is not None and len(evnums) > 0:
        wanted = set(evnums)  # constant-time membership test
        values = [ev for idx, ev in enumerate(values) if idx in wanted]
    for ev in values:
        ev[:, 0] = ev[:, 0] * metadata.size_voxel_x() + metadata.min_x()
        ev[:, 1] = ev[:, 1] * metadata.size_voxel_y() + metadata.min_y()
        ev[:, 2] = ev[:, 2] * metadata.size_voxel_z() + metadata.min_z()

def convert_to_geom_size(value, metadata):
    """
    Scale a length given in voxels to geometry units.

    :param value: length (or array of lengths) in voxel units
    :param metadata: sequence of objects providing size_voxel_{x,y,z}(); only the first is used
    :return: ``value`` multiplied by the voxel edge length
    :raises AssertionError: if the voxels are not cubic (the three edge lengths differ)
    """
    first_meta = metadata[0]  # every entry carries the same geometry
    edge_x = first_meta.size_voxel_x()
    edge_y = first_meta.size_voxel_y()
    edge_z = first_meta.size_voxel_z()
    # a single scale factor only makes sense for cubic voxels
    assert edge_x == edge_y and edge_y == edge_z

    return value * edge_x


In [None]:
from mlreco.main_funcs import process_config, prepare
# prepare() configures the necessary "handlers" from the config; they are
# used below through hs.data_io and hs.trainer.
hs=prepare(cfg_dict)

In [None]:
def cycle(data_io):
    """
    Yield each item of ``data_io`` once, in order.

    Despite the name, this makes a single pass: it does not start over when
    ``data_io`` is exhausted.
    """
    yield from data_io

# cycle() returns a generator, which is already an iterator, so the iter()
# call simply hands the same object back.
it = iter(cycle(hs.data_io))

# Run the trainer's forward step on the iterator.  It returns two
# collections, both indexed below as collection[key][entry]:
# `data` (what was read in) and `output` (what the network produced).
data,output=hs.trainer.forward(it)
#print({k: v for k, v in output.items()})
print("done evaluating")

## Visualize the output!



In [None]:
# this cell gives you an idea of some of the ways you can inspect the output

import pprint
#print(data.keys())
#pprint.pprint(sorted(output.keys()))


# print(len(output["clust_fragments"][1]))
# print(len(output["clust_frag_seg"][1]))
# #print(len(output["clust_frag_batch_ids"]))
# print(output["fragments"][3])
# print(output["frag_group_pred"][3])
# print(len(output["frag_node_pred"]))
# print(len(output["frag_edge_pred"]))
#print(list(data["cluster_label"][0][data["cluster_label"][0][:, -1] == 1][:, 5]))

# Example: group ids of the particles with |PDG code| == 13 (muons) in entry 0 of the batch.
print([p.group_id() for p in data["particles_raw"][0] if abs(p.pdg_code()) == 13])

In [None]:
# this cell converts the PPN output into meaningful points

import numpy
from mlreco.utils.ppn import uresnet_ppn_type_point_selector

# there's post-processing that needs to be done with PPN before we transform coordinates
# Defaults, used when the config has no "dbscan_frag" module section:
score_threshold = 0.5
type_score_threshold = 0.5
type_threshold = 2
if "model" in cfg_dict and "modules" in cfg_dict["model"] \
        and "dbscan_frag" in cfg_dict["model"]["modules"]:
    dbscan_cfg = cfg_dict["model"]["modules"]["dbscan_frag"]
    score_threshold = dbscan_cfg["ppn_score_threshold"]
    type_score_threshold = dbscan_cfg["ppn_type_score_threshold"]
    type_threshold = dbscan_cfg["ppn_type_threshold"]

# Edge length of one PPN1 cell, in voxels (converted to geometry units by the display cell)
ppn_cfg = cfg_dict['model']['modules']['uresnet_ppn']['ppn']
ppn1_size_vox = ppn_cfg['spatial_size'] / ppn_cfg['ppn1_size']

# Factor applied to the first three PPN1 columns before they are treated as voxel coordinates.
# NOTE(review): this is hard-coded; the original inline comment suggests
# ppn1_size_vox was the intended value -- confirm they agree for the config in use.
PPN1_VOXEL_SCALE = 66

n_entries = len(data["input_data"])
ppn = [None,] * n_entries
ppn1_voxels = [None,] * n_entries
for entry in range(n_entries):
    ppn[entry] = uresnet_ppn_type_point_selector(data['input_data'][entry],
                                                 output,
                                                 entry=entry,
                                                 score_threshold=score_threshold,
                                                 type_score_threshold=type_score_threshold,
                                                 type_threshold=type_threshold)

    # Multiply into a NEW array.  An in-place `*=` on the slice would act on a
    # view of output['ppn1'][entry] (slicing an array gives a view), silently
    # rescaling the raw network output and compounding on every re-run of this cell.
    ppn1_voxels[entry] = output['ppn1'][entry][:, :3] * PPN1_VOXEL_SCALE


output["ppn_post"] = ppn
output["ppn1_voxels"] = ppn1_voxels



    # print(ppn[1].dtype)
# print(ppn[1])

#output["ppn_post"] = numpy.concatenate(ppn, axis=0)
# for entry, ppn_points in enumerate(ppn):
#     print("there are", numpy.count_nonzero(ppn_points[:, -1] == 1), "'track' PPN points in entry", entry)

In [None]:
# here we convert the output into geometry (rather than pixel) coordinates

# Keys (looked up in both `data` and `output`) whose arrays get converted
# from voxel coordinates to geometry coordinates.
convert_list = ["input_data", "segment_label", "ppn_post", "ppn1_voxels", "particles_label", "cluster_label"]
#convert_list = []

# NOTE: convert_to_geom_coords() assigns back into the arrays it is given, so
# running this cell more than once applies the conversion again.
for collection in (data, output):
    for key in collection:
        if key not in convert_list:
            continue

        vals = collection[key]
        if key == "ppn1_voxels":
            # NOTE(review): looks like leftover debugging -- confirm it can be removed
            sys.stdout.flush()
        convert_to_geom_coords(vals, data["metadata"])



In [None]:
# useful extra function for use with axis ranging

def collection_range(coord, *arrays, scale=1):
    """
    Get the [min, max] extrema over a collection of arrays, using just the indicated coordinate,
    optionally widened by a scale factor.

    Each bound is pushed away from the interior of the range: a bound lying on
    the far side of zero is multiplied by ``scale``, one lying on the near side
    is divided by it.  With the default ``scale=1`` the plain extrema are returned.

    :param coord: which coordinate (column index) to do it over
    :param arrays: the 2D arrays to be compared; empty ones are ignored
    :param scale: widening factor (> 0); values above 1 enlarge the range
    :return: two-element list [min, max] of the (scaled) extrema found
    :raises ValueError: if no non-empty array was supplied
    """

    non_empty = [a for a in arrays if len(a) > 0]
    if not non_empty:
        raise ValueError("collection_range() needs at least one non-empty array")

    lo = min(a[:, coord].min() for a in non_empty)
    hi = max(a[:, coord].max() for a in non_empty)
    # Widen outward.  (Multiplying by `scale - 1` here instead would collapse
    # the bound to zero at scale=1 and to a quarter of its value at scale=1.25.)
    lo *= scale if lo < 0 else 1 / scale
    hi *= scale if hi > 0 else 1 / scale
    return [lo, hi]

In [None]:
# this produces the "track end direction" estimate

import summarize
# summarize_tracks() fills the list passed as its third argument; the rows are
# then stacked into a single 2D array.
trk_summary = []
summarize.summarize_tracks(data, output, trk_summary)
#print(trk_summary)
# numpy.vstack is the function that the deprecated alias numpy.row_stack pointed to
trk_summary = numpy.vstack(trk_summary)

import track_plotting
for evt_idx in range(len(data["particles_raw"])):
    # first: number of tracks
    evt_data = { k: data[k][evt_idx] for k in data }

    # NOTE(review): neither evt_data nor track_dir is used later in this
    # notebook, and "track_group_pred" is read from `output` (not `data`) in
    # the display cell -- confirm what this loop is meant to do.
    for trk_index in numpy.unique(data["track_group_pred"]):
        # (this line was tab-indented under a space-indented loop, which Python 3 rejects with TabError)
        track_dir = track_plotting.reco_track_begin_dir(trk_index, data)



In [None]:
# here, finally, is the actual "event display" proper

import numpy
import plotly.graph_objs as go
import plotly.express as px

import scipy

from mlreco.visualization import scatter_points, scatter_cubes, plotly_layout3d
from mlreco.visualization.gnn import network_topology
from larcv import larcv

markersize = 2  # pixels...

# Plot a specific entry.
# Note that it needs to be < the BATCH_SIZE declared at top of file
entry=3

# Interaction group id to draw additionally as its own "Highlighted interaction"
# trace; None disables the extra trace.
highlight_ixn = None

# Start every per-entry collection as None, so that the `is None` checks
# further down work even when the corresponding output is absent.
# (`track_grp` is kept alongside `trk_grp`, the name the rest of the cell reads.)
evt_info = vox = label = pred = ppn = ppn1_vox = clus_lbl \
         = clus = clus_seg = tracks = trk_grp = track_grp \
         = show_grp = showers = particles = particle_points \
         = inter_particles = inter_grp \
         = None

# Retrieve data
# Everything below is the slice for the single chosen `entry` of the batch.
evt_info  = data  ['event_base']      [entry]
vox       = data  ['input_data'      ][entry]
label     = data  ['segment_label'   ][entry]
clus_lbl  = data  ["cluster_label"   ][entry]
pred      = output['segmentation'    ][entry]
ppn       = output['ppn_post'        ][entry]
ppn1_vox  = output['ppn1_voxels'     ][entry]
#ppn1_mask = output['mask_ppn1'       ][entry][:, 0].astype(bool)
# softmax across columns 4+ of the raw PPN1 output, keeping the second of those columns
# (presumably the "positive" score -- TODO confirm against the PPN output layout)
ppn1_scores = scipy.special.softmax(output['ppn1'][entry][:, 4:], axis=1)[:, 1]
# The optional outputs are detected by key PREFIX only; the specific keys
# read inside each branch are assumed to be present whenever the prefix matches.
if any(o.startswith("fragments") for o in output):
    clus      = output['fragments' ][entry]
    clus_seg  = output['fragments_seg'  ][entry]
if any(o.startswith("track") for o in output):    
    tracks    = output['track_fragments' ][entry]
    trk_grp   = output['track_group_pred'][entry]
if any(o.startswith("shower") for o in output):
    show_grp  = output['shower_group_pred'][entry]
    showers   = output['shower_fragments' ][entry]
if any(o.startswith("inter") for o in output):
    inter_grp = output['inter_group_pred'][entry]
    inter_particles = output['inter_particles'][entry]

if any(d.startswith("particles") for d in data):
    particles =       data['particles_raw'][entry]
    particle_points = data['particles_label'][entry]

# Quick summary of what was found for this entry
if vox is not None:
    print("number of voxels:", len(vox))
if not any(i is None for i in (clus, tracks)):
    print("number of fragments:", len(clus) + len(tracks))
if particles is not None:
    # count of true particles per shape name
    print("number of particles:", {name: len([p for p in particles if p.shape() == getattr(larcv,'kShape%s' % name)]) for name in labels.values()})
#print(output.keys())
#print(numpy.unique(ppn[:, 3]))
#print(ppn)

# we want to show all of each type of point
# (these arrays determine the axis ranges of the plot)
arrays = [i for i in (vox, label, pred, ppn, particle_points) if i is not None]

# Figure layout: dark theme, 800x800, legend to the right of the plot.
layout = go.Layout(
    showlegend=True,
    legend=dict(x=1.01,y=0.95),
    width=800,
    height=800,
    hovermode='closest',
    margin=dict(l=0,r=0,b=0,t=0),                                                                                                                                  
    template='plotly_dark',                                                                                                                                        
#    uirevision = 'same',
    # each axis spans the extrema of all the point collections, widened via scale=1.25
    scene = dict(xaxis = dict(nticks=10, range = collection_range(0, *arrays, scale=1.25), showticklabels=True, title='x (cm)'),
                 yaxis = dict(nticks=10, range = collection_range(1, *arrays, scale=1.25), showticklabels=True, title='y (cm)'),
                 zaxis = dict(nticks=10, range = collection_range(2, *arrays, scale=1.25), showticklabels=True, title='z (cm)'),
                 aspectmode='cube'),
    # initial camera: +y is "up"
    scene_camera = dict(up=dict(x=0, y=1, z=0),
                        center=dict(x=0, y=0, z=0),
                        eye=dict(x=0, y=1, z=-2))
)


# Plot energy depositions (input data)
# Column 4 of `vox` is used as the deposited energy (shown in MeV in the hover text).
thresh=0 #0.01
saturate=5
color_min=thresh
color_max=saturate
# keep only the voxels above threshold
vox_thresh=vox[vox[:,4]>thresh]
# From here on `markersize` is a per-voxel array (it replaces the scalar set at
# the top of the cell) and is reused by several of the traces added below.
markersize=numpy.tanh(vox_thresh[:,4])*3
#markersize=1
# cap the colour value at `saturate`
vox_E_saturate = numpy.minimum(vox_thresh[:,4], saturate)
#vox_E_saturate = numpy.full_like(vox_thresh[:,4], color_min)  # use this to make all edep colors white
# `trace` is the running list of plot traces; every later section appends to it
trace  = scatter_points(vox_thresh,markersize=markersize,symbol="square",color=vox_E_saturate,colorscale='Reds',
                        cmin=color_min, cmax=color_max,
                        hovertext=['%.2f MeV' % v for v in vox_thresh[:,4]])
trace[-1].name = 'Energy deposits'

  
# import plotly.express as px
# f = px.histogram(label[:, 4])
# f.show()

# True semantic class of each voxel (column 4 of `label`); hidden until selected in the legend
if label is not None:
    trace += scatter_points(label,markersize=markersize,symbol="square",color=label[:,4],colorscale='Jet',
                            cmin=0,cmax=4,
                            hovertext=[labels[int(v)] for v in label[:,4]],
                            visible="legendonly")
    trace[-1].name = 'True sem. class'

# Plot semantic labels ... add hover text for semantic types
# (drawn at the `label` voxel positions, coloured by the argmax over the columns of `pred`)
if not any(i is None for i in (pred, label)):
    trace += scatter_points(label,markersize=markersize,symbol="square",color=np.argmax(pred,axis=1),colorscale='Jet',
                            cmin=0,cmax=4,
                            hovertext=[labels[v] for v in np.argmax(pred,axis=1)])
    trace[-1].name = 'Pred. sem. class'
    
if clus_lbl is not None:
    # index 6 is shower cluster group,
    # index 7 is neutrino interaction
#    only_external_mask = clus_lbl[:, 7] == -1
    trace += scatter_points(clus_lbl,markersize=markersize,symbol="square",
                            color=clus_lbl[:, 7],colorscale='Jet',
                            cmin=0,cmax=max(clus_lbl[:, 7]), visible="legendonly")
    trace[-1].name = 'True interaction'
    

# Plot points of interest from PPN
# (the last column of `ppn` is used as the point type, for both colour and hover text)
if ppn is not None:
    trace += scatter_points(ppn[:, :3], symbol="diamond", markersize=3,
                            color=ppn[:, -1], cmin=0, cmax=5,  # type
                            colorscale="mygbm",
                            hovertext=[labels[int(v)] for v in ppn[:, -1]]    #ppn[:, 5], # score

                            )
    trace[-1].name = 'PPN points'

#    print("PPN counts:", {name: numpy.count_nonzero(ppn[:, -1] == val) for val, name in labels.items()})

# truth points
# (column 4 of `particle_points` is used as the point type)
if particle_points is not None:
    trace += scatter_points(particle_points, markersize=3, symbol="circle",
                            color=particle_points[:, 4], cmin=0, cmax=5, colorscale="mygbm",
                            hovertext=[labels[v] for v in particle_points[:, 4]],
                            visible="legendonly")
    trace[-1].name = "True point labels"
    print(numpy.count_nonzero(particle_points[:, 4] == 1), "true 'track' particle points")
# trace[-1].marker.colorscale= ['cyan', 'rgb(255,234,0)', 'rgb(127, 188, 65)', 'purple', 'rgb(255,111,0)']

#print(trace)

#trace = []
# Line colours for the true trajectories, keyed by |PDG code|.
colors = {
    11:   "orange",
    12:   "black",
    13:   "blue",
    14:   "black",
    22:   "yellow",
    111:  "white",
    130:  "gray",
    211:  "purple",
    311:  "red",
    321:  "cyan",
    2112: "darkslategray",
    2212: "green",
    3122: "white",
    3222: "white",

}
# Colour for any |PDG code| not listed above.  This covers the codes above
# 1000000000 that were previously special-cased, and also any other unlisted
# particle, which would otherwise abort the display with a KeyError.
DEFAULT_TRAJ_COLOR = "gray"
if particles is not None:
    vals = dict([(t, []) for t in ("x", "y", "z", "line_color", "text")])
    for particle in particles:
    #     if particle.last_step().as_point3d().distance(particle.first_step().as_point3d()) < 4:
    #         continue
        pdg = particle.pdg_code()
        traj_color = colors.get(abs(pdg), DEFAULT_TRAJ_COLOR)
        traj_text = "pdg=" + str(pdg)

        # Each particle contributes three entries per coordinate (first step,
        # last step, separator), so colour and text are repeated three times to
        # keep all the lists the same length (None would cause issues there).
        vals["line_color"].extend([traj_color] * 3)
        vals["text"].extend([traj_text] * 3)

        for coord in ("x", "y", "z"):
            for step in ("first_step", "last_step"):
                vals[coord].append(getattr(getattr(particle, step)(), coord)())

            # separator
            vals[coord].append(None)

    #print(colors)
    trace.append(go.Scatter3d(vals, mode="lines", line_dash="dot", line_width=3, hovertext=vals["text"], visible="legendonly"))
    #    break
    trace[-1].name = "True trajs"
    

# show all the fragments
# (each fragment gets its own label/colour: 0 .. len(clus)-1)
if not any(i is None for i in (vox, clus)):
    trace += network_topology(vox, clus, edge_index=[],
                              clust_labels=range(len(clus)),
                              edge_labels=[],
                              mode='scatter', markersize=2, linewidth=2,
                              colorscale='mygbm',
                              cmin=0,
                              cmax=0 if len(clus) == 0 else len(clus))
    trace[-1].name = "All fragments"

#show only regrouped track fragments
# (track fragments coloured by their predicted group)
if not any(i is None for i in (vox, tracks, trk_grp)):
    trace += network_topology(vox, tracks, edge_index=[],
                              clust_labels=trk_grp,
                              edge_labels=[],
                              mode='scatter', markersize=2, linewidth=2,
                              colorscale='mygbm',
                              cmin=0 if len(trk_grp) == 0 else min(trk_grp),
                              cmax=0 if len(trk_grp) == 0 else max(trk_grp)+1)
    trace[-1].name = "Regrouped track"

    # add track end vectors
    # NOTE(review): track_indices is not used below -- confirm it can be removed
    track_indices = numpy.unique(trk_grp)
    # rows of the track summary whose column 2 matches this event's number
    # (third element of evt_info[0]); the first three columns are dropped
    trk_summary_ev = trk_summary[trk_summary[:, 2] == evt_info[0][2]][:, 3:]
    for trk_index, trk_info in enumerate(trk_summary_ev):
        # returns: (trk_end_x, trk_end_y, trk_end_z, vec_end_x, vec_end_y, vec_end_z)
        vals = dict([(t, []) for t in ("x", "y", "z")])
    #    print(evt_info)
    #    print(trk_summary[trk_summary[:, 2] == evt_info[0][2]])
    #    trk_end_vec = trk_summary_ev[, :]
    #    print(trk_end_vec)
    #     print("trk_info:", trk_info)
        # last three values: end direction; the three before them: end point
        end_displ = trk_info[-3:]
        # tip of a segment of length 25 drawn from the end point along the end direction
        # NOTE(review): a zero-length end_displ would make this division produce NaNs
        trk_end_vec = trk_info[-6:-3] + (end_displ * 25 / numpy.linalg.norm(end_displ))
        for idx, coord in enumerate(("x", "y", "z")):
            vals[coord].append(trk_info[-6+idx])
            vals[coord].append(trk_end_vec[idx])

    #    print("for track", trk_index, "vals passed to scatter3d:", vals)
        # one line trace per track; they share a legend group and only the first shows a legend entry
        trace.append(go.Scatter3d(vals,
                                  mode="lines",
                                  line_width=3,
                                  line_color=px.colors.cyclical.mygbm[trk_index % len(px.colors.cyclical.mygbm)],
                                  hovertext="trk %d end vec" % trk_index,
                                  name="Track end vecs",
                                  showlegend=(trk_index == 0),
                                  legendgroup="Track end vectors",
                                  visible="legendonly"))



#show only regrouped EM fragments
# (shower fragments coloured by their predicted group; hidden until selected in the legend)
if not any(i is None for i in (vox, showers, show_grp)):
    trace += network_topology(vox, showers, edge_index=[],
                              clust_labels=show_grp, edge_labels=[],
                              mode='scatter', markersize=2, linewidth=2,
                              colorscale='mygbm',
                              cmin=0 if len(show_grp) == 0 else min(show_grp),
                              cmax=0 if len(show_grp) == 0 else max(show_grp),
                              visible="legendonly")
    trace[-1].name = "Regrouped shower"

# particles coloured by their predicted interaction group
if not any(i is None for i in (vox, inter_particles, inter_grp)):
    trace += network_topology(vox, inter_particles, edge_index=[],
                              clust_labels=inter_grp, edge_labels=[],
                              mode='scatter', markersize=2, linewidth=2,
                              colorscale='Jet',
                              cmin=0 if len(inter_grp) == 0 else min(inter_grp),
                              cmax=0 if len(inter_grp) == 0 else max(inter_grp),
                              visible="legendonly")
    trace[-1].name = "Regrouped interaction"
    
    # Optional extra trace for one chosen interaction: every other group is
    # relabelled -1.  `highlight_ixn` must be set before this point (an
    # interaction group id, or None to skip this trace).
    if highlight_ixn is not None:
        trace += network_topology(vox, inter_particles, edge_index=[],
                              clust_labels=[c if c == highlight_ixn else -1 for c in inter_grp], edge_labels=[],
                              mode='scatter', markersize=2, linewidth=2,
                              colorscale='Jet',
                              cmin=0 if len(inter_grp) == 0 else min(inter_grp),
                              cmax=0 if len(inter_grp) == 0 else max(inter_grp),
                              visible="legendonly")
        trace[-1].name = "Highlighted interaction"
   
    
# show regions selected by intermediate PPN masks
# edge length of one PPN1 cell, converted from voxels to geometry units
ppn1_size_geom = convert_to_geom_size(ppn1_size_vox, metadata=data["metadata"])
# print("ppn1_size (vox):", ppn1_size_vox)
# print("ppn1_size (geom):", ppn1_size_geom)
# keep only the PPN1 cells whose score passes the threshold
ppn1_mask = ppn1_scores > score_threshold
# NOTE(review): the positions are divided by the cube size here -- presumably
# because scatter_cubes() multiplies them by `cubesize` again; confirm.
graph_ppn1 = scatter_cubes(ppn1_vox[ppn1_mask] / ppn1_size_geom,
                           cubesize=[ppn1_size_geom,]*3,
                           opacity=0.4,
                           hovertext=ppn1_scores[ppn1_mask],
                           name="PPN1 attention regions",
                           legendgroup="PPN1 attention regions",
                           visible="legendonly")
# all the cubes share one legend group; only the first gets a legend entry
for i, g in enumerate(graph_ppn1):
    g.showlegend = i == 0
#print(graph_ppn1[0])
trace += graph_ppn1
    
# show
# Assemble the figure from all the traces collected above
fig = go.Figure(data=trace,layout=layout)
# (this moves the legend from the position given in `layout`)
fig.update_layout(legend=dict(x=1.1, y=0.9))
#iplot(fig)


# Label the display with the three values in evt_info[0]: run, subrun and event number
fig.add_annotation(text="Run/subrun/event %d/%d/%d" % tuple(evt_info[0]), xref="paper", yref="paper", x=1.2, y=0.2)



fig.show()