## _Building Graphs: Truth of Input Edges_

In [1]:
import glob, os, sys, yaml

In [2]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import pprint
import seaborn as sns
import trackml.dataset

In [4]:
import torch
from torch_geometric.data import Data
import itertools

In [5]:
# Put the parent directory on the import path so that packages living one
# level above this notebook (e.g. `src`, imported below) can be found.
sys.path.append('..')

In [7]:
from src import Compose_Event, Draw_Compose_Event

### _(+) - Input Data_

In [8]:
# Directory holding the per-event CSV files. Exactly one `input_dir`
# assignment should be active; the others are kept as commented alternatives.

# mu- data (old)
# input_dir = './data_sets/pandaml/data_3.0_7.0_GeV/'

# mu+mu- data (current)
input_dir = '../data_sets/pandaml/data_0.1_1.5_GeV/'

# pbarp data (coming)
# input_dir = os.environ['HOME']+'/current/2_deepana/pandaml/data/'

In [None]:
# List everything in the input directory; each event is stored as a set of
# files (hits.csv, cells.csv, particles.csv, truth.csv) sharing one prefix.
all_files = os.listdir(input_dir)

# An event prefix is the full path of its `-hits.csv` file with that suffix
# removed (e.g. xxx-hits.csv -> <input_dir>/xxx).
suffix = '-hits.csv'
file_prefixes = [
    os.path.join(input_dir, name.replace(suffix, ''))
    for name in all_files
    if name.endswith(suffix)
]
file_prefixes.sort()

In [None]:
# file_prefixes[:10]

In [None]:
# `event_id` is a position in the sorted `file_prefixes` list, not
# necessarily the event number encoded in the file name.
event_id = 1
event_prefix = file_prefixes[event_id]

In [None]:
# Read the four per-event tables that share this prefix.
hits, tubes, particles, truth = trackml.dataset.load_event(event_prefix)

# Total in-memory footprint of the four tables, index included.
mem_bytes = sum(
    frame.memory_usage(index=True).sum()
    for frame in (hits, tubes, particles, truth)
)

event_name = os.path.basename(event_prefix)
print('{} memory usage {:.2f} MB'.format(event_name, mem_bytes / 2**20))

In [None]:
# Preview the first rows of the hits table loaded above.
hits.head()

In [None]:
# Distinct `layer_id` values present in this event's hits.
hits.layer_id.unique()

In [None]:
# Preview the second table returned by `load_event` (bound to `tubes` here).
tubes.head()

In [None]:
# Preview the particles table of this event.
particles.head()

In [None]:
# Preview the truth table of this event.
truth.head()

### _(+) - Build Event_

- functions from _event_utils.py_

In [None]:
# Build one event table from the files behind `event_prefix` and draw it.
# The notebook imports `Compose_Event` and `Draw_Compose_Event` from `src`;
# the lowercase `compose_event` / `draw_event` names used previously are never
# defined or imported here and raise NameError on a fresh kernel.
# NOTE(review): the arguments are carried over unchanged -- confirm they match
# the signatures of the `src` helpers.
event = Compose_Event(event_prefix, skewed=False)
Draw_Compose_Event(event, figsize=(10, 10));

In [None]:
# Preview the first rows of the composed event table.
event.head()

In [None]:
# Distinct values of the `layer` column in the composed event.
event.layer.unique()

## _Build Graphs_

### _(A) - True Edges (Layerwise)_

In [None]:
from LightningModules.Processing.utils.event_utils import get_layerwise_edges

In [None]:
# Get the layerwise true edges for this event.
# NOTE: this rebinds `hits`. From here on `hits` is the table returned by
# `get_layerwise_edges`, not the raw hits table loaded from CSV earlier.
true_edges, hits = get_layerwise_edges(event)

In [None]:
# Preview the hits table returned by `get_layerwise_edges`.
hits.head()

### _(B) - Input Edges (Layerwise)_

In [None]:
from LightningModules.Processing.utils.event_utils import get_input_edges

In [None]:
# Build the candidate (input) edges from the hits table returned by
# `get_layerwise_edges`, with the helper's `filtering` option switched on.
input_edges = get_input_edges(hits, filtering=True)

In [None]:
# `edge_index` is just a second name for `input_edges` (same object, no copy).
edge_index = input_edges

In [None]:
# Shape of the candidate edge array.
input_edges.shape

In [None]:
# First ten entries of row 0: the first endpoint of each candidate edge.
input_edges[0][:10]

In [None]:
# First ten entries of row 1: the second endpoint of each candidate edge.
input_edges[1][:10]

### _(+) - Plotting Input Edges_

In [None]:
# Plot the detector layout, the hits of each particle, and a sample of the
# candidate (input) edges in the x-y plane.
plt.close('all')
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot()

p_ids = np.unique(event.particle_id.values)
# NOTE(review): this path is relative to the working directory, while the
# `src` package is imported via '..' -- confirm the file is reachable.
det = pd.read_csv("src/stt.csv")
# Despite the names, `skw` holds the rows with skewed==0 and `nkw` the rows
# with skewed==1; they only differ in the colour used below.
skw = det.query('skewed==0')
nkw = det.query('skewed==1') # one may look for +ve/-ve polarity

# detector layout
ax.scatter(skw.x.values, skw.y.values, s=20, facecolors='none', edgecolors='lightgreen')
ax.scatter(nkw.x.values, nkw.y.values, s=20, facecolors='none', edgecolors='coral')

# particle tracks: one scatter series per particle_id
sel_pids, sel_pids_fr = np.unique(hits.particle_id, return_counts=True)

for pid in sel_pids:
    idx = hits.particle_id == pid
    ax.scatter(hits[idx].x.values, hits[idx].y.values, label='particle_id: %d' %pid)

# input edges: draw at most the first 56, but never more than exist, so a
# sparse event does not raise IndexError.
# To draw every edge use: range(edge_index.shape[1])
n_edges_drawn = min(56, input_edges.shape[1])
for iedge in range(n_edges_drawn):
    # edge endpoints are positional row indices into `hits`
    pt1 = hits.iloc[input_edges[0][iedge]]
    pt2 = hits.iloc[input_edges[1][iedge]]
    ax.plot([pt1.x, pt2.x], [pt1.y, pt2.y], color='k', alpha=0.3, lw=1.5)

ax.set_title('Event ID # %d' % event_id)
ax.set_xlabel('x [cm]')
ax.set_ylabel('y [cm]')
ax.set_xlim(-40, 40)
ax.set_ylim(-40, 40)
ax.grid(False)
# ax.legend(fontsize=10, loc='best')
fig.tight_layout()
# fig.savefig("input_edges.png")

### _(C) - Truth of Input Edges_

- We have `true_edges`, `input_edges`
- We need `y`: one boolean label per edge of `input_edges`, `True` when that edge also appears in `true_edges`

In [None]:
import scipy as sp

In [None]:
# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
def graph_intersection(input_graph, truth_graph, device=None):
    """Label every edge of ``input_graph`` as true or false using ``truth_graph``.

    Here ``input_graph`` is the candidate ``edge_index`` and ``truth_graph``
    the layerwise true edges. Edges are treated as directed: ``(i, j)`` is
    labelled true only if ``(i, j)`` itself appears in ``truth_graph``.
    See https://scipy-lectures.org/advanced/scipy_sparse/coo_matrix.html

    Args:
        input_graph: integer torch tensor of shape (2, E) holding the
            candidate edges as (row 0, row 1) node-index pairs.
        truth_graph: integer torch tensor of shape (2, T) holding the true
            edges in the same node numbering.
        device: where to place the returned edge tensor. When omitted, a
            module-level ``device`` variable is used if one exists (the
            behaviour of earlier revisions of this function); otherwise the
            device of ``input_graph`` is used.

    Returns:
        A pair ``(new_input_graph, y)``:

        * ``new_input_graph`` -- ``torch.long`` tensor of shape (2, E') with
          the same edges as ``input_graph``, but in sparse-matrix order
          (sorted by index) rather than the original geometric order, and
          with repeated edges merged into one.
        * ``y`` -- NumPy boolean array of length E'; ``y[k]`` is the truth of
          ``new_input_graph[:, k]``. It is aligned with ``new_input_graph``,
          NOT with the original ``input_graph``, so always use the two
          returned values together.
    """
    # Imported locally so the function does not depend on `scipy.sparse`
    # having been loaded as a side effect of some other import; a bare
    # `import scipy` does not guarantee the attribute on older SciPy releases.
    from scipy.sparse import coo_matrix

    if device is None:
        # Backward compatibility: fall back to the notebook/module-level
        # `device` when it is defined, else follow the input tensor.
        device = globals().get("device", input_graph.device)

    # Matrix side = largest node index + 1. An empty edge list has no maximum
    # (`.max()` would raise), so it simply does not contribute.
    node_counts = [
        graph.max().item() + 1
        for graph in (input_graph, truth_graph)
        if graph.numel() > 0
    ]
    array_size = max(node_counts, default=1)

    # Get Graphs as (2, n_edges) NumPy index arrays
    l1 = input_graph.cpu().numpy()
    l2 = truth_graph.cpu().numpy()

    # Build adjacency matrices in COOrdinate format, then convert to CSR.
    # Repeated edges are summed during the conversion, so entries are >= 1.
    e_1 = coo_matrix(
        (np.ones(l1.shape[1]), l1), shape=(array_size, array_size)
    ).tocsr()

    e_2 = coo_matrix(
        (np.ones(l2.shape[1]), l2), shape=(array_size, array_size)
    ).tocsr()

    # Edge intersection of e_1 (input_graph) and e_2 (truth_graph):
    #   * edge in both graphs     -> product term is positive, nothing is
    #                                subtracted that makes it non-positive
    #   * edge only in the input  -> product is 0, mask subtracts 1 -> -1
    #   * edge only in the truth  -> neither term stores a value (dropped)
    # Every input edge therefore survives with a sign that encodes its truth.
    e_intersection = (e_1.multiply(e_2) - ((e_1 - e_2) > 0)).tocoo()

    # Re-emit the input edges in the order of the sparse matrix so that they
    # line up one-to-one with `y`.
    new_input_graph = (
        torch.from_numpy(np.vstack([e_intersection.row, e_intersection.col]))
        .long()
        .to(device)
    )

    # Get Truth for Input Graph: positive entries are true edges
    y = e_intersection.data > 0

    return new_input_graph, y

In [None]:
# Convert `input_edges` and `true_edges` to torch tensors, since
# `graph_intersection` calls tensor methods (`.max().item()`, `.cpu()`) on them.
ie = torch.tensor(input_edges)
te = torch.tensor(true_edges)

In [None]:
# Check the type of the converted edge list.
type(ie)

In [None]:
# Returns the input edges re-ordered (sorted) together with their truth `y`.
# `y` lines up with `new_input_graph`, not with the original `input_edges`.
new_input_graph, y = graph_intersection(ie, te)

In [None]:
# Shape of the re-ordered edge tensor.
new_input_graph.shape

In [None]:
# Show the first five edges. The tensor is (2, n_edges), built by stacking the
# row and column indices, so the slice must be taken along the second axis;
# `[:5]` would slice the two rows and return the whole tensor.
new_input_graph[:, :5]

In [None]:
# Number of truth labels (one per edge of `new_input_graph`).
y.shape

In [None]:
# `y` comes from comparing the sparse matrix's `.data` array with 0.
type(y)

In [None]:
# Truth labels of the first twenty edges of `new_input_graph`.
y[:20]