## _HitPairs using ExaTrkX Pipeline_

In [1]:
import glob, os, sys, yaml

In [2]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import pprint
pp = pprint.PrettyPrinter(indent=2)
import seaborn as sns
import trackml.dataset

In [4]:
import torch
from torch_geometric.data import Data
import itertools

In [5]:
# Run on the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Anchor the project's data root at the current working directory.
os.environ['EXATRKX_DATA'] = os.path.abspath(os.curdir)

### _2.1 - Config File_

In [6]:
# Load the processing config file (trusted source: it ships with the repository).
config_file = os.path.join(os.curdir, 'LightningModules/GNN/configs/train_quickstart_DNN.yaml')
with open(config_file) as f:
    try:
        # NOTE(review): FullLoader is acceptable only for trusted files; switch to
        # yaml.safe_load if this config can ever come from an external source.
        config = yaml.load(f, Loader=yaml.FullLoader) # equiv: yaml.full_load(f)
    except yaml.YAMLError as e:
        # Without a parsed config, `config` stays unbound and every later cell
        # fails with an unrelated NameError. Report the parse error and stop here.
        print(e)
        raise

In [7]:
# pp.pprint(config)

In [8]:
# Override the loaded settings for this study: project name, dataset split
# names/sizes, and the input/output directories under $EXATRKX_DATA.
config.update({
    'project': 'DNNStudy',
    'datatype_names': ['train', 'val', 'test'],
    'datatype_split': [800, 100, 100],
    'input_dir': os.path.join(os.environ['EXATRKX_DATA'], 'run/feature_store'),
    'output_dir': os.path.join(os.environ['EXATRKX_DATA'], 'run/dnn_processed'),
})

In [9]:
# pp.pprint(config)

### _2.2 - Input Data_

In [10]:
# Read events from the *train* split of the feature store and make sure the
# output directory exists before anything is written to it.
inputdir = os.path.expandvars(os.path.join(config['input_dir'], "train"))
outputdir = os.path.expandvars(config['output_dir'])
os.makedirs(outputdir, exist_ok=True)

In [11]:
# Collect every event file of the train split. Sorting is done on the path
# strings, so the order is lexicographic (e.g. '10' sorts before '2').
all_files = sorted(glob.glob(os.path.join(inputdir, "*")))
# The files come from the train directory, so label the count accordingly.
print("Total Train Events: ", len(all_files))

Total Test Events:  8000


In [12]:
# Peek at the first ten paths; because the list is sorted as strings the order
# is lexicographic ('0', '1', '10', '100', ...), not numeric.
all_files[:10]

['/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/0',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/1',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/10',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/100',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/1000',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/1001',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/1002',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/1003',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/1004',
 '/home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train/1005']

In [13]:
# Pick a single event file to inspect. With the string-sorted list, index 5 is
# the file named '1001' in the recorded run, not event number 5.
filename = all_files[5]

In [14]:
# os.path.split(path) returns the pair (os.path.dirname(path), os.path.basename(path));
# show both halves for the selected event file.
print("os.path.dirname(path) : ", os.path.dirname(filename))
print("os.path.basename(path): ", os.path.basename(filename))

os.path.dirname(path) :  /home/adeak977/current/3_deeptrkx/ctd2022/run/feature_store/train
os.path.basename(path):  1001


In [15]:
# Deserialize the selected event onto `device` and report the length of the
# loaded object.
# NOTE(review): torch.load unpickles its input, so only use it on trusted files.
feature_data = torch.load(filename, map_location=device)
print(f"Length of Data: {len(feature_data)}")

Length of Data: 10


In [16]:
# List the attribute names stored on the loaded event.
# `keys` is a property on older torch_geometric releases and a method on newer
# ones; handle both so the names are printed rather than a bound-method repr.
print(feature_data.keys() if callable(feature_data.keys) else feature_data.keys)

['layerwise_true_edges', 'edge_index', 'layers', 'pt', 'hid', 'modulewise_true_edges', 'y_pid', 'pid', 'x', 'event_file']


In [17]:
# Print the object's summary repr (attribute names with their sizes).
print(feature_data)

Data(x=[161, 3], pid=[161], layers=[161], event_file='/home/adeak977/current/3_deeptrkx/ctd2022/train_all/event0000001001', hid=[161], pt=[161], modulewise_true_edges=[2, 151], layerwise_true_edges=[2, 166], edge_index=[2, 852], y_pid=[852])


In [18]:
# Pull out the two inputs the network consumes: node features and edge list.
x, edge_index = feature_data.x, feature_data.edge_index

### _Input Features to Network_

- The `forward()` function receives `x` and `edge_index` from outside, where `x = [r, phi, z]` holds the node features and `edge_index` contains the _edges_ (_aka_ node/hit pairs).
- However, `EdgeClassifier` needs $x_i, x_j$ for each edge, so the features of the two nodes of each edge must be concatenated.

Let's see how it can be achieved.

In [19]:
# Row 0 of edge_index holds the start node of every edge, row 1 the end node.
# Equivalent one-liner: start, end = edge_index
start = edge_index[0]
end = edge_index[1]

In [20]:
# This yields True (element-wise)
# start == edge_index[0]

In [21]:
# This yields True (element-wise)
# end == edge_index[1]

In [22]:
# Two rows (start node, end node) by the number of edges.
edge_index.shape

torch.Size([2, 852])

In [23]:
# One row per node (hit), three features per node.
x.shape

torch.Size([161, 3])

In [24]:
# Indexing x with the start-node ids gathers one feature row per edge.
x[start].shape

torch.Size([852, 3])

In [25]:
# Same gather for the end node of every edge.
x[end].shape

torch.Size([852, 3])

In [26]:
# Features of node 0; in the recorded run these equal the values of x[start][0].
x[0]

tensor([ 0.1663, -0.6570,  0.0027])

In [27]:
# Features of node 4; in the recorded run these equal the values of x[end][0].
x[4]

tensor([ 0.1749, -0.6667,  0.0027])

In [28]:
# Features of the start node of the first edge.
x[start][0]

tensor([ 0.1663, -0.6570,  0.0027])

In [29]:
# Features of the end node of the first edge.
x[end][0]

tensor([ 0.1749, -0.6667,  0.0027])

In [30]:
# Build one input row per edge by placing the start-node features and the
# end-node features side by side along the feature axis.
edge_inputs = torch.cat((x[start], x[end]), dim=-1)

In [31]:
# First edge's input row: start-node features followed by end-node features.
edge_inputs[0]

tensor([ 0.1663, -0.6570,  0.0027,  0.1749, -0.6667,  0.0027])

### _2.3 - Network Model_

In [32]:
from LightningModules.GNN.gnn_base import GNNBase

In [33]:
from LightningModules.GNN.Models.dense_network import EdgeClassifier

In [34]:
# Instantiate the edge classifier from the (modified) config dictionary.
model = EdgeClassifier(config)

In [35]:
# Show the layer stack of the network.
print(model)

EdgeClassifier(
  (network): Sequential(
    (0): Linear(in_features=6, out_features=4000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4000, out_features=2000, bias=True)
    (3): ReLU()
    (4): Linear(in_features=2000, out_features=2000, bias=True)
    (5): ReLU()
    (6): Linear(in_features=2000, out_features=2000, bias=True)
    (7): ReLU()
    (8): Linear(in_features=2000, out_features=1000, bias=True)
    (9): ReLU()
    (10): Linear(in_features=1000, out_features=1, bias=True)
  )
)


### _2.4 - Training_

In [36]:
from pytorch_lightning import Trainer

In [37]:
# Call setup('fit') by hand so the model's datasets can be accessed from the
# notebook (model.trainset is read in a later cell).
# NOTE(review): trainer.fit() presumably triggers setup as well — confirm the
# data is not loaded twice.
model.setup('fit')

In [38]:
# Train for at most 10 epochs. No accelerator arguments are passed, and the
# `device` string chosen at the top of the notebook is not used here.
trainer = Trainer(max_epochs=10)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [39]:
# Fit the model; no dataloaders are passed, so they presumably come from the model.
# NOTE(review): the recorded run stopped in the validation sanity check with
# "AttributeError: 'NoneType' object has no attribute 'repeat'". The cause is not
# visible in this notebook — check the model/config before trusting later cells.
trainer.fit(model)


  | Name    | Type       | Params
---------------------------------------
0 | network | Sequential | 18.0 M
---------------------------------------
18.0 M    Trainable params
0         Non-trainable params
18.0 M    Total params
72.144    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

AttributeError: 'NoneType' object has no attribute 'repeat'

### _Test Training Set_

In [None]:
# Training dataset held by the model (presumably populated by model.setup('fit')).
trainset = model.trainset

In [None]:
# Take the first training event and unpack its three feature columns.
# NOTE(review): the notebook's markdown describes x as [r, phi, z], yet the third
# column is bound to `ir` here (and not used in the visible cells) — confirm naming.
example_data = trainset[0]
r, phi, ir = example_data.x.T

In [None]:
# Convert polar (r, phi) to Cartesian coordinates. phi is multiplied by pi, so it
# is presumably stored in units of pi — confirm against the preprocessing.
# NOTE(review): this rebinds `x`, which earlier cells use for the node-feature
# tensor; re-running those cells after this one will see the coordinates instead.
x, y = r * np.cos(phi * np.pi), r * np.sin(phi * np.pi)

In [None]:
# Scatter the hits of the example event using the Cartesian coordinates
# computed from (r, phi).
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(x, y, s=2)
ax.set_title("Azimuthal View of Detector", fontsize=24)
ax.set_xlabel("x", fontsize=18)
ax.set_ylabel("y", fontsize=18)

In [None]:
# An edge is labelled true when both of its end points carry the same pid.
e = example_data.edge_index
pid = example_data.pid
# Boolean mask with one entry per edge (column of `e`).
true_edges = pid[e[0]] == pid[e[1]]

In [None]:
# Draw the true edges (both end points share a pid) as black segments on top
# of the hit scatter.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(x[e[:, true_edges]], y[e[:, true_edges]], c="k")
ax.scatter(x, y, s=5)
ax.set_title("Azimuthal View of Detector", fontsize=24)
ax.set_xlabel("x", fontsize=18)
ax.set_ylabel("y", fontsize=18)

In [None]:
# Draw a thinned-out sample of the false edges (end points with different pid)
# as red segments; the 0:-1:5 slice keeps every fifth column and stops before
# the last one.
# Full, unsampled variant: ax.plot(x[e[:, ~true_edges]], y[e[:, ~true_edges]], c="r")
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(
    x[e[:, (~true_edges)][:, 0:-1:5]],
    y[e[:, (~true_edges)][:, 0:-1:5]],
    c="r",
)
ax.scatter(x, y, s=5)
ax.set_title("Azimuthal View of Detector", fontsize=24)
ax.set_xlabel("x", fontsize=18)
ax.set_ylabel("y", fontsize=18)