In [1]:
import os
os.environ['TRKXINPUTDIR'] = '/global/cfs/cdirs/m3443/data/trackml-kaggle/train_all'
os.environ['TRKXOUTPUTDIR'] = '/global/cscratch1/sd/xju/heptrkx/iml2020/run100'

In [2]:
# system import
import pkg_resources
import yaml
import pprint
import random
random.seed(1234)
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
%matplotlib widget

# 3rd party
import torch
from torch_geometric.data import Data
from trackml.dataset import load_event
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

import os
os.environ['TRKXINPUTDIR']="/global/cfs/cdirs/m3443/data/trackml-kaggle/train_10evts"
os.environ['TRKXOUTPUTDIR']= "/global/cfs/projectdirs/m3443/usr/caditi97/iml2020/out0"

# local import
import sys
sys.path.append('/global/homes/c/caditi97/heptrkx-gnn-tracking/')
import heptrkx
from heptrkx.dataset import event as master
from exatrkx import config_dict # for accessing predefined configuration files
from exatrkx import outdir_dict # for accessing predefined output directories
from exatrkx.src import utils_dir

sys.path.append('/global/u2/c/caditi97/exatrkx-iml2020/exatrkx')
# for preprocessing
from exatrkx import FeatureStore
from exatrkx.src import utils_torch

# for embedding
from exatrkx import LayerlessEmbedding
from exatrkx import EmbeddingInferenceCallback
# for filtering
from exatrkx import VanillaFilter
from exatrkx import FilterInferenceCallback

In [3]:
embed_ckpt_dir = '/global/cfs/cdirs/m3443/data/lightning_models/embedding/checkpoints/epoch=10.ckpt'
filter_ckpt_dir = '/global/cfs/cdirs/m3443/data/lightning_models/filtering/checkpoints/epoch=54.ckpt'
outdir = '/global/cfs/projectdirs/m3443/usr/caditi97/iml2020/view_embedding'

In [4]:
evtid = 8000
n_pids = 10
event_file = os.path.join(os.environ['TRKXINPUTDIR'], 'event{:09}'.format(evtid))

In [5]:
event = master.Event(os.environ['TRKXINPUTDIR'])
event.read(evtid)

False

In [6]:
pids = event.particles[(event.particles.nhits) > 5]
np.random.seed(456)
rnd = np.random.randint(0, pids.shape[0], n_pids)
print("random idx: ", rnd)
sel_pids = pids.particle_id.values[rnd]

AttributeError: 'Event' object has no attribute '_particles'

In [None]:
event._hits = event.hits[event.hits.particle_id.isin(sel_pids)]
hits = event.cluster_info(utils_dir.detector_path)

In [None]:
hits.columns

In [None]:
hits.shape

In [None]:
hits = hits.assign(R=np.sqrt((hits.x - hits.vx)**2 + (hits.y - hits.vy)**2 + (hits.z - hits.vz)**2))
hits = hits.sort_values('R').reset_index(drop=True).reset_index(drop=False)
hit_list = hits.groupby(['particle_id', 'layer'], sort=False)['index'].agg(lambda x: list(x)).groupby(level=0).agg(lambda x: list(x))

e = []
for row in hit_list.values:
    for i, j in zip(row[0:-1], row[1:]):
        e.extend(list(itertools.product(i, j)))

layerless_true_edges = np.array(e).T

In [None]:
data = Data(x=torch.from_numpy(hits[['r', 'phi', 'z']].to_numpy()/np.array([1000, np.pi, 1000])).float(),\
            pid=torch.from_numpy(hits.particle_id.to_numpy()),
            layers=torch.from_numpy(hits.layer.to_numpy()), hid=torch.from_numpy(hits.hit_id.to_numpy()))

In [None]:
cell_features = ['cell_count', 'cell_val', 'leta', 'lphi', 'lx', 'ly', 'lz', 'geta', 'gphi']

In [None]:
data.layerless_true_edges = torch.from_numpy(layerless_true_edges)
data.cell_data = torch.from_numpy(hits[cell_features].values).float()

In [None]:
data

### Evaluating Embedding

In [None]:
action = 'embedding'

config_file = pkg_resources.resource_filename(
                    "exatrkx",
                    os.path.join('configs', config_dict[action]))
with open(config_file) as f:
    e_config = yaml.load(f, Loader=yaml.FullLoader)

pp = pprint.PrettyPrinter(indent=4)
pp.pprint(e_config)

In [None]:
e_config['train_split'] = [1, 0, 0]
e_config['r_val'] = 2.0

In [None]:
e_model = LayerlessEmbedding(e_config)

In [None]:
e_model = e_model.load_from_checkpoint(embed_ckpt_dir, hparams=e_config)

In [None]:
e_model.eval()

In [None]:
spatial = e_model(torch.cat([data.cell_data, data.x], axis=-1))

In [None]:
spatial.shape

In [None]:
spatial_np = spatial.detach().numpy()

In [None]:
data.pid

In [None]:
fig = plt.figure(figsize=(6,6))
for pid in sel_pids:
    idx = hits.particle_id == pid
    plt.scatter(spatial_np[idx, 0], spatial_np[idx, 1])
    
plt.savefig(os.path.join(outdir, "embedding_0_1.pdf"))

In [None]:
fig = plt.figure(figsize=(6,6))
for pid in sel_pids:
    idx = hits.particle_id == pid
    plt.scatter(spatial_np[idx, 2], spatial_np[idx, 3])
plt.savefig(os.path.join(outdir, "embedding_2_3.pdf"))

In [None]:
fig = plt.figure(figsize=(6,6))
for pid in sel_pids:
    idx = hits.particle_id == pid
    plt.scatter(spatial_np[idx, 4], spatial_np[idx, 5])
plt.savefig(os.path.join(outdir, "embedding_4_5.pdf"))

In [None]:
fig = plt.figure(figsize=(6,6))
for pid in sel_pids:
    idx = hits.particle_id == pid
    plt.scatter(spatial_np[idx, 6], spatial_np[idx, 7])
plt.savefig(os.path.join(outdir, "embedding_6_7.pdf"))

In [None]:
e_spatial = utils_torch.build_edges(spatial, e_model.hparams['r_val'], e_model.hparams['knn_val'])

In [None]:
e_spatial.shape

In [None]:
e_spatial[:, 0]

In [None]:
e_spatial_np = e_spatial.detach().numpy()

In [None]:
hits.iloc[[0, 2]].head()

In [None]:
event.particles[event.particles.particle_id.isin(sel_pids)]

In [None]:
hits.iloc[[e_spatial[0, 0], e_spatial[0, 1]]].x.values

In [None]:
e_spatial_np[0, 0], e_spatial_np[1, 0]

In [None]:
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111, projection='3d')
for pid in sel_pids:
    ax.scatter(hits[hits.particle_id == pid].x.values, hits[hits.particle_id == pid].y.values,  hits[hits.particle_id == pid].z.values)

# add edges
e_spatial_np_t = e_spatial_np.T
for iedge in range(e_spatial_np.shape[1]):
    ax.plot(hits.iloc[e_spatial_np_t[iedge]].x.values, hits.iloc[e_spatial_np_t[iedge]].y.values, hits.iloc[e_spatial_np_t[iedge]].z.values, color='k', alpha=0.3, lw=1.)
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

plt.savefig(os.path.join(outdir, "emedding_edges_3d.pdf"))

In [None]:
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
for pid in sel_pids:
    ax.scatter(hits[hits.particle_id == pid].x.values, hits[hits.particle_id == pid].y.values)

# add edges
e_spatial_np_t = e_spatial_np.T
for iedge in range(e_spatial_np.shape[1]):
    ax.plot(hits.iloc[e_spatial_np_t[iedge]].x.values, hits.iloc[e_spatial_np_t[iedge]].y.values, color='k', alpha=0.3, lw=2.)
ax.set_xlabel('X')
ax.set_ylabel('Y')
plt.savefig(os.path.join(outdir, "embedding_edges_x_y.pdf"))

In [None]:
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
for pid in sel_pids:
    ax.scatter(hits[hits.particle_id == pid].z.values, hits[hits.particle_id == pid].r.values)

# add edges
e_spatial_np_t = e_spatial_np.T
for iedge in range(e_spatial_np.shape[1]):
    ax.plot(hits.iloc[e_spatial_np_t[iedge]].z.values, hits.iloc[e_spatial_np_t[iedge]].r.values, color='k', alpha=0.3, lw=1.)
ax.set_xlabel('z')
ax.set_ylabel('r')
plt.savefig(os.path.join(outdir, "embedding_edges_z_r.pdf"))

In [None]:
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
ax.scatter(hits.x.values, hits.y.values)
plt.savefig(os.path.join(outdir, "embedding_hits_x_y.pdf"))

In [None]:
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
ax.scatter(hits.z.values, hits.r.values)
plt.savefig(os.path.join(outdir, "embedding_hits_z_r.pdf"))

In [None]:
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
for pid in sel_pids:
    ax.scatter(hits[hits.particle_id == pid].x.values, hits[hits.particle_id == pid].y.values)

plt.savefig(os.path.join(outdir, "embedding_hits_truth_x_y.pdf"))

In [None]:
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
for pid in sel_pids:
    ax.scatter(hits[hits.particle_id == pid].z.values, hits[hits.particle_id == pid].r.values)

plt.savefig(os.path.join(outdir, "embedding_hits_truth_z_r.pdf"))

In [None]:
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
for pid in sel_pids:
    ax.scatter(hits[hits.particle_id == pid].x.values, hits[hits.particle_id == pid].y.values)

# add edges
for iedge in range(e_spatial_np.shape[1]):
    ax.plot(hits.iloc[e_spatial_np_t[iedge]].x.values, hits.iloc[e_spatial_np_t[iedge]].y.values, color='k', alpha=0.3, lw=2.)

ax.set_axis_off()
plt.savefig(os.path.join(outdir, "embedding_front.pdf"))