Triplet Track Param Fitting & Classification
=================================

tasks: 
-----------------
Read an event using heptrkx.master
Read all triplets from 1 event
Look up hitid of each triplets in the event table to get necessary info (pt, pos, …)
Fit the triplets to get tracking parameters
Classify tracking parameters

In [12]:
import sys
sys.path.append('/global/homes/c/cheliu/exatrkx-neurips19_tf2/gnn-tracking/heptrkx')

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from heptrkx import master
from heptrkx.preprocess.utils_mldata import get_track_parameters
from heptrkx.postprocess.trackfitter import conformal_mapping
from sklearn.cluster import DBSCAN
import pandas as pd
from pandas import DataFrame
from trackml.score import score_event

Data Directories:
------------

In [2]:
triplets_dir = "/global/project/projectdirs/m3443/usr/dtmurnane/XY_Triplets_1/"
events_dir = "/global/cfs/cdirs/m3443/data/trackml-kaggle/train_all/"

Event ID:
----------

In [3]:
evtid = 9919

Utility Functions:
-------------

In [4]:
# Should I remove noise hits?? 
# returns pandas dataframe for event with evtid
def load_event(source_dir, evtid):
    event = master.Event(source_dir, evtid)
#     event.read(evtid)
    event.remove_noise_hits()
    event.remove_duplicated_hits()
    return event

# return pandas dataframe for all triplets in event with evtid
def load_triplets(source_dir, evtid):
    dir_str = '{}event{}'.format(source_dir, evtid)
    df = pd.read_csv(dir_str, names=['evtid', 'hitid0', 'hitid1', 'hitid2'])
    #header=['evtid', 'hitid0', 'hitid1', 'hitid2']
    return df



In [5]:
# event has attributes particles, cells, hits, truth, evtid
event = load_event(events_dir, evtid)
triplets_df = load_triplets(triplets_dir, evtid)

In [20]:
# create a df with columns: [evtid, hit_id1, hit_id2, hit_id3, x1, y1, z1, x2, y2, z2, x3, y3, z3]

In [6]:
event.truth.head()

Unnamed: 0,hit_id,particle_id,tx,ty,tz,tpx,tpy,tpz,weight
0,1,653028130721628160,-56.447201,-12.0558,-1502.5,-0.304385,-0.043068,-7.98553,8e-06
1,2,369296612553392128,-73.316803,-11.2346,-1502.5,-0.954215,-0.119488,-19.4755,1.4e-05
2,3,765613585920425984,-94.426201,-12.0557,-1502.5,-0.275608,-0.007431,-4.19443,7e-06
3,4,135115341805125632,-98.233597,-1.99301,-1502.5,-1.89966,-0.068547,-29.0418,2.5e-05
4,5,860190346326310912,-58.658901,-5.16357,-1502.5,-0.181605,-0.034195,-4.80375,8e-06


In [6]:
triplets_df.head()

Unnamed: 0,evtid,hitid0,hitid1,hitid2
0,9919,14494,14500,22777
1,9919,16487,16572,24946
2,9919,22559,22560,29153
3,9919,14500,22777,29633
4,9919,14498,23034,30206


Merge Triplets DataFrame and Truth DataFrame
----------------------------------

In [7]:
truth_df = event.truth
# event_table == triplets_df
full_df = pd.DataFrame.copy(triplets_df)
for counter in range(3):
    leftColumn = "hitid" + str(counter)
    clean_truth_df = pd.DataFrame.copy(truth_df)
    clean_truth_df = clean_truth_df.drop(columns=["tpx", "tpy", "tpz", "weight"])
    clean_truth_df = clean_truth_df.rename(columns=(lambda colName: colName + str(counter)))
    full_df = pd.merge(full_df, clean_truth_df, left_on=leftColumn, right_on=("hit_id" + str(counter)))
full_df = full_df.drop(columns=[full_df.columns[i] for i in range(1, 4)])
full_df.head()

Unnamed: 0,evtid,hit_id0,particle_id0,tx0,ty0,tz0,hit_id1,particle_id1,tx1,ty1,tz1,hit_id2,particle_id2,tx2,ty2,tz2
0,9919,14494,72061304906457090,-23.116699,22.170601,-465.843994,14500,72061304906457090,-24.460899,22.8022,-466.221008,22777,72061304906452993,-56.504501,44.760799,-455.858002
1,9919,16487,774619410802417666,-29.047899,12.1427,-89.785896,16572,774619410802417666,-30.788099,12.4312,-89.771797,24946,774619410802417666,-68.758102,20.201099,-89.294197
2,9919,16487,774619410802417666,-29.047899,12.1427,-89.785896,24946,774619410802417666,-68.758102,20.201099,-89.294197,31695,774619410802417666,-110.783997,32.755001,-88.997299
3,9919,16572,774619410802417666,-30.788099,12.4312,-89.771797,24946,774619410802417666,-68.758102,20.201099,-89.294197,31695,774619410802417666,-110.783997,32.755001,-88.997299
4,9919,22559,675548877654331393,-29.5805,12.178,417.037994,22560,675548877654331393,-31.249701,13.0395,416.94101,29153,675548877654331393,-64.584396,31.6936,414.859009


Compute Tracking Parameters for Each Triplet
--------------------------------------

In [8]:
pos_df = full_df[['tx0', 'tx1', 'tx2', 'ty0', 'ty1', 'ty2', 'tz0', 'tz1', 'tz2']]
pos_df.head()


Unnamed: 0,tx0,tx1,tx2,ty0,ty1,ty2,tz0,tz1,tz2
0,-23.116699,-24.460899,-56.504501,22.170601,22.8022,44.760799,-465.843994,-466.221008,-455.858002
1,-29.047899,-30.788099,-68.758102,12.1427,12.4312,20.201099,-89.785896,-89.771797,-89.294197
2,-29.047899,-68.758102,-110.783997,12.1427,20.201099,32.755001,-89.785896,-89.294197,-88.997299
3,-30.788099,-68.758102,-110.783997,12.4312,20.201099,32.755001,-89.771797,-89.294197,-88.997299
4,-29.5805,-31.249701,-64.584396,12.178,13.0395,31.6936,417.037994,416.94101,414.859009


In [9]:
%%time
def helper(row):    
    pos_arr = []
    for i in range(0, 9, 3):
        a = row[i:i+3]
        a = a.reset_index()
        a = a.drop(labels='index', axis=1)
        pos_arr.append(a)        
    x, y, z = pos_arr[0], pos_arr[1], pos_arr[2]
    x = x.to_numpy().flatten()
    y = y.to_numpy().flatten()
    z = z.to_numpy().flatten()
    print(x)
    print(y)
    print(z)
#     print(x**2 + y**2 + z**2)
    return conformal_mapping(x, y, z)

track_params = pos_df.apply(helper, axis=1)


[-23.1167 -24.4609 -56.5045]
[22.1706 22.8022 44.7608]
[-465.844 -466.221 -455.858]


IndexError: invalid index to scalar variable.

In [10]:
track_params

NameError: name 'track_params' is not defined

In [12]:
track_params.to_numpy()

array([(13.753137856405573, -470.9243526363415, 1.3570038259157016, nan, 0.07519954833984374),
       (9.10607941472449, -90.06504391660023, 1.5084167966362725, 0.025426074908341516, 0.21843660278320312),
       (8.4345542243816, -89.96138965205176, 1.4780059966694798, 0.014977942723598684, 0.28114599609375),
       ...,
       (2.8936392383016027, -5.703993319413549, -1.0921443149798846, 0.8517130976786559, 1.3369403320312498),
       (2.134771330028002, -219.96455542137656, 1.4968386676408214, 1.0631199546908052, -0.37135861816406246),
       (9.479395349665538, -18.369319500883627, 2.0661972778149247, 0.8684097386645122, -0.8834395751953125)],
      dtype=object)

Clustering Triplets with DBSCAN
--------------------------

In [13]:
clf = DBSCAN(eps=0.5, min_samples=2)
clustering = clf.fit(track_params)

ValueError: setting an array element with a sequence.