In [1]:
import sys, os, glob, yaml

In [2]:
import math
import random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import trackml.dataset
import seaborn as sns

In [4]:
# Make the local `src` directory importable. The membership check keeps a
# re-run of this cell from appending duplicate entries to sys.path.
if 'src' not in sys.path:
    sys.path.append('src')

### _Dataset_

In [5]:
# Dataset selection: keep exactly one `input_dir` assignment active below.
# The active path is relative to the current working directory.

# mu- data (old)
# input_dir = './data_sets/pandaml/data_3.0_7.0_GeV/'

# mu+mu- data (current)
input_dir = './data_sets/pandaml/data_0.1_1.5_GeV/'

# pbarp data (coming)
# input_dir = os.environ['HOME']+'/current/2_deepana/pandaml/data/'

In [6]:
# Discover the events available in `input_dir`. Each event is stored as a set
# of CSV files sharing one prefix (hits.csv, cells.csv, particles.csv,
# truth.csv); the '-hits.csv' file is used as the marker for an event.
suffix = '-hits.csv'
all_files = os.listdir(input_dir)

# Strip only the trailing suffix: str.replace() would remove every occurrence
# of '-hits.csv' inside the name, not just the one at the end.
file_prefixes = sorted(os.path.join(input_dir, f[:-len(suffix)])
                       for f in all_files if f.endswith(suffix))

In [7]:
# Show the first ten event prefixes as a sanity check on the file discovery.
file_prefixes[:10]

['./data_sets/pandaml/data_0.1_1.5_GeV/event0000000000',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000001',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000002',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000003',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000004',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000005',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000006',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000007',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000008',
 './data_sets/pandaml/data_0.1_1.5_GeV/event0000000009']

In [8]:
# Pick the event to inspect. `event_id` is used here as a position in the
# sorted `file_prefixes` list, not as a number parsed from the file name.
event_id = 0
event_prefix = file_prefixes[event_id]

In [9]:
# Read the four tables that make up the selected event.
hits, tubes, particles, truth = trackml.dataset.load_event(event_prefix)

# Combined in-memory footprint of the four frames, index included.
mem_bytes = sum(frame.memory_usage(index=True).sum()
                for frame in (hits, tubes, particles, truth))

print('{} memory usage {:.2f} MB'.format(os.path.basename(event_prefix), mem_bytes / 2**20))

event0000000000 memory usage 0.03 MB


In [10]:
# Preview the hits table: hit_id, x/y/z position and volume/layer/module ids.
hits.head()

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id
0,1,-14.8697,-10.605,35.0,9,0,38
1,2,-15.7443,-11.11,35.0,9,1,144
2,3,-16.618999,-11.615,35.0,9,2,256
3,4,-17.4937,-12.12,35.0,9,3,374
4,5,-18.368401,-12.625,35.0,9,4,498


In [11]:
# Preview the second table returned by load_event (named `tubes` here):
# isochrone, depcharge, energyloss, detector ids, skewed flag and sector_id.
tubes.head()

Unnamed: 0,hit_id,isochrone,depcharge,energyloss,volume_id,layer_id,module_id,skewed,sector_id
0,1,0.034734,15204100.0,15.2041,9,0,38,0,2
1,2,0.04814,25058700.0,25.0587,9,1,144,0,2
2,3,0.055623,13596100.0,13.5961,9,2,256,0,2
3,4,0.046304,12138100.0,12.1381,9,3,374,0,2
4,5,0.009433,26845900.0,26.8459,9,4,498,0,2


In [12]:
# Preview the particles table: particle_id, vertex (vx, vy, vz), momentum
# (px, py, pz), q, nhits, pdgcode and start_time.
particles.head()

Unnamed: 0,particle_id,vx,vy,vz,px,py,pz,q,nhits,pdgcode,start_time
0,1,0.0,0.0,0.0,0.417332,0.938952,-0.457557,1,25,13,0
1,2,0.0,0.0,0.0,-0.493657,-0.352344,0.71238,1,24,13,0
2,3,0.0,0.0,0.0,-0.166948,0.05912,0.264588,1,26,13,0
3,4,0.0,0.0,0.0,0.57395,0.219286,-0.417693,1,22,13,0
4,5,0.0,0.0,0.0,-0.296571,0.510891,0.733194,1,26,13,0


In [13]:
# Preview the truth table: per-hit tx/ty/tz, tpx/tpy/tpz, weight and the
# particle_id linking each hit to a row of `particles`.
truth.head()

Unnamed: 0,hit_id,tx,ty,tz,tpx,tpy,tpz,weight,particle_id
0,1,-14.852,-10.6347,43.741699,-0.273197,-0.162639,0.765421,1.0,10
1,2,-15.7222,-11.1478,46.171398,-0.274899,-0.160593,0.765203,1.0,10
2,3,-16.597,-11.6535,48.600101,-0.276364,-0.157971,0.765186,1.0,10
3,4,-17.4767,-12.1505,51.0298,-0.277693,-0.155004,0.765268,1.0,10
4,5,-18.360901,-12.6388,53.460499,-0.279028,-0.152464,0.765253,1.0,10


### _Build Event_

* Why is **pt** built from **tpx** & **tpy** rather than **px** & **py**?

What do **left** and **inner** mean here?

```
# whether noise is true or false
if noise:
    truth = truth.merge(particles[["particle_id", "vx", "vy", "vz"]], on="particle_id", how="left")
else:
    truth = truth.merge(particles[["particle_id", "vx", "vy", "vz"]], on="particle_id", how="inner")
    
# this is strange to build pt from tpx & tpy
truth = truth.assign(pt=np.sqrt(truth.tpx**2 + truth.tpy**2))
```

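**TODO:** Investigate why `pt` is constructed from `tpx` and `tpy` (rather than `px` and `py`) in both cases, and what the impact of merging with `how="left"` versus `how="inner"` is. With `how="left"`, every row of `truth` is kept and rows whose `particle_id` has no match in `particles` get `NaN` for `vx`, `vy`, `vz`; with `how="inner"`, those rows are dropped.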


The snippet above is taken from the Processing stage of the Exa.TrkX-HSF pipeline.

In [14]:
# Case noise == False: attach each particle's vertex (vx, vy, vz) to its truth
# rows. The inner join keeps only truth rows whose particle_id is present in
# `particles`.
event = pd.merge(
    truth,
    particles[["particle_id", "vx", "vy", "vz"]],
    how="inner",
    on="particle_id",
)

In [15]:
# Inspect the merged frame: truth columns plus vx, vy, vz from `particles`.
event.head()

Unnamed: 0,hit_id,tx,ty,tz,tpx,tpy,tpz,weight,particle_id,vx,vy,vz
0,1,-14.852,-10.6347,43.741699,-0.273197,-0.162639,0.765421,1.0,10,0.0,0.0,0.0
1,2,-15.7222,-11.1478,46.171398,-0.274899,-0.160593,0.765203,1.0,10,0.0,0.0,0.0
2,3,-16.597,-11.6535,48.600101,-0.276364,-0.157971,0.765186,1.0,10,0.0,0.0,0.0
3,4,-17.4767,-12.1505,51.0298,-0.277693,-0.155004,0.765268,1.0,10,0.0,0.0,0.0
4,5,-18.360901,-12.6388,53.460499,-0.279028,-0.152464,0.765253,1.0,10,0.0,0.0,0.0


In [16]:
# Add the transverse momentum pt = sqrt(tpx^2 + tpy^2) as a new column.
# The momentum components are read from `event` itself, not from `truth`:
# assign() aligns a Series on index labels, and the inner merge above gives
# `event` a fresh 0..n-1 index, so taking the values from `truth` would pair
# rows with the wrong hits as soon as the merge drops any row.
event = event.assign(pt=np.sqrt(event.tpx**2 + event.tpy**2))

In [17]:
# Inspect the frame again; it now carries the additional `pt` column.
event.head()

Unnamed: 0,hit_id,tx,ty,tz,tpx,tpy,tpz,weight,particle_id,vx,vy,vz,pt
0,1,-14.852,-10.6347,43.741699,-0.273197,-0.162639,0.765421,1.0,10,0.0,0.0,0.0,0.317943
1,2,-15.7222,-11.1478,46.171398,-0.274899,-0.160593,0.765203,1.0,10,0.0,0.0,0.0,0.31837
2,3,-16.597,-11.6535,48.600101,-0.276364,-0.157971,0.765186,1.0,10,0.0,0.0,0.0,0.318327
3,4,-17.4767,-12.1505,51.0298,-0.277693,-0.155004,0.765268,1.0,10,0.0,0.0,0.0,0.318025
4,5,-18.360901,-12.6388,53.460499,-0.279028,-0.152464,0.765253,1.0,10,0.0,0.0,0.0,0.317965


In [18]:
# Add the cylindrical coordinates r and phi to the hits, then join the result
# with the truth-level frame on hit_id.
event = (
    hits
    .assign(
        r=lambda df: np.sqrt(df.x**2 + df.y**2),
        phi=lambda df: np.arctan2(df.y, df.x),
    )
    .merge(event, on="hit_id")
)

In [19]:
# Inspect the combined frame: hit columns, r and phi, then the truth columns.
event.head()

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id,r,phi,tx,...,tz,tpx,tpy,tpz,weight,particle_id,vx,vy,vz,pt
0,1,-14.8697,-10.605,35.0,9,0,38,18.264008,-2.522066,-14.852,...,43.741699,-0.273197,-0.162639,0.765421,1.0,10,0.0,0.0,0.0,0.317943
1,2,-15.7443,-11.11,35.0,9,1,144,19.269537,-2.527083,-15.7222,...,46.171398,-0.274899,-0.160593,0.765203,1.0,10,0.0,0.0,0.0,0.31837
2,3,-16.618999,-11.615,35.0,9,2,256,20.275585,-2.531606,-16.597,...,48.600101,-0.276364,-0.157971,0.765186,1.0,10,0.0,0.0,0.0,0.318327
3,4,-17.4937,-12.12,35.0,9,3,374,21.282009,-2.535701,-17.4767,...,51.0298,-0.277693,-0.155004,0.765268,1.0,10,0.0,0.0,0.0,0.318025
4,5,-18.368401,-12.625,35.0,9,4,498,22.288759,-2.539426,-18.360901,...,53.460499,-0.279028,-0.152464,0.765253,1.0,10,0.0,0.0,0.0,0.317965


In [20]:
# Tag every row with the event number, parsed from the last 10 characters of
# the file prefix (e.g. '...event0000000000' -> 0).
# NOTE(review): assumes the prefix always ends in a 10-digit event number.
event = event.assign(event_id=int(event_prefix[-10:]))

In [21]:
# Inspect the frame again; it now carries the additional `event_id` column.
event.head()

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id,r,phi,tx,...,tpx,tpy,tpz,weight,particle_id,vx,vy,vz,pt,event_id
0,1,-14.8697,-10.605,35.0,9,0,38,18.264008,-2.522066,-14.852,...,-0.273197,-0.162639,0.765421,1.0,10,0.0,0.0,0.0,0.317943,0
1,2,-15.7443,-11.11,35.0,9,1,144,19.269537,-2.527083,-15.7222,...,-0.274899,-0.160593,0.765203,1.0,10,0.0,0.0,0.0,0.31837,0
2,3,-16.618999,-11.615,35.0,9,2,256,20.275585,-2.531606,-16.597,...,-0.276364,-0.157971,0.765186,1.0,10,0.0,0.0,0.0,0.318327,0
3,4,-17.4937,-12.12,35.0,9,3,374,21.282009,-2.535701,-17.4767,...,-0.277693,-0.155004,0.765268,1.0,10,0.0,0.0,0.0,0.318025,0
4,5,-18.368401,-12.625,35.0,9,4,498,22.288759,-2.539426,-18.360901,...,-0.279028,-0.152464,0.765253,1.0,10,0.0,0.0,0.0,0.317965,0


In [22]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
# of the assembled event frame.
event.describe()

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id,r,phi,tx,...,tpx,tpy,tpz,weight,particle_id,vx,vy,vz,pt,event_id
count,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,...,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0
mean,120.5,1.66686,3.719131,34.890762,9.0,11.45,1968.0375,28.352209,0.168121,1.605767,...,0.200206,0.125378,0.260635,1.0,5.416667,0.0,0.0,0.0,0.691633,0.0
std,69.42622,21.761808,19.167187,9.106841,0.0,7.06497,1377.139541,7.101119,1.770805,21.818062,...,0.509809,0.549222,0.539377,0.0,2.863954,0.0,0.0,0.0,0.370443,0.0
min,1.0,-40.2355,-35.855,-10.8937,9.0,0.0,8.0,16.626699,-3.128167,-40.362999,...,-0.441819,-0.948592,-0.457411,1.0,1.0,0.0,0.0,0.0,0.164137,0.0
25%,60.75,-20.1178,-14.14,35.0,9.0,5.0,707.75,22.012431,-1.099636,-19.97675,...,-0.288401,-0.140807,-0.413396,1.0,3.0,0.0,0.0,0.0,0.318017,0.0
50%,120.5,9.62154,6.259525,35.0,9.0,11.0,1781.5,28.089541,0.387275,9.6308,...,0.316095,0.269666,0.504254,1.0,5.0,0.0,0.0,0.0,0.609522,0.0
75%,180.25,17.712375,18.81125,35.0,9.0,17.0,3132.25,34.808197,1.194868,17.833325,...,0.43368,0.45717,0.719044,1.0,8.0,0.0,0.0,0.0,1.024018,0.0
max,240.0,38.486198,37.369999,92.460999,9.0,25.0,4540.0,40.399979,3.141593,38.4039,...,1.33693,0.976567,0.95405,1.0,10.0,0.0,0.0,0.0,1.386718,0.0
