# Exploratory analysis of ACTS data

This notebook demonstrates how to load the latest ACTS data (matching what will be used in the Tracking ML Challenge) and explores some of the data's characteristics.

In [1]:
from __future__ import print_function

import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from dataset import load_event

%matplotlib notebook

In [54]:
def draw_scatter_2d(x, y, s=1, marker='.', xlabel=None, ylabel=None, label=None, **kwargs):
    """Scatter-plot y against x with pyplot, labelling the axes on request.

    Parameters
    ----------
    x, y : coordinates forwarded to ``plt.scatter``.
    s, marker, label : marker size, marker style and legend label, forwarded
        to ``plt.scatter``.
    xlabel, ylabel : axis label text; ``None`` leaves that axis label untouched.
    **kwargs : any further keyword arguments are forwarded to ``plt.scatter``.
    """
    plt.scatter(x, y, s=s, marker=marker, label=label, **kwargs)
    # Apply only the axis labels that were actually supplied
    for axis_text, apply_label in ((xlabel, plt.xlabel), (ylabel, plt.ylabel)):
        if axis_text is not None:
            apply_label(axis_text)

## Load the data

In [2]:
# Directory holding the ACTS event files. Set the ACTS_DATA_DIR environment
# variable to run the notebook against a copy stored elsewhere; the default is
# the original production location.
data_dir = os.environ.get('ACTS_DATA_DIR',
                          '/global/cscratch1/sd/sfarrell/ACTS/prod_20171031_233247')

In [3]:
# Read the first event and wrap each of the three returned tables in a DataFrame
sample = 'event000000000'
event_path = os.path.join(data_dir, sample)
data = load_event(event_path)
hits, particles, truth = (pd.DataFrame(table) for table in data)

/global/cscratch1/sd/sfarrell/ACTS/prod_20171031_233247/event000000000


In [4]:
# Cylindrical coordinates of each hit, computed from its x/y position:
# r is the distance from the z axis, phi the azimuthal angle in radians.
hits['r'] = np.sqrt(hits['x']**2 + hits['y']**2)
hits['phi'] = np.arctan2(hits['y'], hits['x'])

In [5]:
# Preview the first five hits
hits.head(n=5)

Unnamed: 0,hit_id,volume_id,layer_id,module_id,x,y,z,ex,ey,ez,phi,theta,ephi,etheta,ncells,r
0,1,7,2,1,-65.425499,-2.06459,-1502.5,-1.0,-1.0,-1.0,-3.110047,3.09805,-1.0,-1.0,2,65.458069
1,2,7,2,1,-55.7528,-1.95534,-1502.5,-1.0,-1.0,-1.0,-3.106535,3.10448,-1.0,-1.0,1,55.787079
2,3,7,2,1,-53.653801,0.767741,-1502.5,-1.0,-1.0,-1.0,3.127285,3.10589,-1.0,-1.0,1,53.659294
3,4,7,2,1,-93.733101,-7.25157,-1502.5,-1.0,-1.0,-1.0,-3.064382,3.0791,-1.0,-1.0,2,94.013191
4,5,7,2,1,-37.351002,2.65265,-1502.5,-1.0,-1.0,-1.0,3.070692,3.11668,-1.0,-1.0,2,37.445076


In [6]:
# (rows, columns) of the hit table and of the particle table
(hits.shape, particles.shape)

((104996, 16), (13748, 8))

In [7]:
# 3D scatter of every hit; z is mapped onto the plot's x axis
ax = plt.figure(figsize=(9, 6)).add_subplot(1, 1, 1, projection='3d')
ax.scatter(hits['z'], hits['x'], hits['y'], s=0.5)
ax.set_xlabel('z [mm]')
ax.set_ylabel('x [mm]')
ax.set_zlabel('y [mm]')
# Fixed camera position: 10 degrees elevation, 110 degrees azimuth
ax.view_init(elev=10, azim=110)
plt.tight_layout()

<IPython.core.display.Javascript object>

### 2D scatter plots

In [8]:
# Draw the hits in the r-z plane twice: one figure with a scatter series per
# volume_id, then one figure with a scatter series per layer_id.
for id_column, legend_title in [('volume_id', 'volume'), ('layer_id', 'layer')]:
    plt.figure(figsize=(9,4))
    for group_id, group_hits in hits.groupby(id_column):
        plt.scatter(group_hits.z, group_hits.r, s=5, label=group_id)
    plt.xlabel('z [mm]')
    plt.ylabel('r [mm]')
    plt.legend(loc=0, title=legend_title)
    plt.tight_layout()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
# Visualize modules on the first layer (volume 8, layer 2)
lay0_hits = hits.groupby(['volume_id', 'layer_id']).get_group((8, 2))
plt.figure()
# Draw each module separately so that every module gets its own colour
for mod, mod_hits in lay0_hits.groupby('module_id'):
    plt.scatter(mod_hits.z, mod_hits.phi, s=100, marker='.')
# Label the axes like the other figures; phi comes from np.arctan2, i.e. radians
plt.xlabel('z [mm]')
plt.ylabel(r'$\phi$ [rad]')
plt.tight_layout()

<IPython.core.display.Javascript object>

In [10]:
# X-Y projection of the volumes listed in barrel_vols, one scatter series each.
# vol_groups is kept as a named variable because later cells look volumes up in it.
barrel_vols = [8, 13, 17]
vol_groups = hits.groupby('volume_id')

plt.figure(figsize=(8, 8))
for vol_hits in map(vol_groups.get_group, barrel_vols):
    plt.scatter(vol_hits['x'], vol_hits['y'], s=5)
plt.xlabel('x [mm]')
plt.ylabel('y [mm]')
plt.tight_layout()

<IPython.core.display.Javascript object>

In [11]:
# X-Y projection of the volumes listed in endcap_vols, one scatter series each.
# Relies on vol_groups (hits grouped by volume_id) from the barrel cell above.
endcap_vols = [7, 9, 12, 14, 16, 18]
plt.figure(figsize=(8, 8))
for vol_hits in map(vol_groups.get_group, endcap_vols):
    plt.scatter(vol_hits['x'], vol_hits['y'], s=5)
plt.xlabel('x [mm]')
plt.ylabel('y [mm]')
plt.tight_layout()

<IPython.core.display.Javascript object>

## Particle distributions

In [14]:
# Transverse momentum: magnitude of the (px, py) momentum components
particles['pt'] = np.sqrt(particles['px']**2 + particles['py']**2)

In [12]:
# Preview the first five particles
particles.head(n=5)

Unnamed: 0,particle_id,vx,vy,vz,px,py,pz,q
0,0,-0.004456,0.00465,3.19061,614.398987,-575.362,30067.800781,1
1,137438953472,-0.004456,0.00465,3.19061,-188.929001,-5.88943,-4348.370117,-1
2,206158430208,-0.004456,0.00465,3.19061,32.6366,441.131989,-7357.939941,1
3,274877906944,-0.004456,0.00465,3.19061,62.283699,-224.740005,-1324.439941,-1
4,343597383680,-0.004456,0.00465,3.19061,481.276001,785.393982,-110532.0,1


In [19]:
# Histogram of particle pT on a log-scaled count axis. pt is multiplied by 1e-3
# to match the GeV axis label (presumably momenta are stored in MeV - confirm).
plt.figure()
plt.hist(1e-3 * particles['pt'], log=True)
plt.xlabel('$p_T$ [GeV]')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x2b9398ff8410>

## Visualize some tracks

In [37]:
# Attach the truth particle_id to every hit by joining on hit_id.
# The guard keeps the cell idempotent: merging a second time would produce
# particle_id_x / particle_id_y columns and break groupby('particle_id') below.
if 'particle_id' not in hits.columns:
    hits = hits.merge(truth, on='hit_id')

In [38]:
# Preview the first five hits after the truth merge
hits.head(n=5)

Unnamed: 0,hit_id,volume_id,layer_id,module_id,x,y,z,ex,ey,ez,phi,theta,ephi,etheta,ncells,r,particle_id
0,1,7,2,1,-65.425499,-2.06459,-1502.5,-1.0,-1.0,-1.0,-3.110047,3.09805,-1.0,-1.0,2,65.458069,137438953472
1,2,7,2,1,-55.7528,-1.95534,-1502.5,-1.0,-1.0,-1.0,-3.106535,3.10448,-1.0,-1.0,1,55.787079,1168231104512
2,3,7,2,1,-53.653801,0.767741,-1502.5,-1.0,-1.0,-1.0,3.127285,3.10589,-1.0,-1.0,1,53.659294,40536932131799040
3,4,7,2,1,-93.733101,-7.25157,-1502.5,-1.0,-1.0,-1.0,-3.064382,3.0791,-1.0,-1.0,2,94.013191,40543323043135488
4,5,7,2,1,-37.351002,2.65265,-1502.5,-1.0,-1.0,-1.0,3.070692,3.11668,-1.0,-1.0,2,37.445076,54047249977573376


In [55]:
hits_groups = hits.groupby('particle_id')

# Choose a random sampling of particles.
# Each particle is weighted by its number of hits, mirroring a draw from the
# per-hit particle_id column, but the draw is done without replacement so the
# same particle is never plotted twice. Ids are taken from the grouped hits so
# that every sampled id is guaranteed to have a group.
n = 4
track_sizes = hits_groups.size()
pids = np.random.choice(track_sizes.index.values,
                        size=min(n, len(track_sizes)),
                        replace=False,
                        # float() avoids integer division under Python 2
                        p=track_sizes.values / float(track_sizes.sum()))
for pid in pids:
    par_hits = hits_groups.get_group(pid)
    # One figure per particle with three projections of its hits
    plt.figure(figsize=(9,3))
    plt.subplot(131)
    draw_scatter_2d(par_hits.x, par_hits.y, s=100, xlabel='x [mm]', ylabel='y [mm]')
    plt.subplot(132)
    # Raw string: '\p' is not a valid escape sequence in a normal string literal
    draw_scatter_2d(par_hits.phi, par_hits.r, s=100, xlabel=r'$\phi$', ylabel='r [mm]')
    plt.subplot(133)
    draw_scatter_2d(par_hits.z, par_hits.r, s=100, xlabel='z [mm]', ylabel='r [mm]')
    plt.tight_layout()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>