In [None]:
import sys, os, glob, yaml

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import pprint
import seaborn as sns
import trackml.dataset

In [None]:
import torch
from torch_geometric.data import Data
import itertools

In [None]:
# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Export the notebook's working directory (as an absolute path) via EXATRKX_DATA.
os.environ.update(EXATRKX_DATA=os.path.abspath(os.curdir))

### _Dataset_

In [None]:
# mu+mu- data (current)
# Directory that is scanned below for the per-event `*-hits.csv` files
# (relative to the notebook's working directory).
input_dir = 'train_all'

In [None]:
# List everything in the input directory; each event contributes several CSV
# files (hits.csv, cells.csv, particles.csv, truth.csv) sharing one prefix.
all_files = os.listdir(input_dir)

# One prefix per event: keep the "xxx-hits.csv" entries, strip the suffix and
# prepend the directory, then order the prefixes alphabetically.
suffix = '-hits.csv'
file_prefixes = [os.path.join(input_dir, name.replace(suffix, ''))
                 for name in all_files
                 if name.endswith(suffix)]
file_prefixes.sort()

In [None]:
# Show (at most) the first ten event prefixes as a check on the file discovery.
file_prefixes[:10]

In [None]:
# Pick one event by its position in the sorted prefix list; the same event_id
# is later passed to the CSV reader as well.
event_id = 1
event_prefix = file_prefixes[event_id]

In [None]:
# Read the four tables that belong to the selected event prefix.
hits, tubes, particles, truth = trackml.dataset.load_event(event_prefix)

# Total in-memory footprint of the four tables (index included), in bytes.
mem_bytes = sum(frame.memory_usage(index=True).sum()
                for frame in (hits, tubes, particles, truth))

# Report the footprint in MB (1 MB = 2**20 bytes here).
event_name = os.path.basename(event_prefix)
print('{} memory usage {:.2f} MB'.format(event_name, mem_bytes / 2**20))

In [None]:
# Preview the first rows of the `hits` table returned by load_event.
hits.head()

In [None]:
# Preview the first rows of `tubes`, the second table returned by load_event.
tubes.head()

In [None]:
# Preview the first rows of the `particles` table returned by load_event.
particles.head()

In [None]:
# Preview the first rows of the `truth` table returned by load_event.
truth.head()

### _Build Event_

In [None]:
from src import Compose_Event, Draw_Compose_Event

In [None]:
# Build one event object from the files sharing the selected prefix.
# NOTE(review): presumably merges the hits/tubes/particles/truth tables —
# confirm against src.Compose_Event.
event = Compose_Event(event_prefix)

In [None]:
# Shape of the composed event (presumably rows x columns of a table — TODO confirm).
event.shape

In [None]:
# Draw the composed event; the trailing `;` suppresses the cell's text output.
# NOTE(review): save_fig=True presumably writes the figure to disk — confirm
# the output location in src.Draw_Compose_Event.
Draw_Compose_Event(event, figsize=(10, 10), save_fig=True);

### SttCSVReader

In [None]:
from src import SttCSVReader, Draw_Reader_Event

In [None]:
# Reader over the CSV events found in `input_dir`.
# NOTE(review): the two positional True flags are opaque from here — confirm
# their meaning in src.SttCSVReader and consider passing them by keyword.
csv_reader = SttCSVReader(input_dir, True, True)

In [None]:
# Read one event through the reader, using the same `event_id` chosen earlier.
data = csv_reader(event_id)

In [None]:
# Preview the first rows of the reader's hits table.
data.hits.head()

In [None]:
# Shape of the reader's hits table.
data.hits.shape

In [None]:
# Distinct layer_id values present among this event's hits.
data.hits.layer_id.unique()

In [None]:
# Event-file attribute recorded on the reader output
# (exact content depends on SttCSVReader — TODO confirm).
data.event_file

In [None]:
# Preview the first rows of the reader's `event` table.
data.event.head()

In [None]:
# Particle table of the reader output; shown in full, unlike the .head() previews above.
data.particles

## _Detector Layout_

In [None]:
# Draw the event loaded through the reader; the trailing `;` suppresses the
# cell's text output.
Draw_Reader_Event(data, figsize=(10,10));

## _Plot True & False Edges_

- processed events from the `feature_store/train`
- code from `notebooks/example_gnn.ipynb`

In [None]:
from src.drawing import detector_layout
from src.utils_math import polar_to_cartesian

In [None]:
# Positional index of the processed event to inspect; it indexes `proc_files`
# and is also used in the names of the saved figures.
event_idx = 1

In [None]:
# Processed (feature-store) events: every entry directly under the train
# directory, in alphabetical order.
inputdir = "run/feature_store/train"
proc_files = glob.glob(os.path.join(inputdir, "*"))
proc_files.sort()

In [None]:
# path of the processed file selected by event_idx
proc_files[event_idx]

In [None]:
# Load the processed event onto the CPU. The cells below pass these tensors
# straight to matplotlib, which can only convert CPU tensors to NumPy arrays;
# loading with map_location=device would break plotting on a CUDA machine.
# NOTE(review): torch.load unpickles the file — only load trusted feature-store files.
feature_data = torch.load(proc_files[event_idx], map_location="cpu")

In [None]:
# get spatial coordinates, note: the columns of feature_data.x are unpacked as
# (r, phi, ir); r and phi are converted to cartesian x, y in the next cell
r, phi, ir = feature_data.x.T

In [None]:
# coord. transform: polar (r, phi) -> cartesian (x, y)
x, y = polar_to_cartesian(r, phi)

# compensate scaling (factor 100 presumably undoes the feature-store
# normalisation — TODO confirm). Taken from the stored third feature column
# rather than `ir = ir*100`, so re-running this cell does not rescale twice.
ir = feature_data.x[:, 2] * 100

In [None]:
# Scatter every hit of the event on top of the detector layout.
fig, ax = detector_layout(figsize=(10, 10))
ax.scatter(x, y, s=20)

# cosmetics
ax.grid(False)
ax.set_title("Azimuthal View of Detector", fontsize=15)
fig.tight_layout()
# fig.savefig('event_{}.png'.format(event_idx))

In [None]:
# get true_edges
e = feature_data.edge_index  # edge list; e[0] and e[1] index the two endpoints of each edge
pid = feature_data.pid  # presumably the per-hit particle id — TODO confirm against the feature store
# boolean mask over edges: True where both endpoints carry the same pid
true_edges = pid[e[0]] == pid[e[1]]

In [None]:
# Overlay only the true edges (both endpoints share a pid) on the hits.
fig, ax = detector_layout(figsize=(10, 10))

true_pairs = e[:, true_edges]  # endpoint indices of the true edges
ax.plot(x[true_pairs], y[true_pairs], c="b")
ax.scatter(x, y, s=20)

# cosmetics, then save next to the notebook
ax.grid(False)
ax.set_title("Azimuthal View of Detector", fontsize=15)
fig.tight_layout()
fig.savefig('true_edges_{}.png'.format(event_idx))

In [None]:
# Overlay only the false edges (endpoints with different pid) on the hits.
fig, ax = detector_layout(figsize=(10, 10))

false_pairs = e[:, ~true_edges]  # endpoint indices of the false edges
ax.plot(x[false_pairs], y[false_pairs], c="r")
ax.scatter(x, y, s=20)

# cosmetics, then save next to the notebook
ax.grid(False)
ax.set_title("Azimuthal View of Detector", fontsize=15)
fig.tight_layout()
fig.savefig('false_edges_{}.png'.format(event_idx))

In [None]:
# plot all edges (true + false); only every 5th false edge is drawn to keep
# the picture readable
fig, ax = detector_layout(figsize=(10,10))

# thinned-out false edges (the 0:-1:5 slice also leaves out the final column)
false_subset = e[:, (~true_edges)][:, 0:-1:5]
ax.plot(x[false_subset], y[false_subset], c="r")

# all true edges; previously missing, so the figure showed false edges only
ax.plot(x[e[:, true_edges]], y[e[:, true_edges]], c="b")
ax.scatter(x, y, s=20)

ax.set_title("Azimuthal View of Detector", fontsize=15)
ax.grid(False)
fig.tight_layout()
fig.savefig('true_edges_less_{}.png'.format(event_idx))

## _Inspect HDFStore_

- See [IO tools (text, CSV, HDF5, …)](https://pandas.pydata.org/docs/user_guide/io.html)

#### HDF5 Store

- read/write using `pd.HDFStore("path/to/*.h5")`
- read/write using `pd.read_hdf()` and `DataFrame.to_hdf()`, similar to `pd.read_csv()` and `DataFrame.to_csv()`
    - `pd.read_hdf()`: Read from the store, close it if we opened it.
- close when finished

In [None]:
# path to HDF5 Store (relative to the notebook's working directory)
hdf5_path = 'run/trkx_reco_eval/eval_particles.h5'

In [None]:
# Method 1: open the store read-only inside a `with` block, so it is closed
# again as soon as the block ends
with pd.HDFStore(hdf5_path, mode='r') as store:
    print(store)

In [None]:
# Method 2: Use constructor method. Don't forget to close it using store.close()
# Opened read-only: the default mode ('a') needs write access and would create
# an empty file if the path were wrong, which is not wanted for inspection.
store = pd.HDFStore(hdf5_path, mode='r')

In [None]:
# List the groups stored in the file.
store.groups()

In [None]:
# get stored dataframe using its group key ('data') and preview the first rows
store.get('data').head()

In [None]:
# OR, get the stored dataframe with item access on the same key
store['data'].head()

In [None]:
# Close the store opened with the constructor (Method 2).
store.close()

In [None]:
# Check the store's open state right after close().
store.is_open

In [None]:
# Method 3: Retrieve pandas object stored in h5 file
# The key is given explicitly (same 'data' group as above): without it,
# read_hdf only works while the file holds exactly one pandas object.
# Note: `store` is rebound here to the returned object, not an HDFStore handle.
store = pd.read_hdf(hdf5_path, key='data')

In [None]:
# `store` is now the object returned by pd.read_hdf (not the HDFStore handle);
# preview its first rows.
store.head()