## _Histograms_

- _Hit Distribution_
- _Momentum Distribution_
- _A.O.B._

In [None]:
import sys, os, glob, yaml

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
import pprint
import seaborn as sns
import trackml.dataset

In [None]:
import torch
from torch_geometric.data import Data
import itertools

In [None]:
# Append the parent directory to the module search path so that the local
# `src` package imported below can be resolved.
# NOTE(review): ".." is resolved against the current working directory, so this
# presumably expects the notebook to be run from its own folder — confirm.
sys.path.append("..")

# local imports
from src import Compose_Event, Draw_Compose_Event

## _Input Data_

In [None]:
# Select the dataset directory: exactly one `input_dir` assignment below is
# active, the alternatives are kept commented out.

# mu- data (old)
# input_dir = './data_sets/pandaml/data_3.0_7.0_GeV/'

# mu+mu- data (current)
input_dir = "../train_quick"

# pbarp data (coming)
# input_dir = os.environ['HOME']+'/current/2_deepana/pandaml/data/'

In [None]:
# Find all input data files (hits.csv, cells.csv, particles.csv, truth.csv).
# os.listdir returns the names in arbitrary order, so the preview is unordered.
all_files = os.listdir(input_dir)
all_files[:10]

In [None]:
# Extract file prefixes from the hit files ("<prefix>-hits.csv" -> "<prefix>"),
# joined onto input_dir and sorted by path.
suffix = "-hits.csv"
file_prefixes = sorted(
    # Slice the suffix off the end only; str.replace would also delete any
    # earlier occurrence of the suffix text inside the file name.
    os.path.join(input_dir, f[: -len(suffix)])
    for f in all_files
    if f.endswith(suffix)
)
file_prefixes[:10]

In [None]:
# number of events, counted as the number of "*-hits.csv" files found
len(file_prefixes)

In [None]:
# Alternative: take the first 15 characters of every file name as the event
# prefix and keep the distinct ones. Only works if the directory holds no
# additional files (e.g. *.root, *.log etc.).
candidate_paths = [os.path.join(input_dir, name[:15]) for name in all_files]
all_events = sorted(np.unique(candidate_paths))
all_events[:10]

In [None]:
# number of distinct 15-character prefixes; this equals the number of events
# only if no other files (e.g. *.root, *.log) are present in this directory
len(all_events)

In [None]:
# Integer selector for the event inspected in the cells below.
event_id = 1

In [None]:
# Build the path prefix of one event from its integer id: "event" followed by
# the id zero-padded to 10 digits, joined onto the input directory.
prefix = "event{:010d}".format(event_id)
event_prefix = os.path.join(input_dir, prefix)  # ~ input_dir + prefix

In [None]:
# hits, tubes, particles, truth = trackml.dataset.load_event(event_prefix)

In [None]:
# OR, use event_id to fetch one entry from the list of all file prefixes.
# NOTE(review): this is a positional index into the sorted list, so it picks
# the event with this id only if the files are numbered contiguously from 0 —
# confirm against the directory contents.
event_prefix = file_prefixes[event_id]
print(event_prefix)

In [None]:
# Load the four tables of the selected event.
hits, tubes, particles, truth = trackml.dataset.load_event(event_prefix)

# Combined memory footprint of the four tables (index included), in bytes.
mem_bytes = sum(
    frame.memory_usage(index=True).sum()
    for frame in (hits, tubes, particles, truth)
)

# Report the footprint scaled by 2**20 bytes, labelled "MB".
print(
    "{} memory usage {:.2f} MB".format(
        os.path.basename(event_prefix), mem_bytes / 2**20
    )
)

In [None]:
# hits.head()
# tubes.head()
# particles.head()
# truth.head()

### _(1) - Detector Layout_

In [None]:
# Compose the event with selection, noise and skewed all switched off, then
# draw it. The trailing ';' keeps the notebook from echoing the return value.
# NOTE(review): what each flag does is defined in src.Compose_Event, which is
# not visible here — confirm there.
event = Compose_Event(event_prefix, selection=False, noise=False, skewed=False)
Draw_Compose_Event(event, figsize=(9, 9));

In [None]:
# number of distinct particle ids in the composed event
len(event.particle_id.unique())

In [None]:
# Compose the event again, this time with selection switched on (noise and
# skewed stay off), then draw it. The trailing ';' keeps the notebook from
# echoing the return value.
# NOTE(review): what `selection` filters is defined in src.Compose_Event, which
# is not visible here — confirm there.
event = Compose_Event(event_prefix, selection=True, noise=False, skewed=False)
Draw_Compose_Event(event, figsize=(9, 9));

In [None]:
# number of distinct particle ids in the composed event
len(event.particle_id.unique())

### _(2) - Draw Individual Tracks_

In [None]:
# preprocess hits: add r = sqrt(x^2 + y^2), the radius in the x-y plane, as a
# new column of a copy of `hits` (the original frame is left untouched).
# Computed column-wise rather than with a row-by-row DataFrame.apply: same
# values, far fewer Python-level calls, and well defined for an empty frame.
hits_ = hits.assign(r=np.sqrt(hits.x**2 + hits.y**2))
hits_.head()

In [None]:
# we already have sorted hits, let's draw a single track.
# Keep the hits whose truth particle_id equals the value in the second row,
# first column of `particles`.
# NOTE(review): the boolean mask comes from `truth` but is applied to `hits_`,
# so this relies on both frames sharing the same row index — confirm.
# NOTE(review): iloc[1, 0] assumes particle_id is the first column — confirm.
data = hits_[truth.particle_id == particles.iloc[1, 0]]
data.head()

In [None]:
# Draw one track in the x-y plane (object-oriented Matplotlib API).
fig, ax = plt.subplots(figsize=(8, 8))

# hit positions as markers, then the same points joined in stored order
ax.scatter(data.x.values, data.y.values)
ax.plot(data.x.values, data.y.values, "-o")

ax.set(title="Single Track", xlabel="x [cm]", ylabel="y [cm]")
# ax.set_xlim(-40, 40)
# ax.set_ylim(-40, 40)
fig.tight_layout()
# fig.savefig('event.png')

In [None]:
# Straight-line fit through the track hits (object-oriented Matplotlib API).
plt.close("all")
fig, ax = plt.subplots(figsize=(8, 8))

# degree-1 polyfit returns the coefficients highest power first: slope, offset
a, b = np.polyfit(data.x, data.y, 1)
y = a * data.x.values + b

# hits as markers, fitted line in red on top
ax.scatter(data.x.values, data.y.values)
ax.plot(data.x.values, y, "r")
ax.set(title="Fitted Line", xlabel="x [cm]", ylabel="y [cm]")
# ax.set_xlim(-40, 40)
# ax.set_ylim(-40, 40)
fig.tight_layout()
# fig.savefig('event.png')

### _(3) - Momentum Distributions_

In [None]:
# Add pt = sqrt(px^2 + py^2) as a new column and rebind `particles` to the result.
particles = particles.assign(pt=lambda df: np.sqrt(df.px**2 + df.py**2))

In [None]:
# preview the first rows of the particles table
particles.head()

In [None]:
# show the pt column on its own
particles.pt

In [None]:
# Histogram of the particles' pt values in 10 bins (Pyplot API).
plt.close("all")
# The "seaborn" style sheet was renamed to "seaborn-v0_8" in Matplotlib 3.6 and
# the old name was later removed, so use whichever name this install provides.
seaborn_style = (
    "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
)
plt.style.use(seaborn_style)
fig = plt.figure(figsize=(8, 8))

plt.hist(particles.pt, bins=10)

plt.xlabel("p_t [GeV]")
plt.ylabel("counts")
# plt.xlim((0.1, 1.5))
# plt.ylim((5, 40))
plt.tight_layout()

In [None]:
# Average hits per track: count of non-null hit ids divided by the number of
# distinct particle ids.
n_hits = hits.hit_id.count()
n_tracks = particles.particle_id.unique().size
print("Average number of hits per tracks: {}".format(n_hits / n_tracks))

## _Misc._