## Data Loading

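- Try different ways to load event data: (1) list all files in the input directory at once, or (2) build the file prefix of a single event from its id.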

In [None]:
import sys, os, glob, yaml

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
import pprint
import seaborn as sns
import trackml.dataset

In [None]:
import torch
from torch_geometric.data import Data
import itertools

In [None]:
# put the parent directory on the import path (needed for the local `src` import)
parent_dir = ".."
sys.path.append(parent_dir)

In [None]:
# local imports
from src import Compose_Event, Draw_Compose_Event

## _Input Data_

In [None]:
# Dataset location: keep exactly one `input_dir` assignment active.

# mu- data (old)
# input_dir = './data_sets/pandaml/data_3.0_7.0_GeV/'

# mu+mu- data (current), path is relative to this notebook's directory
input_dir = "../data_all"

# pbarp data (coming)
# input_dir = os.environ['HOME']+'/current/2_deepana/pandaml/data/'

### _(1) - Fetch all Files at Once_

In [None]:
# Find all input data files (hits.csv, cells.csv, particles.csv, truth.csv)
# plus anything else stored in the directory. os.listdir() returns names in
# arbitrary order, so sort them to make the preview below reproducible.
all_files = sorted(os.listdir(input_dir))
all_files[:10]

In [None]:
# Build one path prefix per event from the "<prefix>-hits.csv" files.
# The suffix is sliced off the end rather than removed with str.replace(),
# which would also strip the marker if it appeared earlier in a file name.
suffix = "-hits.csv"
file_prefixes = sorted(
    os.path.join(input_dir, name[: -len(suffix)])
    for name in all_files
    if name.endswith(suffix)
)
file_prefixes[:10]

In [None]:
# number of events, i.e. number of "*-hits.csv" files found in input_dir
len(file_prefixes)

In [None]:
# OR, derive the prefixes from the first 15 characters of every file name
# ("event" + 10 digits). This only works if the directory holds no additional
# files (e.g. *.root, *.log); any such file contributes a bogus prefix.
# A set drops the duplicates and sorted() orders them, giving a list of plain
# `str` without the extra NumPy sort.
prefix_len = len("event") + 10
all_events = sorted(
    {os.path.join(input_dir, name[:prefix_len]) for name in all_files}
)
all_events[:10]

In [None]:
# number of unique 15-character prefixes; besides *.csv files, *.root and *.log
# files also exist in this dir, so this may not equal the number of events
len(all_events)

In [None]:
# used below as a 0-based position in the sorted `file_prefixes` list
event_id = 1

In [None]:
# OR, use event_id to fetch one prefix from the list of all file prefixes
event_prefix = file_prefixes[event_id]
print(event_prefix)

In [None]:
# read the four tables of the selected event
hits, tubes, particles, truth = trackml.dataset.load_event(event_prefix)

# combined in-memory size of the four tables, index included
frames = (hits, tubes, particles, truth)
mem_bytes = sum(frame.memory_usage(index=True).sum() for frame in frames)

# report the size in units of 2**20 bytes
mem_mb = mem_bytes / 2**20
print("{} memory usage {:.2f} MB".format(os.path.basename(event_prefix), mem_mb))

In [None]:
# preview the first rows of the hits table
hits.head()

In [None]:
# preview the first rows of the second table returned by load_event
# (presumably the *-cells.csv content, named `tubes` here; confirm)
tubes.head()

In [None]:
# preview the first rows of the particles table
particles.head()

In [None]:
# preview the first rows of the truth table
truth.head()

### _(2) - Fetch a Single Event_

In [None]:
# event number as it appears (zero-padded) in the file name
event_id = 1

In [None]:
# Fetch an event by its integer event_id: the file prefix is "event" followed
# by the id zero-padded to 10 digits. The result is already a str, so no
# extra str()/format() wrapping is needed.
prefix = "event{:010d}".format(event_id)

# event_prefix = input_dir + prefix, i.e. the event's path without the
# "-hits.csv"-style suffix
event_prefix = os.path.join(input_dir, prefix)

In [None]:
# read the four tables of the event addressed by event_prefix
hits, tubes, particles, truth = trackml.dataset.load_event(event_prefix)

# combined in-memory size of the four tables, index included
frames = (hits, tubes, particles, truth)
mem_bytes = sum(frame.memory_usage(index=True).sum() for frame in frames)

# report the size in units of 2**20 bytes
mem_mb = mem_bytes / 2**20
print("{} memory usage {:.2f} MB".format(os.path.basename(event_prefix), mem_mb))

In [None]:
# preview the first rows of the hits table
hits.head()

In [None]:
# preview the first rows of the second table returned by load_event
# (presumably the *-cells.csv content, named `tubes` here; confirm)
tubes.head()

In [None]:
# preview the first rows of the particles table
particles.head()

In [None]:
# preview the first rows of the truth table
truth.head()

## _Detector Layout_

In [None]:
# build the composed event from the files at event_prefix using the local
# `src` helper; the meaning of skewed=False is defined there (TODO confirm)
event = Compose_Event(event_prefix, skewed=False)

In [None]:
# preview the first rows of the composed event
event.head()

In [None]:
# draw the composed event with the local `src` helper; save_fig=False
# presumably skips writing the figure to disk (confirm in src). The trailing
# ';' keeps the notebook from echoing the call's return value.
Draw_Compose_Event(event, figsize=(10, 10), save_fig=False);