In [5]:
import numpy as np
import matplotlib.pyplot as plt
from collections import namedtuple
#internal
import os, sys, glob, yaml, datetime, argparse
import csv
import tensorflow as tf


In [6]:
# Lightweight record for one graph. Fields, in order:
#   X  - array passed through unchanged by sparse_to_graph (presumably per-node features; confirm)
#   Ri - dense 0/1 matrix of shape (n_nodes, n_edges) built from Ri_rows / Ri_cols
#   Ro - dense 0/1 matrix of shape (n_nodes, n_edges) built from Ro_rows / Ro_cols
#   y  - array passed through unchanged (presumably per-edge labels; confirm)
Graph = namedtuple(typename='Graph', field_names=('X', 'Ri', 'Ro', 'y'))

In [7]:
def sparse_to_graph(X, Ri_rows, Ri_cols, Ro_rows, Ro_cols, y, dtype=np.float32):
    """Expand sparse (row, col) index pairs into dense matrices and wrap them in a ``Graph``.

    Parameters
    ----------
    X : array
        Stored unchanged in the result; its first dimension gives the node count.
    Ri_rows, Ri_cols : integer index arrays
        Positions set to 1 in ``Ri``. The length of ``Ri_rows`` gives the edge count.
    Ro_rows, Ro_cols : integer index arrays
        Positions set to 1 in ``Ro``.
    y : array
        Stored unchanged in the result.
    dtype : numpy dtype, optional
        Element type of the dense ``Ri`` / ``Ro`` matrices (default ``np.float32``).

    Returns
    -------
    Graph
        ``Graph(X, Ri, Ro, y)`` where ``Ri`` and ``Ro`` have shape ``(n_nodes, n_edges)``.
    """
    dense_shape = (X.shape[0], Ri_rows.shape[0])

    def _densify(rows, cols):
        # Start from all zeros and flag every listed (row, col) position with 1.
        dense = np.zeros(dense_shape, dtype=dtype)
        dense[rows, cols] = 1
        return dense

    incoming = _densify(Ri_rows, Ri_cols)
    outgoing = _densify(Ro_rows, Ro_cols)
    return Graph(X, incoming, outgoing, y)

In [8]:
def load_graph(filename):
    """Read a single graph from an NPZ file.

    Every array stored in the archive is forwarded to ``sparse_to_graph`` as a
    keyword argument named after its key, so the archive's keys must match
    that function's parameter names.
    """
    with np.load(filename) as archive:
        # Materialise each stored array while the archive is still open.
        arrays = {key: archive[key] for key in archive.files}
        return sparse_to_graph(**arrays)

In [9]:
class GraphDataset:
    """Lazily loaded collection of graph NPZ files found in one directory.

    Only the file paths are collected up front; a file is read each time the
    dataset is indexed.

    Parameters
    ----------
    input_dir : str
        Directory to scan. Environment variables in the path are expanded.
    n_samples : int or None, optional
        If given, keep only the first ``n_samples`` files (in sorted order);
        ``None`` keeps every matching file.

    Attributes
    ----------
    filenames : list of str
        Sorted paths of the selected ``event*.npz`` files.
    """

    def __init__(self, input_dir, n_samples=None):
        input_dir = os.path.expandvars(input_dir)
        # os.listdir returns entries in arbitrary order, so sort to make both
        # the n_samples subset and the index -> file mapping reproducible.
        filenames = sorted(
            os.path.join(input_dir, f) for f in os.listdir(input_dir)
            if f.startswith('event') and f.endswith('.npz'))
        self.filenames = (
            filenames[:n_samples] if n_samples is not None else filenames)

    def __getitem__(self, index):
        """Load and return the graph stored in the ``index``-th file."""
        return load_graph(self.filenames[index])

    def __len__(self):
        """Return the number of selected files."""
        return len(self.filenames)

In [10]:
def get_dataset(input_dir, n_files):
    """Build a ``GraphDataset`` over ``input_dir`` limited to ``n_files`` files.

    ``n_files`` is forwarded as the dataset's ``n_samples`` argument.
    """
    dataset = GraphDataset(input_dir, n_samples=n_files)
    return dataset

In [11]:
def map2angle(arr0):
    """Rescale coordinate columns of ``arr0`` into a new float32 array.

    As visible in this cell, only column 0 is rescaled and there is no
    ``return`` statement, so the call evaluates to ``None``.
    NOTE(review): the cell looks truncated -- confirm whether the remaining
    columns and a ``return arr`` are missing.
    """
    # Mapping the cylindrical coordinates to [0,1]
    # Output buffer with the same shape as the input, initialised to zeros.
    arr = np.zeros(arr0.shape, dtype=np.float32)
    # Bounds used to rescale column 0 (presumably the radial coordinate r -- confirm).
    r_min     = 0.
    r_max     = 1.1
    # Linear min-max rescale; the result lies in [0, 1] only if the input lies in [r_min, r_max].
    arr[:,0] = (arr0[:,0]-r_min)/(r_max-r_min)

Next, we will walk through, step by step, how the functions above are used.

In [13]:
# Directory holding the training graphs. Set the QTRKX_TRAIN_DIR environment
# variable to point at your own copy of data/train; the default is the path
# this notebook was originally run with.
TRAIN_DIR = os.environ.get(
    'QTRKX_TRAIN_DIR',
    '/Users/lucascurtin/Desktop/qtrkx-gnn-tracking/data/train')
# Number of event files to use from that directory (all in config).
N_TRAIN_FILES = 50

# Grab the dataset: at most N_TRAIN_FILES graph files found in TRAIN_DIR.
train_data = get_dataset(TRAIN_DIR, N_TRAIN_FILES)

In [26]:
# train_data is a GraphDataset: it holds the paths of up to 50 matching files from the
# training directory, and indexing it loads the corresponding file and returns it as a Graph.
train_data[0]

Graph(X=array([[ 0.03249963, -0.45238492,  0.12183899],
       [ 0.0717338 , -0.44740212,  0.266498  ],
       [ 0.11588637, -0.4448925 ,  0.427269  ],
       ...,
       [ 0.2606789 , -0.98216856, -0.44779998],
       [ 0.3633492 , -0.9724082 , -0.6242    ],
       [ 0.5020463 , -0.9593005 , -0.8642    ]], dtype=float32), Ri=array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), Ro=array([[1., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), y=array([1., 0., 0., ..., 0., 1., 1.], dtype=float32))

We can see that the graph holds four arrays: X, Ri, Ro and y. We will now look at how each of them is constructed.