In [3]:
import os
import sys 
sys.path.append('../')

import torch
from trackml.dataset import load_event
import numpy as np
import pandas as pd
from torch_geometric.data import Dataset
from matplotlib import pyplot as plt
import mplhep as hep
hep.style.use("CMS")
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

  from IPython.core.display import display, HTML


In [4]:
evtid = '21512'
prefix = f'/tigress/jdezoort/codalab/train_1/event0000{evtid}'
hits, particles, truth = load_event(
        prefix, parts=['hits', 'particles', 'truth'])

In [5]:
import os
from os.path import join
import torch
from torch_geometric.data import Dataset, Data

def calc_eta(r, z):
    theta = np.arctan2(r, z)
    eta = -1.0 * np.log(np.tan(theta / 2.0))

def append_features(hits, particles, truth):
    particles['pt'] = np.sqrt(particles.px**2 + 
                              particles.py**2)
    particles['eta_pt'] = calc_eta(particles.pt, 
                                   particles.pz)
    truth = (truth[['hit_id', 'particle_id']]
             .merge(particles[['particle_id', 'pt', 'eta_pt', 'q', 'vx', 'vy']], 
                    on='particle_id'))
    hits['r'] = np.sqrt(hits.x**2 + hits.y**2)
    hits['phi'] = np.arctan2(hits.y, hits.x)
    hits['eta_rz'] = calc_eta(hits.r, hits.z)
    hits['u'] = hits['x']/(hits['x']**2 + hits['y']**2)
    hits['v'] = hits['y']/(hits['x']**2 + hits['y']**2)
    hits = (hits[['hit_id', 'r', 'phi', 'eta_rz', 
                  'x', 'y', 'z', 'u', 'v', 'volume_id']]
            .merge(truth[['hit_id', 'particle_id', 'pt', 'eta_pt']], 
                          on='hit_id'))
    data = Data(x=hits[['x', 'y', 'z', 'r', 'phi', 'eta_rz', 'u', 'v']].values,
                particle_id=hits['particle_id'].values, 
                pt=hits['pt'].values)
    return data

class TrackClouds(Dataset):
    def __init__(self, root: str, processed_file_dir: str,
                 n_sectors: int, pre_transform=None):
        self.root = root
        self.processed_file_dir = processed_file_dir
        self.raw_file_path = root
        self.processed_file_path = processed_file_dir
        self.n_sectors = n_sectors
        self.idx_dict = {}
        counter = 0
        for i in range(1000):
            for j in range(self.n_sectors):
                self.idx_dict[counter] = (i, j)
                
        suffix = '-hits.csv.gz'
        self.prefixes, self.exists = [], {}
        for p in os.listdir(self.raw_file_path):
            if str(p).endswith(suffix):
                prefix = str(p).replace(suffix, '')
                evtid = int(prefix[-9:])
                if f'data{evtid}_s0.pt' in self.processed_file_names:
                    self.exists[evtid] = True
                else: self.exists[evtid] = False
                self.prefixes.append(prefix)
        self.dataset = []
       
        super(TrackClouds, self).__init__(processed_file_dir,
                                          pre_transform=pre_transform)
        
    @property
    def raw_file_names(self):
        return os.listdir(self.raw_file_path)

    @property
    def processed_file_names(self):
        return os.listdir(self.processed_file_path)
        
    def len(self) -> int:
        return len(self.dataset)

    def get(self, idx: int) -> Data:
        evtid, s = self.idx_dict[idx]
        name = f'data{evtid}_s{s}.pt'
        return torch.load(join(self.processed_dir, name))
        
    def process(self):
        idx = 0
        for i, f in enumerate(self.prefixes):
            s = 0
            evtid = int(f[-9:])
            name=f'data{evtid}_s{s}.pt'
            if self.exists[evtid]: 
                print(join(self.processed_dir, name))
                data = torch.load(join(self.processed_dir, name))
                self.dataset.append(data)
                continue
            print('Processing', evtid)
            hits, particles, truth = load_event(
                f, parts=['hits', 'particles', 'truth']
            )
            data = self.pre_transform(hits, particles, truth)
            torch.save(data, join(self.processed_dir, name))
            self.dataset.append(data)
            idx += 1


In [14]:
import os
from os.path import join
import torch
from torch_geometric.data import Data

class PointClouds():
    def __init__(self, outdir: str, indir: str,
                 n_sectors: int, redo=False):
        self.outdir = outdir
        self.indir = indir
        self.n_sectors = n_sectors
        self.redo = redo
        self.idx_dict = {}
        counter = 0
        for i in range(1000):
            for j in range(self.n_sectors):
                self.idx_dict[counter] = (i, j)
                
        suffix = '-hits.csv.gz'
        self.prefixes, self.exists = [], {}
        for p in os.listdir(self.indir):
            if str(p).endswith(suffix):
                prefix = str(p).replace(suffix, '')
                evtid = int(prefix[-9:])
                if f'data{evtid}_s0.pt' in os.listdir(outdir):
                    self.exists[evtid] = True
                else: self.exists[evtid] = False
                self.prefixes.append(join(indir, prefix))
                
        self.data_list = []
        self.process()
    
    def calc_eta(self, r, z):
        theta = np.arctan2(r, z)
        return -1.0 * np.log(np.tan(theta / 2.0))
                
    def append_features(self, hits, particles, truth):
        particles['pt'] = np.sqrt(particles.px**2 + 
                                  particles.py**2)
        particles['eta_pt'] = self.calc_eta(particles.pt, 
                                            particles.pz)
        truth = (truth[['hit_id', 'particle_id']]
                 .merge(particles[['particle_id', 'pt', 'eta_pt', 'q', 'vx', 'vy']], 
                        on='particle_id'))
        hits['r'] = np.sqrt(hits.x**2 + hits.y**2)
        hits['phi'] = np.arctan2(hits.y, hits.x)
        hits['eta_rz'] = self.calc_eta(hits.r, hits.z)
        hits['u'] = hits['x']/(hits['x']**2 + hits['y']**2)
        hits['v'] = hits['y']/(hits['x']**2 + hits['y']**2)
        hits = (hits[['hit_id', 'r', 'phi', 'eta_rz', 
                      'x', 'y', 'z', 'u', 'v', 'volume_id']]
                .merge(truth[['hit_id', 'particle_id', 'pt', 'eta_pt']], 
                              on='hit_id'))
        data = Data(x=hits[['x', 'y', 'z', 'r', 'phi', 'eta_rz', 'u', 'v']].values,
                    particle_id=hits['particle_id'].values, 
                    pt=hits['pt'].values)
        return data

    def process(self):
        for i, f in enumerate(self.prefixes):
            print(f)
            s = 0
            evtid = int(f[-9:])
            name=f'data{evtid}_s{s}.pt'
            if self.exists[evtid] and not self.redo:
                data = torch.load(join(self.outdir, name))
                self.data_list.append(data)
            else: 
                hits, particles, truth = load_event(
                    f, parts=['hits', 'particles', 'truth']
                )
                data = self.append_features(hits, particles, truth)
                torch.save(data, join(self.outdir, name))
                self.data_list.append(data)

In [27]:
from torch_geometric.loader import DataListLoader
tc = PointClouds(indir='/tigress/jdezoort/codalab/train_1', outdir='../point_clouds/',
                 n_sectors=1)
for i in tc:
    print(i)

/tigress/jdezoort/codalab/train_1/event000021289
/tigress/jdezoort/codalab/train_1/event000021054
/tigress/jdezoort/codalab/train_1/event000021371
/tigress/jdezoort/codalab/train_1/event000021460
/tigress/jdezoort/codalab/train_1/event000021612
/tigress/jdezoort/codalab/train_1/event000021949
/tigress/jdezoort/codalab/train_1/event000021853
/tigress/jdezoort/codalab/train_1/event000021164
/tigress/jdezoort/codalab/train_1/event000021475
/tigress/jdezoort/codalab/train_1/event000021135
/tigress/jdezoort/codalab/train_1/event000021845
/tigress/jdezoort/codalab/train_1/event000021583
/tigress/jdezoort/codalab/train_1/event000021297
/tigress/jdezoort/codalab/train_1/event000021795
/tigress/jdezoort/codalab/train_1/event000021139
/tigress/jdezoort/codalab/train_1/event000021700
/tigress/jdezoort/codalab/train_1/event000021457
/tigress/jdezoort/codalab/train_1/event000021886
/tigress/jdezoort/codalab/train_1/event000021553
/tigress/jdezoort/codalab/train_1/event000021609
/tigress/jdezoort/co

/tigress/jdezoort/codalab/train_1/event000021100
/tigress/jdezoort/codalab/train_1/event000021608
/tigress/jdezoort/codalab/train_1/event000021392
/tigress/jdezoort/codalab/train_1/event000021450
/tigress/jdezoort/codalab/train_1/event000021755
/tigress/jdezoort/codalab/train_1/event000021876
/tigress/jdezoort/codalab/train_1/event000021030
/tigress/jdezoort/codalab/train_1/event000021310
/tigress/jdezoort/codalab/train_1/event000021266
/tigress/jdezoort/codalab/train_1/event000021489
/tigress/jdezoort/codalab/train_1/event000021512
/tigress/jdezoort/codalab/train_1/event000021011
/tigress/jdezoort/codalab/train_1/event000021503
/tigress/jdezoort/codalab/train_1/event000021774
/tigress/jdezoort/codalab/train_1/event000021456
/tigress/jdezoort/codalab/train_1/event000021828
/tigress/jdezoort/codalab/train_1/event000021017
/tigress/jdezoort/codalab/train_1/event000021021
/tigress/jdezoort/codalab/train_1/event000021480
/tigress/jdezoort/codalab/train_1/event000021336
/tigress/jdezoort/co

/tigress/jdezoort/codalab/train_1/event000021729
/tigress/jdezoort/codalab/train_1/event000021454
/tigress/jdezoort/codalab/train_1/event000021546
/tigress/jdezoort/codalab/train_1/event000021479
/tigress/jdezoort/codalab/train_1/event000021945
/tigress/jdezoort/codalab/train_1/event000021905
/tigress/jdezoort/codalab/train_1/event000021725
/tigress/jdezoort/codalab/train_1/event000021094
/tigress/jdezoort/codalab/train_1/event000021029
/tigress/jdezoort/codalab/train_1/event000021726
/tigress/jdezoort/codalab/train_1/event000021198
/tigress/jdezoort/codalab/train_1/event000021631
/tigress/jdezoort/codalab/train_1/event000021772
/tigress/jdezoort/codalab/train_1/event000021153
/tigress/jdezoort/codalab/train_1/event000021357
/tigress/jdezoort/codalab/train_1/event000021167
/tigress/jdezoort/codalab/train_1/event000021280
/tigress/jdezoort/codalab/train_1/event000021168
/tigress/jdezoort/codalab/train_1/event000021275
/tigress/jdezoort/codalab/train_1/event000021995
/tigress/jdezoort/co

/tigress/jdezoort/codalab/train_1/event000021388
/tigress/jdezoort/codalab/train_1/event000021887
/tigress/jdezoort/codalab/train_1/event000021036
/tigress/jdezoort/codalab/train_1/event000021561
/tigress/jdezoort/codalab/train_1/event000021731
/tigress/jdezoort/codalab/train_1/event000021998
/tigress/jdezoort/codalab/train_1/event000021907
/tigress/jdezoort/codalab/train_1/event000021797
/tigress/jdezoort/codalab/train_1/event000021544
/tigress/jdezoort/codalab/train_1/event000021738
/tigress/jdezoort/codalab/train_1/event000021334
/tigress/jdezoort/codalab/train_1/event000021138
/tigress/jdezoort/codalab/train_1/event000021452
/tigress/jdezoort/codalab/train_1/event000021083
/tigress/jdezoort/codalab/train_1/event000021933
/tigress/jdezoort/codalab/train_1/event000021767
/tigress/jdezoort/codalab/train_1/event000021584
/tigress/jdezoort/codalab/train_1/event000021125
/tigress/jdezoort/codalab/train_1/event000021709
/tigress/jdezoort/codalab/train_1/event000021810
/tigress/jdezoort/co

/tigress/jdezoort/codalab/train_1/event000021362
/tigress/jdezoort/codalab/train_1/event000021210
/tigress/jdezoort/codalab/train_1/event000021330
/tigress/jdezoort/codalab/train_1/event000021208
/tigress/jdezoort/codalab/train_1/event000021131
/tigress/jdezoort/codalab/train_1/event000021623
/tigress/jdezoort/codalab/train_1/event000021351
/tigress/jdezoort/codalab/train_1/event000021743
/tigress/jdezoort/codalab/train_1/event000021076
/tigress/jdezoort/codalab/train_1/event000021630
/tigress/jdezoort/codalab/train_1/event000021276
/tigress/jdezoort/codalab/train_1/event000021715
/tigress/jdezoort/codalab/train_1/event000021747
/tigress/jdezoort/codalab/train_1/event000021973
/tigress/jdezoort/codalab/train_1/event000021770
/tigress/jdezoort/codalab/train_1/event000021723
/tigress/jdezoort/codalab/train_1/event000021320
/tigress/jdezoort/codalab/train_1/event000021216
/tigress/jdezoort/codalab/train_1/event000021035
/tigress/jdezoort/codalab/train_1/event000021040
/tigress/jdezoort/co

/tigress/jdezoort/codalab/train_1/event000021621
/tigress/jdezoort/codalab/train_1/event000021819
/tigress/jdezoort/codalab/train_1/event000021104
/tigress/jdezoort/codalab/train_1/event000021144
/tigress/jdezoort/codalab/train_1/event000021948
/tigress/jdezoort/codalab/train_1/event000021915
/tigress/jdezoort/codalab/train_1/event000021850
/tigress/jdezoort/codalab/train_1/event000021645
/tigress/jdezoort/codalab/train_1/event000021379
/tigress/jdezoort/codalab/train_1/event000021028
/tigress/jdezoort/codalab/train_1/event000021920
/tigress/jdezoort/codalab/train_1/event000021722
/tigress/jdezoort/codalab/train_1/event000021098
/tigress/jdezoort/codalab/train_1/event000021524
/tigress/jdezoort/codalab/train_1/event000021925
/tigress/jdezoort/codalab/train_1/event000021328
/tigress/jdezoort/codalab/train_1/event000021993
/tigress/jdezoort/codalab/train_1/event000021848
/tigress/jdezoort/codalab/train_1/event000021349
/tigress/jdezoort/codalab/train_1/event000021765
/tigress/jdezoort/co

TypeError: 'PointClouds' object is not iterable

In [28]:
loader = DataListLoader(tc.data_list, batch_size=1)
for l in loader: print(l)

[Data(x=[108424, 8], particle_id=[108424], pt=[108424])]
[Data(x=[93175, 8], particle_id=[93175], pt=[93175])]
[Data(x=[87969, 8], particle_id=[87969], pt=[87969])]
[Data(x=[86389, 8], particle_id=[86389], pt=[86389])]
[Data(x=[93257, 8], particle_id=[93257], pt=[93257])]
[Data(x=[95729, 8], particle_id=[95729], pt=[95729])]
[Data(x=[94415, 8], particle_id=[94415], pt=[94415])]
[Data(x=[100194, 8], particle_id=[100194], pt=[100194])]
[Data(x=[97464, 8], particle_id=[97464], pt=[97464])]
[Data(x=[97113, 8], particle_id=[97113], pt=[97113])]
[Data(x=[98544, 8], particle_id=[98544], pt=[98544])]
[Data(x=[84374, 8], particle_id=[84374], pt=[84374])]
[Data(x=[97816, 8], particle_id=[97816], pt=[97816])]
[Data(x=[98700, 8], particle_id=[98700], pt=[98700])]
[Data(x=[117978, 8], particle_id=[117978], pt=[117978])]
[Data(x=[88167, 8], particle_id=[88167], pt=[88167])]
[Data(x=[101670, 8], particle_id=[101670], pt=[101670])]
[Data(x=[89450, 8], particle_id=[89450], pt=[89450])]
[Data(x=[87752, 

In [None]:
import os
import torch
from torch_geometric.data import InMemoryDataset, download_url

class TrackClouds(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
        super().__init__(root, transform, pre_transform, pre_filter)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        return os.listdir(self.root)

    @property
    def processed_file_names(self):
        return ['data{e}_s0.pt' 
                for e in np.arange(21000, 22000, 1)]

    def process(self):
        data_list = []
        if self.pre_filter is not None:
            data_list = [data for data in data_list if self.pre_filter(data)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(data) for data in data_list]

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [None]:
        #prefixes = [str(p).replace(suffix, '')
        #            for p in self.raw_file_path.iterdir()
        #            if str(p).endswith(suffix)]