In [1]:
import os

# Root directory that receives the misaligned event copies, and the directory
# holding the original (unmodified) 10-event sample.
store_dir = '/global/cfs/projectdirs/m3443/usr/caditi97/iml2020/misaligned/new_mis/'
og_evts = '/global/cfs/projectdirs/m3443/data/trackml-kaggle/train_10evts/'

# Publish the shift_x input/output locations through the TRKX* environment
# variables (presumably read by the exatrkx package -- confirm against its docs).
os.environ.update({
    'TRKXINPUTDIR': store_dir + "shift_x/",
    'TRKXOUTPUTDIR': store_dir + "shift_x_pre/",
})

In [2]:
import pkg_resources
import yaml
import pprint
import random
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
import tqdm
from os import listdir
from os.path import isfile, join
import matplotlib.cm as cm
import sys
import csv
import time
import pickle
# %matplotlib widget

sys.path.append('/global/homes/c/caditi97/exatrkx-iml2020/exatrkx/src/')

# 3rd party
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from trackml.dataset import load_event
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint


# local import
from exatrkx import config_dict # for accessing predefined configuration files
from exatrkx import outdir_dict # for accessing predefined output directories
from exatrkx.src import utils_dir
from exatrkx.src import utils_robust
from utils_robust import *


# for preprocessing
from exatrkx import FeatureStore
from exatrkx.src import utils_torch

# for embedding
from exatrkx import LayerlessEmbedding
from exatrkx.src import utils_torch
from torch_cluster import radius_graph
from utils_torch import build_edges
from embedding.embedding_base import *

# for filtering
from exatrkx import VanillaFilter

# for GNN
import tensorflow as tf
from graph_nets import utils_tf
from exatrkx import SegmentClassifier
import sonnet as snt

# for labeling
from exatrkx.scripts.tracks_from_gnn import prepare as prepare_labeling
from exatrkx.scripts.tracks_from_gnn import clustering as dbscan_clustering

# track efficiency
from trackml.score import _analyze_tracks
from exatrkx.scripts.eval_reco_trkx import make_cmp_plot, pt_configs, eta_configs
from functools import partial

In [3]:
# Directory of the full TrackML training sample.
# NOTE(review): `event_path` is not referenced by any cell visible here -- confirm it is still needed.
event_path = '/global/cfs/cdirs/m3443/data/trackml-kaggle/train_all/'
# File-name prefix of the single event used for the spot checks.
eventid = 'event000001000'

In [4]:
# Misalignment offsets to scan; each value is added to the hit coordinates and
# also names the output sub-directory (units presumably millimetres -- confirm).
misl = [0.02, 0.1, 0.4, 0.6, 0.8, 1]
# Numeric ids of the ten events to process: 1000 through 1009 inclusive.
evts = np.arange(1000, 1010)

In [5]:
# Same two directories as assigned in the first cell, repeated here
# (presumably so this cell can be re-run on its own -- confirm before removing).
# store_dir: root for the misaligned event copies; og_evts: original 10-event sample.
store_dir = '/global/cfs/projectdirs/m3443/usr/caditi97/iml2020/misaligned/new_mis/'
og_evts = '/global/cfs/projectdirs/m3443/data/trackml-kaggle/train_10evts/'

In [6]:
# Load the original (unshifted) reference event and display its hits table.
# Use the `load_event` name imported explicitly from trackml.dataset: the bare
# `trackml` module name is never imported by this notebook, so
# `trackml.dataset.load_event` only resolves if a star import happens to leak it.
h, c, p, t = load_event(f"{og_evts}{eventid}")
h

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id
0,1,-64.409897,-7.163700,-1502.5,7,2,1
1,2,-55.336102,0.635342,-1502.5,7,2,1
2,3,-83.830498,-1.143010,-1502.5,7,2,1
3,4,-96.109100,-8.241030,-1502.5,7,2,1
4,5,-62.673599,-9.371200,-1502.5,7,2,1
...,...,...,...,...,...,...,...
120934,120935,-763.862976,51.569401,2944.5,18,12,97
120935,120936,-808.705017,3.459260,2944.5,18,12,97
120936,120937,-982.935974,41.460899,2952.5,18,12,98
120937,120938,-942.698975,18.489100,2952.5,18,12,98


In [7]:
def save_pickle(data, store_dir, store_path, eventid):
    """Pickle ``data`` to ``<store_dir>/<store_path>/<eventid>.pkl``.

    The previous body opened the literal file name 'pathtofile', passed a
    binary handle to ``csv.writer`` and referenced an undefined variable, so
    it could never succeed and ignored every argument.

    NOTE(review): the output layout (sub-directory ``store_path`` and the
    ``.pkl`` suffix) is an assumption derived from the parameter names --
    confirm against the intended consumers.

    Parameters
    ----------
    data : object
        Any picklable object.
    store_dir : str
        Base output directory.
    store_path : str
        Sub-directory (relative to ``store_dir``) for the pickle file.
    eventid : str or int
        Identifier used as the file stem.
    """
    out_dir = os.path.join(store_dir, store_path)
    # Create the destination on demand so a fresh output tree does not fail.
    os.makedirs(out_dir, exist_ok=True)
    out_file = os.path.join(out_dir, f"{eventid}.pkl")
    with open(out_file, 'wb') as handle:
        pickle.dump(data, handle)

In [8]:
def preprocess():
    """Run the exatrkx 'build' stage with notebook-specific overrides.

    Loads the packaged YAML configuration registered under ``config_dict['build']``,
    overrides a handful of keys, pretty-prints the resulting configuration and
    calls ``FeatureStore(...).prepare_data()`` with it.
    """
    stage = 'build'
    relative_cfg = os.path.join('configs', config_dict[stage])
    cfg_path = pkg_resources.resource_filename("exatrkx", relative_cfg)
    with open(cfg_path) as cfg_stream:
        build_cfg = yaml.load(cfg_stream, Loader=yaml.FullLoader)

    # Overrides applied on top of the packaged defaults.
    build_cfg.update({
        'pt_min': 0,
        'endcaps': True,
        'n_workers': 2,
        'n_files': 10,
        'noise': 0,
    })
    pprint.PrettyPrinter(indent=4).pprint(build_cfg)

    feature_store = FeatureStore(build_cfg)
    feature_store.prepare_data()

# Remove Volume 8 Layer 6

In [9]:
def remove_v8l6(hits,cells,particles,truth,store_dir,eventid):
    """Write a copy of one event with all volume-8 / layer-6 hits removed.

    The previous version filtered ``cells`` and ``truth`` with
    ``~isin(kept hit ids)``, which kept exactly the rows of the *removed* hits
    (and therefore selected the wrong particles as well). Cells and truth are
    now restricted to the hits that remain.

    Parameters
    ----------
    hits, cells, particles, truth : pandas.DataFrame
        Event tables; ``hits`` needs ``hit_id``, ``volume_id`` and ``layer_id``,
        ``cells`` needs ``hit_id``, ``truth`` needs ``hit_id`` and
        ``particle_id``, ``particles`` needs ``particle_id``.
    store_dir : str
        Output root; it is concatenated as-is, so it must end with a path separator.
    eventid : int or str
        Appended to the ``event00000`` file-name prefix.

    Writes ``<store_dir>remove_v8l6/event00000<eventid>-{hits,cells,truth,particles}.csv``.
    The input frames are not modified.
    """
    in_v8l6 = (hits['volume_id'] == 8) & (hits['layer_id'] == 6)
    removed_ids = hits.loc[in_v8l6, 'hit_id']
    fhits = hits[~hits['hit_id'].isin(removed_ids)]

    # Keep only the cells / truth rows that belong to surviving hits.
    kept_ids = fhits['hit_id']
    fcells = cells[cells['hit_id'].isin(kept_ids)]
    ftruth = truth[truth['hit_id'].isin(kept_ids)]
    # Particles are kept only if at least one of their hits survived.
    fparticles = particles[particles['particle_id'].isin(ftruth['particle_id'])]

    name = f"{store_dir}remove_v8l6/event00000{eventid}-"
    # Create the output directory on demand so a fresh store_dir works.
    os.makedirs(os.path.dirname(name), exist_ok=True)

    fhits.to_csv(name+"hits.csv", index=False)
    fcells.to_csv(name+"cells.csv", index=False)
    ftruth.to_csv(name+"truth.csv", index=False)
    fparticles.to_csv(name+"particles.csv", index=False)

In [10]:
# select 10 events, remove v8l6 
# select 10 events, remove v8l6
# `load_event` is the name imported explicitly from trackml.dataset; the bare
# `trackml` module name is never imported by this notebook.
for evtid in evts:
    hits, cells, particles, truth = load_event(f"{og_evts}event00000{evtid}")
    remove_v8l6(hits, cells, particles, truth, store_dir, evtid)

In [11]:
# Reload the hits of the event written by remove_v8l6, building the path from
# store_dir / eventid instead of repeating the absolute path, and calling the
# explicitly imported `load_event` (the bare `trackml` name is never imported).
v,_,_,_ = load_event(f"{store_dir}remove_v8l6/{eventid}")

In [12]:
# Sanity check on the reloaded event: selecting volume 8 / layer 6 should
# return an empty frame once those hits have been removed.
# (disabled) check inference:
# import os
# preprocess()
v[(v['volume_id']==8) & (v['layer_id']==6)]

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id


# Shift only x coordinate of every hit

In [13]:
def shift_x(hits,cells,particles,truth,store_dir,eventid,mis):
    """Write a copy of one event with the x coordinate of every hit shifted by ``mis``.

    The previous version assigned ``hits[hits['volume_id']==8]['x'] + mis`` to
    *all* rows; index alignment then set ``x`` to NaN for every hit outside
    volume 8. The shift is now computed from the full ``x`` column.

    Parameters
    ----------
    hits, cells, particles, truth : pandas.DataFrame
        Event tables; only ``hits['x']`` is changed, the others are written unchanged.
    store_dir : str
        Output root; concatenated as-is, so it must end with a path separator.
    eventid : int or str
        Appended to the ``event00000`` file-name prefix.
    mis : float
        Offset added to ``x``; also used verbatim as the output sub-directory
        name. Original note: 1000 micron = 1 millimeter (units of ``mis`` are
        presumably millimetres -- confirm).

    Writes ``<store_dir>shift_x/<mis>/event00000<eventid>-{hits,cells,truth,particles}.csv``.
    The caller's ``hits`` frame is not modified.
    """
    # One direction: every hit moves by +mis regardless of the sign of x.
    shifted = hits.copy()
    shifted['x'] = shifted['x'] + mis

    name = f"{store_dir}shift_x/{mis}/event00000{eventid}-"
    # Create the per-offset output directory on demand.
    os.makedirs(os.path.dirname(name), exist_ok=True)

    shifted.to_csv(name+"hits.csv", index=False)
    cells.to_csv(name+"cells.csv", index=False)
    truth.to_csv(name+"truth.csv", index=False)
    particles.to_csv(name+"particles.csv", index=False)

In [14]:
# Produce a shift_x copy of every selected event for every offset in misl.
# `load_event` is the name imported explicitly from trackml.dataset; the bare
# `trackml` module name is never imported by this notebook.
for mis in misl:
    for evtid in evts:
        hits, cells, particles, truth = load_event(f"{og_evts}event00000{evtid}")
        shift_x(hits, cells, particles, truth, store_dir, evtid, mis)

In [15]:
# Reload the shift_x output for offset 1, building the path from
# store_dir / eventid and calling the explicitly imported `load_event`
# (the bare `trackml` module name is never imported by this notebook).
h, c, p, t = load_event(f"{store_dir}shift_x/1/{eventid}")

In [16]:
# Display only the volume-8 hits of the event currently held in `h`.
h[h['volume_id'] == 8]

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id
16873,16874,-31.554401,-3.648710,-469.864990,8,2,1
16874,16875,-32.153702,-1.934740,-423.516998,8,2,1
16875,16876,-25.362400,-18.423700,-461.375000,8,2,2
16876,16877,-11.034100,-29.649799,-464.428009,8,2,3
16877,16878,-14.127200,-28.159300,-439.713989,8,2,3
...,...,...,...,...,...,...,...
46040,46041,-169.602997,26.208099,440.653015,8,8,1091
46041,46042,-169.606995,26.379601,420.931000,8,8,1091
46042,46043,-170.904999,9.853000,438.321991,8,8,1092
46043,46044,-171.016006,10.968900,472.256012,8,8,1092


# Shift only x coordinate of Volume 8 all layers

In [17]:
def shift_x_v8(hits,cells,particles,truth,store_dir,eventid,mis):
    """Write a copy of one event with x shifted by ``mis`` for volume-8 hits only.

    Unlike the previous version this works on a copy, so the caller's ``hits``
    frame is left untouched, and the output directory is created when missing.

    Parameters
    ----------
    hits, cells, particles, truth : pandas.DataFrame
        Event tables; only ``hits['x']`` of rows with ``volume_id == 8`` is
        changed, the other tables are written unchanged.
    store_dir : str
        Output root; concatenated as-is, so it must end with a path separator.
    eventid : int or str
        Appended to the ``event00000`` file-name prefix.
    mis : float
        Offset added to ``x``; also used verbatim as the output sub-directory
        name. Original note: 1000 micron = 1 millimeter (units of ``mis`` are
        presumably millimetres -- confirm).

    Writes ``<store_dir>shift_x_v8/<mis>/event00000<eventid>-{hits,cells,truth,particles}.csv``.
    """
    # One direction: every volume-8 hit moves by +mis regardless of the sign of x.
    shifted = hits.copy()
    in_v8 = shifted['volume_id'] == 8
    shifted.loc[in_v8, 'x'] = shifted.loc[in_v8, 'x'] + mis

    name = f"{store_dir}shift_x_v8/{mis}/event00000{eventid}-"
    # Create the per-offset output directory on demand.
    os.makedirs(os.path.dirname(name), exist_ok=True)

    shifted.to_csv(name+"hits.csv", index=False)
    cells.to_csv(name+"cells.csv", index=False)
    truth.to_csv(name+"truth.csv", index=False)
    particles.to_csv(name+"particles.csv", index=False)

In [19]:
# Produce a shift_x_v8 copy of every selected event for every offset in misl.
# `load_event` is the name imported explicitly from trackml.dataset; the bare
# `trackml` module name is never imported by this notebook.
for mis in misl:
    for evtid in evts:
        hits, cells, particles, truth = load_event(f"{og_evts}event00000{evtid}")
        shift_x_v8(hits, cells, particles, truth, store_dir, evtid, mis)

In [30]:
# Reload the shift_x_v8 output for offset 1, building the path from
# store_dir / eventid and calling the explicitly imported `load_event`
# (the bare `trackml` module name is never imported by this notebook).
h, c, p, t = load_event(f"{store_dir}shift_x_v8/1/{eventid}")

In [31]:
# Display the hits table of the event currently held in `h`.
h

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id
0,1,-64.409897,-7.163700,-1502.5,7,2,1
1,2,-55.336102,0.635342,-1502.5,7,2,1
2,3,-83.830498,-1.143010,-1502.5,7,2,1
3,4,-96.109100,-8.241030,-1502.5,7,2,1
4,5,-62.673599,-9.371200,-1502.5,7,2,1
...,...,...,...,...,...,...,...
120934,120935,-763.862976,51.569401,2944.5,18,12,97
120935,120936,-808.705017,3.459260,2944.5,18,12,97
120936,120937,-982.935974,41.460899,2952.5,18,12,98
120937,120938,-942.698975,18.489100,2952.5,18,12,98


# Shift x and y coordinates of all layers in volume 8

In [22]:
def shift_xy_v8(hits,cells,particles,truth,store_dir,eventid,mis):
    """Write a copy of one event with x and y shifted by ``mis`` for volume-8 hits only.

    Unlike the previous version this works on a copy, so the caller's ``hits``
    frame is left untouched, and the output directory is created when missing.

    Parameters
    ----------
    hits, cells, particles, truth : pandas.DataFrame
        Event tables; only ``hits['x']`` and ``hits['y']`` of rows with
        ``volume_id == 8`` are changed, the other tables are written unchanged.
    store_dir : str
        Output root; concatenated as-is, so it must end with a path separator.
    eventid : int or str
        Appended to the ``event00000`` file-name prefix.
    mis : float
        Offset added to both coordinates; also used verbatim as the output
        sub-directory name. Original note: 1000 micron = 1 millimeter (units
        of ``mis`` are presumably millimetres -- confirm).

    Writes ``<store_dir>shift_xy_v8/<mis>/event00000<eventid>-{hits,cells,truth,particles}.csv``.
    """
    # One direction: every volume-8 hit moves by +mis in x and in y,
    # regardless of the sign of the coordinate.
    shifted = hits.copy()
    in_v8 = shifted['volume_id'] == 8
    for axis in ('x', 'y'):
        shifted.loc[in_v8, axis] = shifted.loc[in_v8, axis] + mis

    name = f"{store_dir}shift_xy_v8/{mis}/event00000{eventid}-"
    # Create the per-offset output directory on demand.
    os.makedirs(os.path.dirname(name), exist_ok=True)

    shifted.to_csv(name+"hits.csv", index=False)
    cells.to_csv(name+"cells.csv", index=False)
    truth.to_csv(name+"truth.csv", index=False)
    particles.to_csv(name+"particles.csv", index=False)

In [25]:
# Produce a shift_xy_v8 copy of every selected event for every offset in misl.
# `load_event` is the name imported explicitly from trackml.dataset; the bare
# `trackml` module name is never imported by this notebook.
for mis in misl:
    for evtid in evts:
        hits, cells, particles, truth = load_event(f"{og_evts}event00000{evtid}")
        shift_xy_v8(hits, cells, particles, truth, store_dir, evtid, mis)

In [28]:
# Reload the shift_xy_v8 output for offset 1, building the path from
# store_dir / eventid and calling the explicitly imported `load_event`
# (the bare `trackml` module name is never imported by this notebook).
h, c, p, t = load_event(f"{store_dir}shift_xy_v8/1/{eventid}")

In [29]:
# Display the hits table of the event currently held in `h`.
h 

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id
0,1,-64.409897,-7.163700,-1502.5,7,2,1
1,2,-55.336102,0.635342,-1502.5,7,2,1
2,3,-83.830498,-1.143010,-1502.5,7,2,1
3,4,-96.109100,-8.241030,-1502.5,7,2,1
4,5,-62.673599,-9.371200,-1502.5,7,2,1
...,...,...,...,...,...,...,...
120934,120935,-763.862976,51.569401,2944.5,18,12,97
120935,120936,-808.705017,3.459260,2944.5,18,12,97
120936,120937,-982.935974,41.460899,2952.5,18,12,98
120937,120938,-942.698975,18.489100,2952.5,18,12,98
