In [1]:
%matplotlib inline

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.spatial.distance import cdist
from trackml.dataset import load_event

from scripts.sh import train, evaluate
from scripts.tracks import get_tracks

# Plot styling: apply seaborn's 'whitegrid' style.
sns.set_style('whitegrid')
# Passing None lifts pandas' limit on displayed columns, so wide frames are shown in full.
pd.set_option('display.max_columns', None)

In [2]:
# Location of the event files. The directory can be overridden with the
# TRACKML_DATA_DIR environment variable so the notebook is not tied to one
# machine; the default is the original local path.
path2data = os.environ.get('TRACKML_DATA_DIR', '/home/alepfu/Desktop/dataset/train_100_events/')
event_id = 'event000001000'

# os.path.join copes with a directory given with or without a trailing slash.
hits, cells, particles, truth = load_event(os.path.join(path2data, event_id))

# Inner-join truth -> hits on hit_id, then -> particles on particle_id, giving
# one row per hit that has a matching particle record.
# NOTE(review): assumes load_event returns pandas DataFrames (hence no
# pd.DataFrame(...) re-wrapping) — confirm against the installed trackml version.
full_merge = truth.merge(hits, on='hit_id').merge(particles, on='particle_id')

# From here on `hits` is the merged frame; `full_merge` keeps the untouched join.
hits = full_merge.copy()

In [3]:
# Filter the merged event through the project helper `get_tracks`.
# NOTE(review): N_tracks=0 presumably means "do not limit the number of
# tracks" — confirm in scripts/tracks.py.
N_tracks = 0

# Always start from a fresh copy of the full merge rather than from `hits`
# itself, so re-running this cell does not filter already-filtered data.
hits = get_tracks(
    full_merge.copy(),
    N_tracks=N_tracks,
    include_zero_weights=False,
    include_zero_ID=False,
)

In [52]:
# Reference particle whose hits are used as the example track below.
TRACK_PARTICLE_ID = 238697583478833152
track_columns = ['hit_id', 'x', 'y', 'z', 'volume_id', 'layer_id']

# Select rows and columns in a single .loc call and sort without inplace=True:
# sorting a chained selection in place can raise SettingWithCopyWarning and
# makes the cell harder to re-run safely.
single_track = (
    hits.loc[hits['particle_id'] == TRACK_PARTICLE_ID, track_columns]
        .sort_values('z')
)
single_track

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id
55741,22300,26.501801,-17.426399,10.9711,8,2
55742,30035,61.852699,-37.716499,18.223499,8,4
55743,36935,100.469002,-56.576599,25.776501,8,6
55744,43293,153.919006,-77.621498,35.6702,8,8
55746,75296,238.207993,-99.942802,50.799999,13,2
55745,74685,241.330994,-100.509003,51.0,13,2
55747,82193,339.393005,-110.342003,67.900002,13,4
55748,88713,490.631989,-93.694603,94.0,13,6
55749,94648,657.117981,-26.131701,125.199997,13,8
55750,110874,814.51001,109.238998,168.199997,17,2


### Train the hashing model and evaluate all possible hits

In [89]:
# Seed hit and the true follow-up hit: rows 0 and 1 of the z-sorted track.
first_hit = single_track.iloc[0]
next_hit = single_track.iloc[1]
volume_id = first_hit['volume_id']
layer_id = first_hit['layer_id']

# Candidate hits: same volume as the seed, LAYER_STEP layer ids further on.
# NOTE(review): the step of 2 matches the layer ids seen on this track
# (2, 4, 6, 8); it does not handle a transition into another volume.
LAYER_STEP = 2
possible_hits = hits.loc[
    (hits['volume_id'] == volume_id) & (hits['layer_id'] == layer_id + LAYER_STEP)
]

# Train the hashing model on the coordinates of every hit in the event.
# NOTE(review): n_bit is presumably the length of the binary code — confirm in scripts/sh.py.
n_bit = 128
xyz_columns = ['x', 'y', 'z']
model = train(hits[xyz_columns].values, n_bit)

# Encode the candidates, the seed hit and the true next hit with the same model.
# Single hits are passed as a one-row 2-D array.
codes_base = evaluate(possible_hits[xyz_columns].values, model, compact=False)
codes_query = evaluate(np.array([first_hit[xyz_columns].values]), model, compact=False)
codes_next_hit = evaluate(np.array([next_hit[xyz_columns].values]), model, compact=False)

# cdist's 'hamming' metric is the fraction of differing code positions, so
# the threshold below is a fraction too, not a bit count.
distances = cdist(codes_base, codes_query, 'hamming').ravel()

# Candidates closer than this normalised Hamming distance count as "found".
HAMMING_THRESHOLD = 0.2

print('dist to next hit:', cdist(codes_next_hit, codes_query, 'hamming'))
# Count matches directly instead of building a filtered DataFrame just for its length.
print('found hits:', np.count_nonzero(distances < HAMMING_THRESHOLD))



dist to next hit: [[0.1015625]]
found hits: 373
