In [1]:
import pandas as pd
from trackml.dataset import load_event
from trackml.dataset import load_dataset
from trackml.randomize import shuffle_hits
from trackml.score import score_event 
%run utils.ipynb
import time

In [2]:
# Read the sample submission CSV into a DataFrame.
df = pd.read_csv('sample_submission.csv')

In [3]:
# Show the first 10 rows of the sample submission.
df.head(10)

Unnamed: 0,event_id,hit_id,track_id
0,0,1,0
1,0,2,0
2,0,3,0
3,0,4,0
4,0,5,0
5,0,6,0
6,0,7,0
7,0,8,0
8,0,9,0
9,0,10,0


In [4]:
# List the per-event files under data/test_data.
%ls data/test_data

event000000000-cells.csv  event000000042-cells.csv  event000000084-cells.csv
event000000000-hits.csv   event000000042-hits.csv   event000000084-hits.csv
event000000001-cells.csv  event000000043-cells.csv  event000000085-cells.csv
event000000001-hits.csv   event000000043-hits.csv   event000000085-hits.csv
event000000002-cells.csv  event000000044-cells.csv  event000000086-cells.csv
event000000002-hits.csv   event000000044-hits.csv   event000000086-hits.csv
event000000003-cells.csv  event000000045-cells.csv  event000000087-cells.csv
event000000003-hits.csv   event000000045-hits.csv   event000000087-hits.csv
event000000004-cells.csv  event000000046-cells.csv  event000000088-cells.csv
event000000004-hits.csv   event000000046-hits.csv   event000000088-hits.csv
event000000005-cells.csv  event000000047-cells.csv  event000000089-cells.csv
event000000005-hits.csv   event000000047-hits.csv   event000000089-hits.csv
event000000006-cells.csv  event000000048-cells.csv  event000000090-cel

In [5]:
# Build an iterator over data/test_data, requesting only the 'hits' and 'cells' parts.
itr =  load_dataset('data/test_data', parts=['hits','cells'])
# Take the first item from the iterator and unpack it into the event id and its two tables.
event_id, hits, cells = next(itr)
# Preview the first rows of the hits table.
hits.head()

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id
0,1,-60.826698,-4.16023,-1502.5,7,2,1
1,2,-84.729401,-7.29528,-1502.5,7,2,1
2,3,-42.592999,5.04875,-1502.5,7,2,1
3,4,-62.497501,2.72992,-1502.5,7,2,1
4,5,-66.203697,-10.6012,-1502.5,7,2,1


In [6]:
def remove_disk_hits(hits_df):
    """Split ``hits_df`` by the result of ``is_cylinder`` for each row.

    Side effect: an ``is_cylinder`` column is written onto ``hits_df`` itself
    (the caller's frame is modified), holding ``is_cylinder(volume_id, layer_id)``
    evaluated row by row.

    Returns a 2-tuple ``(flag_true_rows, flag_false_rows)``: the rows whose
    flag equals ``True`` and the rows whose flag equals ``False`` (going by the
    function name, the latter are presumably the disk hits).
    """
    def _row_flag(row):
        # is_cylinder is expected to be provided by the notebook namespace.
        return is_cylinder(row['volume_id'], row['layer_id'])

    hits_df['is_cylinder'] = hits_df.apply(_row_flag, axis = 1)

    flags = hits_df['is_cylinder']
    flagged_rows = hits_df[flags == True]
    unflagged_rows = hits_df[flags == False]
    return flagged_rows, unflagged_rows


"""
Inputs to prune:

track_candidates: tensor of shape [num_seeds x 10 x 3]; the last axis holds (r, phi, z).

track_candidates_map: tensor of shape [num_seeds x 10 x 2]; the last axis holds (mse, hit_id).

"""
def prune(track_candidates, track_candidates_map, hits_df, event_id):
    """Greedily keep the lowest-error track candidates that share no hits.

    Candidates are visited in order of increasing total MSE (the sum of their
    per-hit MSE values). A candidate is accepted only when none of its hits
    has been claimed by an earlier accepted candidate. Accepted candidates
    receive track ids 1, 2, ... in acceptance order; every hit that no
    accepted candidate claims is emitted with track id 0.

    Parameters
    ----------
    track_candidates
        Not used by this function; kept so existing callers keep working.
    track_candidates_map : ndarray, shape [num_seeds x hits_per_track x 2]
        ``[..., 0]`` is the per-hit MSE and ``[..., 1]`` the hit id (rounded
        to the nearest integer before use).
    hits_df
        Table of hits for the event. Only its length is used: hit ids are
        taken to be ``1 .. len(hits_df)`` and ``hit_id - 1`` is used as an
        array index, so an id outside that range either raises IndexError or,
        for ids <= 0, silently wraps around to the end of the array.
    event_id
        Value written into the first column of every output row.

    Returns
    -------
    ndarray of int, shape [num_rows x 3]
        Columns are ``(event_id, hit_id, track_id)``. Rows of accepted tracks
        come first (in acceptance order), followed by the unassigned hits in
        increasing hit-id order. With no accepted track the result contains
        only the unassigned rows.
    """
    num_hits = len(hits_df)
    hits_assigned = np.zeros(num_hits, dtype=bool)

    # Rank candidates by their summed per-hit MSE, best (smallest) first.
    track_mses = np.sum(track_candidates_map[:, :, 0], axis=1)
    sorted_track_mapping = track_candidates_map[np.argsort(track_mses)]

    good_tracks = []
    for track in sorted_track_mapping:
        track_hit_ids = np.round(track[:, 1]).astype(int)
        hit_positions = track_hit_ids - 1  # hit ids are 1-based
        if not hits_assigned[hit_positions].any():
            hits_assigned[hit_positions] = True
            good_tracks.append(track_hit_ids)

    # Assemble all accepted rows in one shot instead of re-concatenating the
    # growing result once per track.
    if good_tracks:
        assigned_hit_ids = np.concatenate(good_tracks)
        assigned_track_ids = np.repeat(
            np.arange(1, len(good_tracks) + 1),
            [len(track_hit_ids) for track_hit_ids in good_tracks],
        )
    else:
        # No candidate survived (or none was supplied): nothing is assigned.
        assigned_hit_ids = np.empty(0, dtype=int)
        assigned_track_ids = np.empty(0, dtype=int)

    # Track id 0 marks hits that belong to no accepted track.
    unassigned_hit_ids = np.flatnonzero(~hits_assigned) + 1
    unassigned_track_ids = np.zeros(len(unassigned_hit_ids), dtype=int)

    hit_id_column = np.concatenate([assigned_hit_ids, unassigned_hit_ids])
    track_id_column = np.concatenate([assigned_track_ids, unassigned_track_ids])
    event_id_column = np.full(len(hit_id_column), event_id)

    return np.column_stack([event_id_column, hit_id_column, track_id_column]).astype(int)

    


In [22]:
# Load event 1050; only the first and last returned values are kept
# (presumably the hits table and the ground truth — confirm against load_data_single_event).
hits, _, _ , truth = load_data_single_event(1050)
# Arrays stored on disk for this event, passed to prune as track_candidates and track_candidates_map.
track_cand = np.load('./experimental/hits_from_tracks_event_1050.npy')
mse_hitids = np.load('./experimental/mse_hitid_event_1050.npy')
# Prune the candidates and wrap the resulting rows in a frame with submission-style columns.
results = prune(track_cand, mse_hitids,hits, 1050)
results_df = pd.DataFrame(data=results, columns = ['event_id', 'hit_id', 'track_id'])

In [23]:
# Score the pruned assignment in results_df against truth using trackml's score_event.
score_event(truth,results_df)

0.032376184563013916

In [19]:
# Prune every test event and stream its rows into one submission CSV.
# The first event processed (re)creates the file and writes the header; later
# events are appended. Passing the path plus ``mode`` lets pandas open and
# close the file itself, so no file handle is left open per event.
submission_path = './submission.csv'
for event_index, event_data in enumerate(load_dataset ('./data/test_data', parts = ['hits'])):
    event_id = event_data[0]
    print (event_id)
    hits = event_data[1]
    track_cand = np.load('./reconstructed_test_events/hits_from_tracks_event_' + str (event_id) + '.npy')
    mapping = np.load('./reconstructed_test_events/mse_hitid_event_' + str (event_id) + '.npy')
    our_result = prune (track_cand, mapping, hits, event_id)
    our_result_df = pd.DataFrame (data = our_result, columns = ['event_id', 'hit_id', 'track_id'])
    is_first_event = (event_index == 0)
    our_result_df.to_csv(
        submission_path,
        mode = 'w' if is_first_event else 'a',
        header = is_first_event,
        index = False,
    )


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124


In [15]:
mapping = np.load('./mse_hitid.npy')
track_cand = np.load('./hits_from_tracks.npy')
hits, _, _,truth = load_data_single_event(1050)

# Time one pruning pass over event 1050.
start = time.time()
results = prune(track_cand, mapping, hits, 1050)
print('total time: ', time.time()-start)
# prune has no randomness and does not modify its inputs, so the timed result
# is reused here instead of running the same computation a second time.
our_result = results
#np.save ('./submission_sample.npy', our_result)
our_result_df = pd.DataFrame (data = our_result, columns = ['event_id', 'hit_id', 'track_id'])
print (our_result_df.head())
# Last expression of the cell: the score of the pruned assignment against truth.
score_event (truth, our_result_df)

total time:  0.16388845443725586
   event_id  hit_id  track_id
0      1050   20237         1
1      1050   27501         1
2      1050   33877         1
3      1050   39687         1
4      1050   69212         1


0.032376184563013916

   event_id  hit_id  track_id
0      1050   20237         1
1      1050   27501         1
2      1050   33877         1
3      1050   39687         1
4      1050   69212         1


0.032376184563013916

In [10]:
# Two toy "tracks" of ten consecutive integers each, stacked row-wise into a [2 x 10] array.
track1 = np.arange(0, 10)
track2 = np.arange(10, 20)
tracks = np.stack([track1, track2])
# Number of rows, i.e. number of toy tracks.
len(tracks)

2

In [11]:
# Display the stacked toy array.
tracks

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])

In [12]:
# NOTE(review): submission_from_good_tracks is not defined in any cell visible here and the
# recorded output of this cell is a NameError. Presumably the helper was folded into prune —
# TODO confirm, then restore the helper or remove this cell.
print (submission_from_good_tracks (results))

NameError: name 'submission_from_good_tracks' is not defined

In [None]:
# NOTE(review): this cell was never executed (In [None]). No visible cell assigns `combined`
# at notebook scope — the name only appears as a local inside prune — so this is presumably
# leftover scratch; TODO confirm and remove.
combined

In [None]:
"""


out = np.take(track_mapping,[0], axis=2) # grab mse per hit per track
track_mses = np.sum(out,axis=1) #calc mse per track

sorted_idxs=np.argsort(track_mses, axis = 0)
#flat_idxs = sorted_idxs.flatten()
sorted_tracks = track_candidates[sorted_idxs]

sorted_track_mapping = track_mapping[sorted_idxs]



hits_ref = hits #remember to pass the hits_df in!
hits_ref['assigned']=False
hits_ref['track_id'] = np.nan
track_id = 1
for batchIdx, batch in enumerate(sorted_tracks):
    for trackIdx, track in enumerate(batch):
        temp_hit_ids = []
        valid_track = True
        
        for hitIdx, hit in enumerate(track): 
            #print(hit)
            hit_id = sorted_track_mapping[batchIdx][trackIdx][hitIdx][1]
            temp_hit_ids.append(hit_id)
        
        for hit_id in temp_hit_ids:
            if hits_ref.loc[hits_ref['hit_id']==hit_id]['assigned'].item() is True:
                valid_track = False
                break
        
        
        if valid_track:
            for hit_id in temp_hit_ids:
                hits_ref.loc[hits_ref['hit_id']==hit_id]['assigned'] = True
                hits_ref.loc[hits_ref['hit_id']==hit_id]['track_id'] = track_id
            track_id +=1
            

            
"""             
            
            