In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

from trackml.dataset import load_event, load_dataset
from trackml.score import score_event

In [151]:
from sklearn.preprocessing import StandardScaler
import hdbscan
from scipy import stats
from tqdm import tqdm
from sklearn.cluster import DBSCAN

class Clusterer(object):
    def __init__(self,eps, rz_scales=[0.65, 0.965, 1.528]):                        
        self.rz_scales=rz_scales
        self.eps = eps
#     def __init__(self,rz_scales=[1., 1., 1.]):                        
#         self.rz_scales=rz_scales
        
    def _eliminate_outliers(self,labels,M):
        norms=np.zeros((len(labels)),np.float32)
        indices=np.zeros((len(labels)),np.float32)
        for i, cluster in tqdm(enumerate(labels),total=len(labels)):
            if cluster == 0:
                continue
            index = np.argwhere(self.clusters==cluster)
            index = np.reshape(index,(index.shape[0]))
            indices[i] = len(index)
            x = M[index]
            norms[i] = self._test_quadric(x)
        threshold1 = np.percentile(norms,90)*5
        threshold2 = 20
        threshold3 = 7
        for i, cluster in enumerate(labels):
            if norms[i] > threshold1 or indices[i] > threshold2 or indices[i] < threshold3:
                self.clusters[self.clusters==cluster]=0            
    def _test_quadric(self,x):
        Z = np.zeros((x.shape[0],10), np.float32)
        Z[:,0] = x[:,0]**2
        Z[:,1] = 2*x[:,0]*x[:,1]
        Z[:,2] = 2*x[:,0]*x[:,2]
        Z[:,3] = 2*x[:,0]
        Z[:,4] = x[:,1]**2
        Z[:,5] = 2*x[:,1]*x[:,2]
        Z[:,6] = 2*x[:,1]
        Z[:,7] = x[:,2]**2
        Z[:,8] = 2*x[:,2]
        Z[:,9] = 1
        v, s, t = np.linalg.svd(Z,full_matrices=False)        
        smallest_index = np.argmin(np.array(s))
        T = np.array(t)
        T = T[smallest_index,:]        
        norm = np.linalg.norm(np.dot(Z,T), ord=2)**2
        return norm

    def _preprocess(self, hits):
        
        x = hits.x.values
        y = hits.y.values
        z = hits.z.values

        r = np.sqrt(x**2 + y**2 + z**2)
        hits['x2'] = x/r
        hits['y2'] = y/r

        r = np.sqrt(x**2 + y**2)
        hits['z2'] = z/r

        ss = StandardScaler()
        X = ss.fit_transform(hits[['x2', 'y2', 'z2']].values)
        for i, rz_scale in enumerate(self.rz_scales):
            X[:,i] = X[:,i] * rz_scale
       
        return X
    def _init(self, dfh):
        
        dfh['r'] = np.sqrt(dfh.x**2+dfh.y**2+dfh.z**2)
        dfh['rt'] = np.sqrt(dfh.x**2+dfh.y**2)
        dfh['a0'] = np.arctan2(dfh.y,dfh.x)
        dfh['a0_1'] = np.arctan2(dfh.y2,dfh.x2)
        dfh['r2'] = np.sqrt(dfh.x**2+dfh.y**2)
        dfh['z1'] = dfh['z']/dfh['rt'] 
        
        x = dfh.x.values
        y = dfh.y.values
        z = dfh.z.values
        
        x2 = dfh.x2.values
        y2 = dfh.y2.values
        z2 = dfh.z2.values
        dfh['rho'] = np.sqrt(x**2 + y**2 + z**2)
        dfh['r'] = np.sqrt(x**2 + y**2)
        dfh['r2'] = np.sqrt(x2**2 + y2**2)
#         dfh['phi'] = np.degrees(np.arctan2(hits['y2'], hits['x2']))
        dfh['phi'] = np.arctan2(dfh['y'], dfh['x'])
        dfh['phi2'] = np.arctan2(dfh['y2'], dfh['x2'])
#         dfh['phi_1'] = np.round(np.degrees(np.arctan2(hits['y2'], hits['x2'])), 5)
#         dfh['theta'] = np.degrees(np.arctan2(hits['r'], hits['z2']))
        dfh['theta'] = np.arctan2(dfh['r'], dfh['z'])
        phi = dfh['phi'].values
        theta = dfh['theta'].values
        rho = dfh['rho'].values
        r = dfh['r'].values
        
        phi = dfh['phi2'].values
        r = dfh['r2'].values
        
        dfh['tan_dip'] = phi/theta
        dfh['tan_dip1'] = phi/z2
        dfh['z2_1'] = 1/z2
        dfh['z2_2'] = phi/z2 + 1/z2

        dz = 0.00012
        stepdz = 0.000005
        for ii in tqdm(range(24)):
            dz = dz + ii*stepdz
            dfh['a1'] = dfh['a0']+dz*dfh['z']*np.sign(dfh['z'].values)
            dfh['x1'] = dfh['a1']/dfh['z1']
            dfh['x2'] = 1/dfh['z1']
            dfh['x3'] = dfh['x1']+dfh['x2']
            
            dfh['z2_1'] = dfh['a1']/z2
            dfh['z2_2'] = 1/z2
            dfh['z2_3'] =  dfh['z2_1'] + dfh['z2_2']
            dz1 = 0.0012
            stepdz1 = 0.00005
            dz1 = dz1 + ii*stepdz1
            ss = StandardScaler()
            dfs = ss.fit_transform(dfh[['a1','z1','x1','x2','x3']].values)
#             dfs = ss.fit_transform(dfh[['a1','z2','z2_1', 'z2_2', 'z2_3']].values)
#             dfs = ss.fit_transform(dfh[['a1', 'z2', 'phi2', 'r2', 'z2_1', 'z2_2', 'z2_3']].values)
#             'z2', 'phi2', 'r2','tan_dip1', 'z2_1', 'z2_2'
#             clusters = DBSCAN(eps=self.eps-dz1,min_samples=1,metric='manhattan',n_jobs=8).fit(dfs).labels_
            
            clusters = DBSCAN(eps=0.0035-dz,min_samples=1,metric='manhattan',n_jobs=8).fit(dfs).labels_
#             clusters = DBSCAN(self.eps-dz,min_samples=1,metric='manhattan',n_jobs=8).fit(dfs).labels_
            if ii==0:
                dfh['s1']= clusters
                dfh['N1'] = dfh.groupby('s1')['s1'].transform('count')
            else:
                dfh['s2'] = clusters
                dfh['N2'] = dfh.groupby('s2')['s2'].transform('count')
                maxs1 = dfh['s1'].max()
                cond = np.where(dfh['N2'].values>dfh['N1'].values)
                s1 = dfh['s1'].values
                s1[cond] = dfh['s2'].values[cond]+maxs1
                dfh['s1'] = s1
                dfh['s1'] = dfh['s1'].astype('int64')
                self.clusters = dfh['s1'].values
                dfh['N1'] = dfh.groupby('s1')['s1'].transform('count')
        dz = 0.00012
        stepdz = -0.000005
        for ii in tqdm(range(24)):
            dz = dz + ii*stepdz
            dfh['a1'] = dfh['a0']+dz*dfh['z']*np.sign(dfh['z'].values)
            dfh['x1'] = dfh['a1']/dfh['z1']
            dfh['x2'] = 1/dfh['z1']
            dfh['x3'] = dfh['x1']+dfh['x2']
            
            # vary phi2
            dfh['a2'] = dfh['phi2']+dz*dfh['z']*np.sign(dfh['z'].values)
            dfh['z2_1'] = dfh['a2']/z2 # dip angle?
            dfh['z2_2'] = 1/z2
            dfh['z2_3'] =  dfh['z2_1'] + dfh['z2_2']
            
            dz1 = 0.0012
            stepdz1 = 0.00005
            dz1 = dz1 + ii*stepdz1
            ss = StandardScaler()
            dfs = ss.fit_transform(dfh[['a1','z1','x1','x2','x3']].values)
#             dfs = ss.fit_transform(dfh[['a1', 'z2', 'phi2', 'r2', 'z2_1', 'z2_2', 'z2_3']].values)
#             dfs = ss.fit_transform(dfh[['a1','z2','z2_1', 'z2_2', 'z2_3']].values)
            clusters = DBSCAN(eps=0.0035+dz,min_samples=1,metric='manhattan',n_jobs=8).fit(dfs).labels_
#             clusters = DBSCAN(eps=self.eps+dz1,min_samples=1,metric='manhattan',n_jobs=8).fit(dfs).labels_
#             clusters = DBSCAN(self.eps+dz,min_samples=1,metric='manhattan',n_jobs=8).fit(dfs).labels_
#             'z2', 'phi2', 'r2','tan_dip1', 'z2_1', 'z2_2'
#             dfs = ss.fit_transform(dfh[['a1','z2','r2','phi2','tan_dip1', 'z2_1', 'z2_2']].values)
#             clusters = DBSCAN(eps=0.011+dz,min_samples=1,metric='mahattan',n_jobs=8).fit(dfs).labels_
            
            dfh['s2'] = clusters
            dfh['N2'] = dfh.groupby('s2')['s2'].transform('count')
            maxs1 = dfh['s1'].max()
            cond = np.where(dfh['N2'].values>dfh['N1'].values)
            s1 = dfh['s1'].values
            s1[cond] = dfh['s2'].values[cond]+maxs1
            dfh['s1'] = s1
            dfh['s1'] = dfh['s1'].astype('int64')
            dfh['N1'] = dfh.groupby('s1')['s1'].transform('count')
            
            # check
            dfh['track_id'] = dfh['s1'].values
            dfh['event_id'] = 0
            print(score_event(dfh, dfh[['event_id', 'hit_id', 'track_id']]))
        return dfh['s1'].values
    def predict(self, hits):  
        X = self._preprocess(hits)
        self.clusters = self._init(hits)                
        
        cl = hdbscan.HDBSCAN(min_samples=1,min_cluster_size=7,
                             metric='braycurtis',cluster_selection_method='leaf',algorithm='best', 
                             leaf_size=50)
        labels = np.unique(self.clusters)
        n_labels = 0
        while n_labels < len(labels):
            n_labels = len(labels)
            self._eliminate_outliers(labels,X)
            max_len = np.max(self.clusters)
            self.clusters[self.clusters==0] = cl.fit_predict(X[self.clusters==0])+max_len
            labels = np.unique(self.clusters)
        return self.clusters

In [49]:
def create_one_event_submission(event_id, hits, labels):
    sub_data = np.column_stack(([event_id]*len(hits), hits.hit_id.values, labels))
    submission = pd.DataFrame(data=sub_data, columns=["event_id", "hit_id", "track_id"]).astype(int)
    return submission

In [82]:
path_to_train = '../data/train'
event_prefix = "event000001000"
hits, cells, particles, truth = load_event(os.path.join(path_to_train, event_prefix))

In [61]:
model = Clusterer()
labels = model.predict(hits)

 42%|████▏     | 10/24 [00:08<00:12,  1.12it/s]


KeyboardInterrupt: 

In [11]:
submission = create_one_event_submission(0, hits, labels)
score = score_event(truth, submission)
print("Your score: ", score)

Your score:  0.3724995097633181


In [127]:
model = Clusterer()
labels = model.predict(hits)
submission = create_one_event_submission(0, hits, labels)
score = score_event(truth, submission)
print("Your score: ", score)

100%|██████████| 24/24 [00:23<00:00,  1.03it/s]
100%|██████████| 24/24 [00:22<00:00,  1.09it/s]
100%|██████████| 46651/46651 [00:07<00:00, 6261.28it/s]


Your score:  0.33537467511461244


In [133]:
for eps in [0.0025, 0.003, 0.0035, 0.004, 0.0045]:
    model = Clusterer(eps)
    labels = model.predict(hits)
    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print("Your score: ", score)

100%|██████████| 24/24 [00:21<00:00,  1.10it/s]
100%|██████████| 24/24 [00:21<00:00,  1.11it/s]
100%|██████████| 53076/53076 [00:08<00:00, 6418.19it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.3316338683211663


100%|██████████| 24/24 [00:21<00:00,  1.09it/s]
100%|██████████| 24/24 [00:21<00:00,  1.10it/s]
100%|██████████| 48412/48412 [00:07<00:00, 6421.95it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.35494782255692414


100%|██████████| 24/24 [00:22<00:00,  1.07it/s]
100%|██████████| 24/24 [00:22<00:00,  1.09it/s]
100%|██████████| 44451/44451 [00:06<00:00, 6377.71it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.3724995097633181


100%|██████████| 24/24 [00:22<00:00,  1.07it/s]
100%|██████████| 24/24 [00:22<00:00,  1.07it/s]
100%|██████████| 41106/41106 [00:06<00:00, 6191.60it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.3678402221992445


100%|██████████| 24/24 [00:22<00:00,  1.06it/s]
100%|██████████| 24/24 [00:22<00:00,  1.07it/s]
100%|██████████| 37930/37930 [00:06<00:00, 6316.72it/s]


Your score:  0.35777253808781195


In [135]:
for eps in [0.009, 0.01, 0.015]:
    model = Clusterer(eps)
    labels = model.predict(hits)
    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print("Your score: ", score)

100%|██████████| 24/24 [00:24<00:00,  1.00s/it]
100%|██████████| 24/24 [00:24<00:00,  1.02s/it]
100%|██████████| 80524/80524 [00:11<00:00, 6869.86it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.24455597857829214


100%|██████████| 24/24 [00:24<00:00,  1.01s/it]
100%|██████████| 24/24 [00:24<00:00,  1.02s/it]
100%|██████████| 76502/76502 [00:11<00:00, 6831.23it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.24760303721305957


100%|██████████| 24/24 [00:25<00:00,  1.04s/it]
100%|██████████| 24/24 [00:25<00:00,  1.05s/it]
100%|██████████| 57403/57403 [00:08<00:00, 6614.96it/s]


Your score:  0.26580657537774616


In [136]:
for eps in [0.015, 0.02, 0.025, 0.03, 0.035]:
    model = Clusterer(eps)
    labels = model.predict(hits)
    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print("Your score: ", score)

100%|██████████| 24/24 [00:24<00:00,  1.03s/it]
100%|██████████| 24/24 [00:25<00:00,  1.05s/it]
100%|██████████| 57403/57403 [00:08<00:00, 6549.93it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.26580657537774616


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 41323/41323 [00:06<00:00, 6391.09it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2779047770395823


100%|██████████| 24/24 [00:26<00:00,  1.10s/it]
100%|██████████| 24/24 [00:26<00:00,  1.11s/it]
100%|██████████| 28283/28283 [00:04<00:00, 6182.40it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2731438906619206


100%|██████████| 24/24 [00:27<00:00,  1.14s/it]
100%|██████████| 24/24 [00:27<00:00,  1.14s/it]
100%|██████████| 18087/18087 [00:02<00:00, 6032.42it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2510339295763603


100%|██████████| 24/24 [00:27<00:00,  1.16s/it]
100%|██████████| 24/24 [00:28<00:00,  1.18s/it]
100%|██████████| 10181/10181 [00:01<00:00, 5800.38it/s]


Your score:  0.2478649934517582


In [137]:
for eps in [0.02, 0.021, 0.022, 0.023, 0.024, 0.025]:
    model = Clusterer(eps)
    labels = model.predict(hits)
    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print("Your score: ", score)

100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 41323/41323 [00:06<00:00, 6466.63it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2779047770395823


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:26<00:00,  1.10s/it]
100%|██████████| 38499/38499 [00:06<00:00, 6374.89it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27658018631173864


100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 24/24 [00:26<00:00,  1.10s/it]
100%|██████████| 35718/35718 [00:05<00:00, 6221.50it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2773611788769742


100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 24/24 [00:26<00:00,  1.12s/it]
100%|██████████| 33074/33074 [00:05<00:00, 6275.96it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27508225396290126


100%|██████████| 24/24 [00:26<00:00,  1.10s/it]
100%|██████████| 24/24 [00:26<00:00,  1.12s/it]
100%|██████████| 30645/30645 [00:05<00:00, 6120.09it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27420398561069526


100%|██████████| 24/24 [00:26<00:00,  1.10s/it]
100%|██████████| 24/24 [00:26<00:00,  1.12s/it]
100%|██████████| 28283/28283 [00:04<00:00, 6279.68it/s]


Your score:  0.2731438906619206


In [138]:
for eps in [0.0195, 0.0196, 0.0197, 0.0198, 0.0199, 0.02, 0.0221]:
    model = Clusterer(eps)
    labels = model.predict(hits)
    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print("Your score: ", score)

100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 42818/42818 [00:06<00:00, 6432.35it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27899005323946957


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 42494/42494 [00:06<00:00, 6484.42it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2783438965105194


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 42220/42220 [00:06<00:00, 6434.12it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2777166455682055


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 41917/41917 [00:06<00:00, 6428.08it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2782452993647748


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 41636/41636 [00:06<00:00, 6503.59it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27838525377865697


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:26<00:00,  1.08s/it]
100%|██████████| 41323/41323 [00:06<00:00, 6288.11it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2779047770395823


100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 24/24 [00:26<00:00,  1.10s/it]
100%|██████████| 35484/35484 [00:05<00:00, 6026.46it/s]


Your score:  0.2779769871040115


In [139]:
for eps in [0.0190, 0.0191, 0.0192, 0.0193, 0.0194, 0.0195]:
    model = Clusterer(eps)
    labels = model.predict(hits)
    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print("Your score: ", score)

100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 44330/44330 [00:07<00:00, 6165.24it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2784253653607127


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 44021/44021 [00:06<00:00, 6564.00it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27790839642284904


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 43723/43723 [00:06<00:00, 6550.50it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27825617823536675


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 43401/43401 [00:06<00:00, 6478.95it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2786410817632458


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 43135/43135 [00:06<00:00, 6441.88it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2794540779419116


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 42818/42818 [00:06<00:00, 6361.96it/s]


Your score:  0.27899005323946957


In [140]:
for eps in [0.01940, 0.01941, 0.01942, 0.01943, 0.01944, 0.01945]:
    model = Clusterer(eps)
    labels = model.predict(hits)
    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print("Your score: ", score)

100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 43135/43135 [00:06<00:00, 6439.70it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2794540779419116


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 43096/43096 [00:06<00:00, 6405.98it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2796902112536624


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 43071/43071 [00:06<00:00, 6325.08it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2793524303109651


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:26<00:00,  1.08s/it]
100%|██████████| 43039/43039 [00:06<00:00, 6524.31it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27919318824042855


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 43010/43010 [00:06<00:00, 6356.89it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27926378008881425


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 42979/42979 [00:06<00:00, 6473.17it/s]


Your score:  0.27910077055062


In [141]:
for eps in [0.01941, 0.019412, 0.019413, 0.019414, 0.019415]:
    model = Clusterer(eps)
    labels = model.predict(hits)
    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print("Your score: ", score)

100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:26<00:00,  1.08s/it]
100%|██████████| 43096/43096 [00:06<00:00, 6418.05it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2796902112536624


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 43093/43093 [00:06<00:00, 6361.04it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.279684001582719


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:25<00:00,  1.08s/it]
100%|██████████| 43091/43091 [00:06<00:00, 6490.36it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.2795959374951502


100%|██████████| 24/24 [00:25<00:00,  1.06s/it]
100%|██████████| 24/24 [00:26<00:00,  1.09s/it]
100%|██████████| 43086/43086 [00:06<00:00, 6325.80it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

Your score:  0.27949082549908844


100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
100%|██████████| 43079/43079 [00:06<00:00, 6479.18it/s]


Your score:  0.27949082549908844


In [152]:
path_to_train = '../data/train'
event_prefix = "event000001000"
hits, cells, particles, truth = load_event(os.path.join(path_to_train, event_prefix))

truth = pd.merge(truth, particles, how='left', on='particle_id')
hits = pd.merge(hits, truth, how='left', on='hit_id')

hits = hits.fillna(0)
    
model = Clusterer(eps=0.0035)
labels = model.predict(hits)
submission = create_one_event_submission(0, hits, labels)
score = score_event(truth, submission)
print("Your score: ", score)

100%|██████████| 24/24 [00:22<00:00,  1.08it/s]
  4%|▍         | 1/24 [00:01<00:42,  1.83s/it]

0.0752305681290277


  8%|▊         | 2/24 [00:03<00:40,  1.86s/it]

0.07860727762759465


 12%|█▎        | 3/24 [00:05<00:39,  1.86s/it]

0.08487206478655537


 17%|█▋        | 4/24 [00:07<00:36,  1.84s/it]

0.09836164652413468


 21%|██        | 5/24 [00:09<00:34,  1.82s/it]

0.12273995810518126


 25%|██▌       | 6/24 [00:10<00:32,  1.81s/it]

0.16390320049327797


 29%|██▉       | 7/24 [00:12<00:30,  1.79s/it]

0.20963877214138535


 33%|███▎      | 8/24 [00:14<00:28,  1.80s/it]

0.2550587275434543


 38%|███▊      | 9/24 [00:16<00:26,  1.78s/it]

0.28150570336789116


 42%|████▏     | 10/24 [00:17<00:24,  1.77s/it]

0.2965299353228473


 46%|████▌     | 11/24 [00:19<00:22,  1.76s/it]

0.305826575304673


 50%|█████     | 12/24 [00:21<00:21,  1.75s/it]

0.31338502129324364


 54%|█████▍    | 13/24 [00:22<00:19,  1.74s/it]

0.3191473262526317


 58%|█████▊    | 14/24 [00:24<00:17,  1.74s/it]

0.3234579119822572


 62%|██████▎   | 15/24 [00:25<00:15,  1.73s/it]

0.32712654568837507


 67%|██████▋   | 16/24 [00:27<00:13,  1.72s/it]

0.3294563154367999


 71%|███████   | 17/24 [00:29<00:11,  1.71s/it]

0.33212978778870617


 75%|███████▌  | 18/24 [00:30<00:10,  1.71s/it]

0.333400048336403


 79%|███████▉  | 19/24 [00:32<00:08,  1.72s/it]

0.3349611482924627


 83%|████████▎ | 20/24 [00:34<00:06,  1.72s/it]

0.3356844718040679


 88%|████████▊ | 21/24 [00:36<00:05,  1.72s/it]

0.336986740059513


 92%|█████████▏| 22/24 [00:37<00:03,  1.72s/it]

0.33829005048499017


 96%|█████████▌| 23/24 [00:39<00:01,  1.72s/it]

0.3393908779413055


100%|██████████| 24/24 [00:41<00:00,  1.72s/it]
  1%|          | 481/44451 [00:00<00:09, 4806.48it/s]

0.34013065376921303


100%|██████████| 44451/44451 [00:07<00:00, 6047.93it/s]


Your score:  0.3724995097633181
