In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

from trackml.dataset import load_event, load_dataset
from trackml.score import score_event

from multiprocessing import Pool

def create_one_event_submission(event_id, hits, labels):
    sub_data = np.column_stack(([event_id]*len(hits), hits.hit_id.values, labels))
    submission = pd.DataFrame(data=sub_data, columns=["event_id", "hit_id", "track_id"]).astype(int)
    return submission

In [2]:
# Change this according to your directory preferred setting
path_to_train = "./data/train_1"

# This event is in Train_1
event_prefix = "event000001000"

In [3]:
from sklearn.preprocessing import StandardScaler
import hdbscan
from scipy import stats
from tqdm import tqdm_notebook as tqdm
from sklearn.cluster import DBSCAN

class Clusterer(object):
    def __init__(self,rz_scales=[0.65, 0.965, 1.528], eps=0.0035):                        
        self.rz_scales=rz_scales
        self.epsilon = eps
    
    # remove outliers
    def _eliminate_outliers(self,labels,M):
        norms=np.zeros((len(labels)),np.float32)
        indices=np.zeros((len(labels)),np.float32)
        for i, cluster in tqdm(enumerate(labels),total=len(labels)):
            if cluster == 0:
                continue
            index = np.argwhere(self.clusters==cluster)
            index = np.reshape(index,(index.shape[0]))
            indices[i] = len(index)
            x = M[index]
            norms[i] = self._test_quadric(x)
        threshold1 = np.percentile(norms,90)*5
        threshold2 = 25
        threshold3 = 6
        for i, cluster in enumerate(labels):
            if norms[i] > threshold1 or indices[i] > threshold2 or indices[i] < threshold3:
                self.clusters[self.clusters==cluster]=0   
    
    # not sure what this function does?
    def _test_quadric(self,x):
        if x.size == 0 or len(x.shape)<2:
            return 0
        Z = np.zeros((x.shape[0],10), np.float32)
        Z[:,0] = x[:,0]**2
        Z[:,1] = 2*x[:,0]*x[:,1]
        Z[:,2] = 2*x[:,0]*x[:,2]
        Z[:,3] = 2*x[:,0]
        Z[:,4] = x[:,1]**2
        Z[:,5] = 2*x[:,1]*x[:,2]
        Z[:,6] = 2*x[:,1]
        Z[:,7] = x[:,2]**2
        Z[:,8] = 2*x[:,2]
        Z[:,9] = 1
        v, s, t = np.linalg.svd(Z,full_matrices=False)        
        smallest_index = np.argmin(np.array(s))
        T = np.array(t)
        T = T[smallest_index,:]        
        norm = np.linalg.norm(np.dot(Z,T), ord=2)**2
        return norm

    # standard scale our data
    def _preprocess(self, hits):
        x = hits.x.values
        y = hits.y.values
        z = hits.z.values

        r = np.sqrt(x**2 + y**2 + z**2)
        hits['x2'] = x/r
        hits['y2'] = y/r

        r = np.sqrt(x**2 + y**2)
        hits['z2'] = z/r

        ss = StandardScaler()
        X = ss.fit_transform(hits[['x2', 'y2', 'z2']].values)
        for i, rz_scale in enumerate(self.rz_scales):
            X[:,i] = X[:,i] * rz_scale
       
        return X
    
    def _init(self,dfh):
        # create the radius of the hit from the coordinates
        dfh['r'] = np.sqrt(dfh['x'].values**2+dfh['y'].values**2+dfh['z'].values**2)
        
        # radius excluding z coordinate
        dfh['rt'] = np.sqrt(dfh['x'].values**2+dfh['y'].values**2)
        
        # create some other features
        dfh['a0'] = np.arctan2(dfh['y'].values,dfh['x'].values)
        dfh['z1'] = dfh['z'].values/dfh['rt'].values
        dfh['x2'] = 1/dfh['z1'].values
        
        # step params
        dz0 = -0.00070
        stepdz = 0.00001
        stepeps = 0.000005
        mm = 1
        
        # loop from 1 to 100
        for ii in tqdm(range(100)):
            mm = mm*(-1)
            dz = mm*(dz0+ii*stepdz)
            dfh['a1'] = dfh['a0'].values+dz*dfh['z'].values*np.sign(dfh['z'].values)
            dfh['sina1'] = np.sin(dfh['a1'].values)
            dfh['cosa1'] = np.cos(dfh['a1'].values)
            dfh['x1'] = dfh['a1'].values/dfh['z1'].values
            ss = StandardScaler()
            dfs = ss.fit_transform(dfh[['sina1','cosa1','z1','x1','x2']].values)
            cx = np.array([1, 1, 0.75, 0.5, 0.5])
            dfs = np.multiply(dfs, cx)
            
            # cluster our hits
            clusters=DBSCAN(eps=self.epsilon+ii*stepeps,min_samples=1,metric='euclidean',n_jobs=2).fit(dfs).labels_            
            
            # if we are in first iteration add features to our hits
            if ii==0:
                dfh['s1'] = clusters
                dfh['N1'] = dfh.groupby('s1')['s1'].transform('count')
            
            # else update our hits conditionally, if it's a better fit
            else:
                dfh['s2'] = clusters
                dfh['N2'] = dfh.groupby('s2')['s2'].transform('count')
                maxs1 = dfh['s1'].max()
                cond = np.where((dfh['N2'].values>dfh['N1'].values) & (dfh['N2'].values<20))
                s1 = dfh['s1'].values
                s1[cond] = dfh['s2'].values[cond]+maxs1
                dfh['s1'] = s1
                dfh['s1'] = dfh['s1'].astype('int64')
                dfh['N1'] = dfh.groupby('s1')['s1'].transform('count')
        
        # return our clusters
        return dfh['s1'].values    
    
    def predict(self, hits):    
        # init our clusters
        self.clusters = self._init(hits) 
        
        # preprocess our data
        X = self._preprocess(hits) 
        
        # create our clusterer
        cl = hdbscan.HDBSCAN(min_samples=1,min_cluster_size=7, metric='braycurtis',cluster_selection_method='leaf',algorithm='best', leaf_size=50)
        
        # labels = unique clusters
        labels = np.unique(self.clusters)
        
        # remove outliers
        self._eliminate_outliers(labels,X)
        
        # init n_labels
        n_labels = 0
        
        # now we loop through the points that haven't been assigned to a cluster and assign them with
        # HDBSCAN
        while n_labels < len(labels):
            n_labels = len(labels)
            max_len = np.max(self.clusters)
            mask = self.clusters == 0
            self.clusters[mask] = cl.fit_predict(X[mask])+max_len
            
        return self.clusters

In [4]:
path_to_test = "./data/test"
eps = 0.0036

def one_loop(event_id):
    hits  = pd.read_csv(path_to_test + '/event%s-hits.csv'%event_id)
    cells = pd.read_csv(path_to_test + '/event%s-cells.csv'%event_id)
    print('Event ID: ', event_id)
                
    # Track pattern recognition 
    model = Clusterer(eps=eps)
    labels = model.predict(hits)

    # Prepare submission for an event
    one_submission = create_one_event_submission(event_id, hits, labels)
    one_submission.to_csv('./%09d_dbscan_e_36'%int(event_id), index=False, compression='gzip')
            
    return one_submission

def create_test_submissions(path_to_test = "./data/test", start=0, end=125):
    event_ids = [ '%09d'%i for i in range(start,end) ]

    pool = Pool(processes=8)
    results = pool.map(one_loop, event_ids)
    pool.close()
    
    return results

In [5]:
submission = create_test_submissions(start=0, end=125)

Event ID:  000000020


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000016
Event ID:  000000012
Event ID:  000000028


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000008
Event ID:  000000024


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000004
Event ID:  000000000


HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=29189), HTML(value='')))




  **self._backend_args)


Event ID:  000000021


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35827), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=39802), HTML(value='')))

Event ID:  000000017


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=37650), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=41730), HTML(value='')))




  **self._backend_args)


Event ID:  000000029


HBox(children=(IntProgress(value=0), HTML(value='')))


Event ID:  000000013


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)





HBox(children=(IntProgress(value=0, max=42393), HTML(value='')))

Event ID:  000000009


HBox(children=(IntProgress(value=0), HTML(value='')))





HBox(children=(IntProgress(value=0, max=40713), HTML(value='')))

  **self._backend_args)


Event ID:  000000005


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000025


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38740), HTML(value='')))




  **self._backend_args)


Event ID:  000000001


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38184), HTML(value='')))




  **self._backend_args)


Event ID:  000000022


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=30982), HTML(value='')))




  **self._backend_args)


Event ID:  000000030


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40537), HTML(value='')))




HBox(children=(IntProgress(value=0, max=44305), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38819), HTML(value='')))




  **self._backend_args)






HBox(children=(IntProgress(value=0, max=41982), HTML(value='')))




  **self._backend_args)


Event ID:  000000010


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)





HBox(children=(IntProgress(value=0, max=36612), HTML(value='')))

Event ID:  000000006


HBox(children=(IntProgress(value=0), HTML(value='')))


Event ID:  000000014


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)
  **self._backend_args)


Event ID:  000000026


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000018


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=43976), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40080), HTML(value='')))




  **self._backend_args)





  **self._backend_args)


Event ID:  000000023


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000002


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36978), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=37496), HTML(value='')))

Event ID:  000000031


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=37663), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=33192), HTML(value='')))


Event ID:  000000019


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)





  **self._backend_args)





HBox(children=(IntProgress(value=0, max=35865), HTML(value='')))

Event ID:  000000015


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000027


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=38400), HTML(value='')))

Event ID:  000000007


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000011


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38418), HTML(value='')))




  **self._backend_args)


Event ID:  000000003


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=34767), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38504), HTML(value='')))




  **self._backend_args)





  **self._backend_args)


Event ID:  000000032


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000036


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=37194), HTML(value='')))




  **self._backend_args)


Event ID:  000000040


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35713), HTML(value='')))




HBox(children=(IntProgress(value=0, max=34885), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=39295), HTML(value='')))




  **self._backend_args)


Event ID:  000000044


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000048


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=38261), HTML(value='')))

Event ID:  000000052


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000056


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=31778), HTML(value='')))





HBox(children=(IntProgress(value=0, max=39932), HTML(value='')))

  **self._backend_args)


Event ID:  000000033


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000060


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=37063), HTML(value='')))




  **self._backend_args)


Event ID:  000000037


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41929), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38336), HTML(value='')))




  **self._backend_args)



Event ID:  000000041


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)


Event ID:  000000049


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41153), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40285), HTML(value='')))




  **self._backend_args)



Event ID:  000000057


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)


Event ID:  000000053


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=30557), HTML(value='')))




  **self._backend_args)


Event ID:  000000034


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=43718), HTML(value='')))




  **self._backend_args)


Event ID:  000000045


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35263), HTML(value='')))




  **self._backend_args)


Event ID:  000000061


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36719), HTML(value='')))




  **self._backend_args)


Event ID:  000000042


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39873), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36117), HTML(value='')))





HBox(children=(IntProgress(value=0, max=39059), HTML(value='')))

  **self._backend_args)





  **self._backend_args)


Event ID:  000000038


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000054


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000050


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41649), HTML(value='')))




  **self._backend_args)


Event ID:  000000058


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=44832), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=35111), HTML(value='')))

Event ID:  000000035


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000046


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35339), HTML(value='')))




  **self._backend_args)


Event ID:  000000062


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40192), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=39414), HTML(value='')))

Event ID:  000000043


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000039


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38492), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39052), HTML(value='')))




  **self._backend_args)





  **self._backend_args)


Event ID:  000000051


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000055


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35284), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=35084), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38976), HTML(value='')))

Event ID:  000000047


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)



Event ID:  000000064


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)


Event ID:  000000059


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40100), HTML(value='')))





HBox(children=(IntProgress(value=0, max=37031), HTML(value='')))

  **self._backend_args)


Event ID:  000000063


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000068


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42955), HTML(value='')))





HBox(children=(IntProgress(value=0, max=33684), HTML(value='')))

  **self._backend_args)





HBox(children=(IntProgress(value=0, max=31253), HTML(value='')))


Event ID:  000000072


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)





  **self._backend_args)


Event ID:  000000076


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000080


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=37228), HTML(value='')))




  **self._backend_args)


Event ID:  000000084


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41093), HTML(value='')))





HBox(children=(IntProgress(value=0, max=40929), HTML(value='')))

  **self._backend_args)


Event ID:  000000088


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000065


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41791), HTML(value='')))





HBox(children=(IntProgress(value=0, max=39108), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39029), HTML(value='')))

  **self._backend_args)





HBox(children=(IntProgress(value=0, max=36479), HTML(value='')))

Event ID:  000000092


HBox(children=(IntProgress(value=0), HTML(value='')))





  **self._backend_args)
  **self._backend_args)



Event ID:  000000077


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000073


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)


Event ID:  000000069


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=33161), HTML(value='')))




  **self._backend_args)


Event ID:  000000085


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=44365), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42211), HTML(value='')))




  **self._backend_args)



Event ID:  000000081


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)


Event ID:  000000089


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=33015), HTML(value='')))




  **self._backend_args)


Event ID:  000000066


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=31813), HTML(value='')))




  **self._backend_args)


Event ID:  000000078


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35686), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39732), HTML(value='')))




  **self._backend_args)





  **self._backend_args)


Event ID:  000000093


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36322), HTML(value='')))

Event ID:  000000074


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000070


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=37392), HTML(value='')))




  **self._backend_args)


Event ID:  000000086


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36026), HTML(value='')))




  **self._backend_args)


Event ID:  000000082


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42451), HTML(value='')))




  **self._backend_args)


Event ID:  000000090


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36671), HTML(value='')))




  **self._backend_args)


Event ID:  000000067


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40144), HTML(value='')))




  **self._backend_args)


Event ID:  000000079


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=34677), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=40208), HTML(value='')))

Event ID:  000000071


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000075


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42226), HTML(value='')))




  **self._backend_args)


Event ID:  000000094


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35951), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=42244), HTML(value='')))

Event ID:  000000083


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40973), HTML(value='')))





  **self._backend_args)
  **self._backend_args)


Event ID:  000000087


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000091


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=32392), HTML(value='')))




  **self._backend_args)


Event ID:  000000096


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=44151), HTML(value='')))





HBox(children=(IntProgress(value=0, max=36983), HTML(value='')))

  **self._backend_args)





  **self._backend_args)


Event ID:  000000100


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000104


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42131), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35984), HTML(value='')))





  **self._backend_args)
  **self._backend_args)


Event ID:  000000095


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000108


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35194), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=41847), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39129), HTML(value='')))

Event ID:  000000112


HBox(children=(IntProgress(value=0), HTML(value='')))





  **self._backend_args)
  **self._backend_args)


Event ID:  000000116


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000097


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40886), HTML(value='')))




  **self._backend_args)


Event ID:  000000120


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40157), HTML(value='')))





HBox(children=(IntProgress(value=0, max=40931), HTML(value='')))

  **self._backend_args)


Event ID:  000000105


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=44935), HTML(value='')))

Event ID:  000000109


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40524), HTML(value='')))




  **self._backend_args)



Event ID:  000000101


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)


Event ID:  000000124


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39246), HTML(value='')))




  **self._backend_args)


Event ID:  000000113


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40902), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42438), HTML(value='')))




  **self._backend_args)





  **self._backend_args)


Event ID:  000000098


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35808), HTML(value='')))

Event ID:  000000117


HBox(children=(IntProgress(value=0), HTML(value='')))




  **self._backend_args)


Event ID:  000000121


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=33983), HTML(value='')))




  **self._backend_args)


Event ID:  000000106


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42795), HTML(value='')))




  **self._backend_args)


Event ID:  000000110


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=43890), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36087), HTML(value='')))





  **self._backend_args)
  **self._backend_args)


Event ID:  000000102


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=44410), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42473), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41908), HTML(value='')))




  **self._backend_args)







HBox(children=(IntProgress(value=0, max=41980), HTML(value='')))

  **self._backend_args)
  **self._backend_args)


Event ID:  000000114


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000118


HBox(children=(IntProgress(value=0), HTML(value='')))


Event ID:  000000099


HBox(children=(IntProgress(value=0), HTML(value='')))

  **self._backend_args)


Event ID:  000000122


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=37155), HTML(value='')))




  **self._backend_args)


Event ID:  000000107


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41126), HTML(value='')))




  **self._backend_args)


Event ID:  000000111


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=37751), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36335), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35240), HTML(value='')))





  **self._backend_args)
  **self._backend_args)





  **self._backend_args)


Event ID:  000000103


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000115


HBox(children=(IntProgress(value=0), HTML(value='')))

Event ID:  000000123


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38174), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35827), HTML(value='')))





  **self._backend_args)
  **self._backend_args)


Event ID:  000000119


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42132), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=43499), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=38564), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=36329), HTML(value='')))




  **self._backend_args)





HBox(children=(IntProgress(value=0, max=30912), HTML(value='')))




HBox(children=(IntProgress(value=0, max=34724), HTML(value='')))




  **self._backend_args)





  **self._backend_args)


In [9]:
event_ids = [ i for i in range(0,125) ]
submissions = []
for i,event_id in enumerate(event_ids):
    submission  = pd.read_csv('./%09d_dbscan_e_36'%event_id, compression='gzip')
    submissions.append(submission)

# Create submission file
submission = pd.concat(submissions, axis=0)
submission.to_csv('20180704_dbscan_e_36.csv.gz', index=False, compression='gzip')
print(len(submission))

13741466


In [7]:
model = Clusterer(eps=0.0040)
labels = model.predict(hits)

HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0, max=37118), HTML(value='')))

In [8]:
submission = create_one_event_submission(0, hits, labels)
score = score_event(truth, submission)
print("Your score: ", score)

Your score:  0.4221169878368154


In [10]:
dataset_submissions = []
dataset_scores = []
for event_id, hits, cells, particles, truth in load_dataset(path_to_train, skip=50, nevents=5):
    # Track pattern recognition
    model = Clusterer(eps=0.0035)
    labels = model.predict(hits)

    # Prepare submission for an event
    one_submission = create_one_event_submission(event_id, hits, labels)
    dataset_submissions.append(one_submission)

    # Score for the event
    score = score_event(truth, one_submission)
    dataset_scores.append(score)

    print("Score for event %d: %.8f" % (event_id, score))
print('Mean score: %.8f' % (np.mean(dataset_scores)))

HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0, max=41724), HTML(value='')))

Score for event 1050: 0.43917525


HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0, max=42910), HTML(value='')))

Score for event 1051: 0.43411798


HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0, max=34739), HTML(value='')))

Score for event 1052: 0.43377265


HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0, max=43095), HTML(value='')))

Score for event 1053: 0.42372599


HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0, max=35134), HTML(value='')))

Score for event 1054: 0.45065105
Mean score: 0.43628858


In [4]:
path_to_test = "./data/test"
start = 0

def create_test_submissions(path_to_test = "./data/test", start=0):
    test_dataset_submissions = []
    create_submission = True # True for submission 
    if create_submission:
        for event_id, hits, cells in load_dataset(path_to_test, parts=['hits', 'cells']):
            if event_id >= start:
                print('Event ID: ', event_id)
                
                # Track pattern recognition 
                model = Clusterer(eps=0.00345)
                labels = model.predict(hits)

                # Prepare submission for an event
                one_submission = create_one_event_submission(event_id, hits, labels)
                one_submission.to_csv('./%09d.helix.csv.gz'%event_id, index=False, compression='gzip')
                test_dataset_submissions.append(one_submission)
        
        # Create submission file
        submission = pd.concat(test_dataset_submissions, axis=0)
        
        return submission

In [5]:
_ = create_test_submissions(start=0)

Event ID:  0


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39572), HTML(value='')))


Event ID:  1


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40884), HTML(value='')))


Event ID:  2


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39050), HTML(value='')))


Event ID:  3


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=40642), HTML(value='')))


Event ID:  4


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=43189), HTML(value='')))


Event ID:  5


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39514), HTML(value='')))


Event ID:  6


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36642), HTML(value='')))


Event ID:  7


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))


Event ID:  8


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42312), HTML(value='')))


Event ID:  9


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41275), HTML(value='')))


Event ID:  10


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39280), HTML(value='')))


Event ID:  11


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39907), HTML(value='')))


Event ID:  12


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38400), HTML(value='')))


Event ID:  13


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=45107), HTML(value='')))


Event ID:  14


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38263), HTML(value='')))


Event ID:  15


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35275), HTML(value='')))


Event ID:  16


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=36391), HTML(value='')))


Event ID:  17


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=42914), HTML(value='')))


Event ID:  18


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38061), HTML(value='')))


Event ID:  19


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=35645), HTML(value='')))


Event ID:  20


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=29730), HTML(value='')))


Event ID:  21


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=38746), HTML(value='')))


Event ID:  22


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=44799), HTML(value='')))


Event ID:  23


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=39246), HTML(value='')))


Event ID:  24


HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=41491), HTML(value='')))


Event ID:  25


HBox(children=(IntProgress(value=0), HTML(value='')))

KeyboardInterrupt: 

In [8]:
event_ids = [ i for i in range(0,125) ]
submissions = []
for i,event_id in enumerate(event_ids):
    submission  = pd.read_csv('./%09d.helix.csv.gz'%event_id, compression='gzip')
    submissions.append(submission)

# Create submission file
submission = pd.concat(submissions, axis=0)
submission.to_csv('20180701_dbscan_e_345.csv.gz', index=False, compression='gzip')
print(len(submission))

13741466
