In [80]:
import os
import math
import multiprocessing
import random
import functools
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from mpl_toolkits import mplot3d
import seaborn as sns

from pandas import plotting

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cluster import DBSCAN
from sklearn.svm import SVR

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import linear_model

from trackml.dataset import load_event, load_dataset
from trackml.randomize import shuffle_hits
from trackml.score import score_event

%matplotlib inline

In [2]:
def read_dataset_filenames_from_dir(path_to_datasets_dir):
    """Group the files found in a dataset directory by event id.

    The event id is the slice ``filename[5:14]`` of each directory entry
    (presumably names look like ``event000001000-hits.csv`` -- TODO confirm).

    Returns a dict mapping event id -> list of paths (directory joined with
    the file name), in the order ``os.listdir`` reports the entries.
    """
    files_by_event = {}
    for name in os.listdir(path_to_datasets_dir):
        # setdefault creates the list the first time an event id is seen.
        files_by_event.setdefault(name[5:14], []).append(
            os.path.join(path_to_datasets_dir, name)
        )
    return files_by_event

In [3]:
def select_events(indexes_list, event_names):
    """Return, as a tuple, the entries of ``event_names`` at the given positions."""
    picked = []
    for position in indexes_list:
        picked.append(event_names[position])
    return tuple(picked)

In [4]:
def random_events_select(event_names, subset_size):
    """Pick ``subset_size`` entries of ``event_names`` at random positions.

    Positions come from ``select_random_indexses_subset``; the chosen entries
    are returned as a tuple by ``select_events``.
    """
    chosen_positions = select_random_indexses_subset(len(event_names), subset_size)
    return select_events(chosen_positions, event_names)

In [5]:
def offset_events_select(event_names, subset_size, offset):
    """Pick up to ``subset_size`` consecutive entries of ``event_names`` starting at ``offset``.

    Positions come from ``select_offset_indexses_subset``; the chosen entries
    are returned as a tuple by ``select_events``.
    """
    window_positions = select_offset_indexses_subset(
        len(event_names), subset_size, offset
    )
    return select_events(window_positions, event_names)

In [6]:
def select_random_indexses_subset(size, subset_size):
    """Return a list of ``subset_size`` distinct random indexes drawn from ``0..size-1``.

    Raises ValueError (from ``random.sample``) when ``subset_size`` exceeds ``size``.
    """
    # A range is a sequence, so it can be sampled without materialising it.
    return random.sample(range(size), subset_size)

In [7]:
def select_random_indexses_subset(size, subset_size):
    """Return a list of ``subset_size`` distinct random indexes drawn from ``0..size-1``.

    NOTE(review): this cell re-defines the function from the previous cell
    with the same body; one of the two cells can be removed.
    """
    candidate_indexes = tuple(range(size))
    return random.sample(candidate_indexes, subset_size)

In [8]:
def select_offset_indexses_subset(size, subset_size, offset):
    """Return the indexes ``offset .. offset + subset_size - 1`` clipped to ``0..size-1``, as a tuple."""
    window = slice(offset, offset + subset_size)
    # Slicing a range follows the same rules as slicing a tuple of the same numbers.
    return tuple(range(size)[window])

In [9]:
def load_events(event_ids, event_dir):
    """Load the hits, cells, particles and truth tables of each requested event.

    Each event is read with ``load_event`` from the prefix
    ``../input/<event_dir>/event<event_id>``.

    Returns a dict mapping event id -> dict with the keys ``'hits'``,
    ``'cells'``, ``'particles'`` and ``'truth'``.
    """
    loaded = {}
    for event_id in event_ids:
        prefix = '../input/{}/event{}'.format(event_dir, event_id)
        hits, cells, particles, truth = load_event(prefix)
        loaded[event_id] = dict(
            hits=hits,
            cells=cells,
            particles=particles,
            truth=truth,
        )
    return loaded

In [10]:
def remove_particles_with_zero_id(particles):
    """Return the rows of ``particles`` whose ``particle_id`` is not 0."""
    has_nonzero_id = particles['particle_id'] != 0
    return particles.loc[has_nonzero_id, :]

In [11]:
def remove_particles_with_zero_hits(particles):
    """Return the rows of ``particles`` whose ``nhits`` is not 0."""
    has_hits = particles['nhits'] != 0
    return particles.loc[has_hits, :]

In [12]:
def remove_zero_particles(particles):
    """Return ``particles`` without the rows whose ``particle_id`` is 0.

    Only the id filter is applied; rows with ``nhits == 0`` are kept
    (``remove_particles_with_zero_hits`` is not called here).
    """
    return remove_particles_with_zero_id(particles)

In [13]:
def clean_event(event):
    """Return a new event dict whose ``'particles'`` table has been filtered.

    ``'particles'`` is passed through ``remove_zero_particles``; ``'hits'``,
    ``'cells'`` and ``'truth'`` are carried over as the same objects.
    """
    return {
        'particles': remove_zero_particles(event['particles']),
        'hits': event['hits'],
        'cells': event['cells'],
        'truth': event['truth'],
    }

In [14]:
def clean_events(events):
    """Apply ``clean_event`` to every event; returns a new dict keyed by event id."""
    cleaned = {}
    for event_id, event in events.items():
        cleaned[event_id] = clean_event(event)
    return cleaned

In [15]:
def extract_particle_truth(particle_id, truth):
    """Return the rows of ``truth`` whose ``particle_id`` equals the given id."""
    belongs_to_particle = truth['particle_id'] == particle_id
    return truth[belongs_to_particle]

In [16]:
def get_particle_truth_hits_id(particle_truth):
    """Return the ``hit_id`` column of ``particle_truth`` as a plain Python list."""
    hit_id_column = particle_truth['hit_id']
    return hit_id_column.tolist()

In [17]:
def extract_particle_hits(particle_hit_ids, hits):
    """Return the rows of ``hits`` whose ``hit_id`` is contained in ``particle_hit_ids``."""
    is_particle_hit = hits['hit_id'].isin(particle_hit_ids)
    return hits[is_particle_hit]

In [18]:
def get_particle_truth_path(particle_truth):
    """Return the ``(tx, ty, tz)`` values of every row as a tuple of 3-tuples, in row order."""
    position_columns = ('tx', 'ty', 'tz')
    positions = particle_truth.loc[:, position_columns]
    return tuple(tuple(row) for _, row in positions.iterrows())

In [19]:
def get_particle_truth_momentums(particle_truth):
    """Return the ``tpx``, ``tpy`` and ``tpz`` columns of ``particle_truth`` as a DataFrame."""
    momentum_columns = ('tpx', 'tpy', 'tpz')
    return particle_truth.loc[:, momentum_columns]

In [20]:
def get_particle_hits_path(particle_hits):
    """Return the ``x``, ``y`` and ``z`` columns of ``particle_hits`` as a DataFrame."""
    coordinate_columns = ('x', 'y', 'z')
    return particle_hits.loc[:, coordinate_columns]

In [21]:
def compute_particle_path_length(particle_path):
    """Sum the Euclidean lengths of the segments between consecutive rows.

    ``particle_path`` must expose ``.values`` (e.g. a DataFrame of
    coordinates). A path with fewer than two rows has length 0.
    """
    coordinates = particle_path.values
    total_length = 0
    for begin_point, end_point in zip(coordinates[:-1], coordinates[1:]):
        step = end_point - begin_point
        total_length += math.sqrt(np.dot(step, step))
    return total_length

In [22]:
def check_point_closenes(first_point, second_point, eps=0.01):
    """Tell whether two points agree coordinate-wise within a relative tolerance.

    A coordinate pair ``(a, b)`` is close when ``|a - b| <= eps * |b|``, i.e.
    ``a`` deviates from ``b`` by at most the fraction ``eps`` of ``b``.

    Compared with dividing ``a / b`` this form
      * does not fail when a coordinate of ``second_point`` is 0 (the pair is
        then close only if ``a`` is 0 as well), and
      * keeps the sign, so a point and its mirror image are not reported close.

    Coordinates are paired with ``zip``, so extra coordinates of the longer
    point are ignored. Returns a bool.
    """
    return all(
        abs(first_coord - second_coord) <= eps * abs(second_coord)
        for first_coord, second_coord in zip(first_point, second_point)
    )

In [23]:
def all_point_occurences(point, df, point_columns, eps=0.01):
    """Collect every row of ``df`` whose ``point_columns`` values are close to ``point``.

    Closeness is decided by ``check_point_closenes`` with the given ``eps``.
    Returns a list of ``(row label, row)`` pairs in row order.
    """
    return [
        (row_label, row)
        for row_label, row in df[point_columns].iterrows()
        if check_point_closenes(point, row, eps=eps)
    ]

In [24]:
def find_first_point_occurence(point, df, point_columns, eps=0.01):
    """Find the first row of ``df`` whose ``point_columns`` values are close to ``point``.

    Closeness is decided by ``check_point_closenes`` with the given ``eps``.
    Returns the ``(row label, row)`` pair of the first match, or None when no
    row matches.
    """
    for row_label, row in df[point_columns].iterrows():
        if check_point_closenes(point, row, eps=eps):
            return (row_label, row)
    return None

In [25]:
def extract_all_event_truth_patches(event_truth, batch_size):
    """Collect the truth path of every particle, grouped into batches.

    The unique ``particle_id`` values of ``event_truth`` are split into
    consecutive groups of ``batch_size`` ids (the last group may be smaller).
    Each group becomes one tuple holding the truth path of each of its
    particles, so every particle appears exactly once across all batches.

    Returns a tuple of batches. Raises ValueError when ``batch_size`` is
    smaller than 1.

    NOTE(review): later cells define single-argument functions with this same
    name, which replace this definition once they are executed.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    particle_ids = list(set(event_truth['particle_id'].tolist()))
    batches = []
    for start in range(0, len(particle_ids), batch_size):
        batch_ids = particle_ids[start:start + batch_size]
        batches.append(tuple(
            get_particle_truth_path(extract_particle_truth(particle_id, event_truth))
            for particle_id in batch_ids
        ))
    return tuple(batches)

In [26]:
def extract_all_event_truth_patches(event_truth):
    """Return a tuple with the truth path of every unique ``particle_id`` in ``event_truth``.

    The order of the paths follows the iteration order of the set of ids.
    """
    particle_ids = set(event_truth['particle_id'].tolist())
    return tuple(
        get_particle_truth_path(extract_particle_truth(particle_id, event_truth))
        for particle_id in particle_ids
    )

In [27]:
def extract_all_event_truth_patches(event_truth):
    """Return a generator over the truth path of every unique ``particle_id``.

    The set of ids is computed when the function is called; each path is
    only built when the generator is advanced.
    """
    particle_ids = set(event_truth['particle_id'].tolist())

    def _iter_paths():
        for particle_id in particle_ids:
            particle_truth = extract_particle_truth(particle_id, event_truth)
            yield get_particle_truth_path(particle_truth)

    return _iter_paths()

In [28]:
def extract_all_event_truth_patches(event_truth):
    """Return a tuple with the truth path of every unique ``particle_id`` in ``event_truth``.

    The order of the paths follows the iteration order of the set of ids.
    """
    truth_paths = []
    for particle_id in set(event_truth['particle_id'].tolist()):
        particle_truth = extract_particle_truth(particle_id, event_truth)
        truth_paths.append(get_particle_truth_path(particle_truth))
    return tuple(truth_paths)

In [29]:
def find_closenest_path(truth_path, event_hits):
    """Map each truth point to the first close hit of ``event_hits``.

    Every point is looked up with ``find_first_point_occurence`` on the
    ``x``, ``y``, ``z`` columns using ``eps=0.1``. Points without a match are
    skipped. Returns the list of matched hit rows in path order.
    """
    matches = (
        find_first_point_occurence(point, event_hits, ['x', 'y', 'z'], eps=0.1)
        for point in truth_path
    )
    # Each match is an (index, coordinates) pair; keep the coordinates only.
    return [match[1] for match in matches if match]

In [81]:
def find_all_closenest_patches(truth_patches, event_hits):
    """Run ``find_closenest_path`` for every truth path and time the whole pass.

    Prints a start message, a completion message and the elapsed wall-clock
    seconds. Returns a list with one entry per truth path, in input order.
    """
    print("enter in find_all_closenest_patches")
    start_time = time.time()
    all_closenest_patches = [
        find_closenest_path(truth_path, event_hits)
        for truth_path in truth_patches
    ]
    print("closenest pathces found or not found")
    print("elapsed time: {}".format(time.time() - start_time))
    # The original returned the misspelled, undefined name `all_closenest_pathes`.
    return all_closenest_patches

In [79]:
def create_closenest_patches_detector(event_hits):
    """Build a one-argument function that matches truth paths against ``event_hits``.

    The returned function takes a chunk (iterable) of truth paths and returns
    a list with the ``find_closenest_path`` result of each path.

    NOTE(review): the returned function is nested, and nested functions cannot
    be pickled, so it presumably cannot be handed to ``multiprocessing.Pool.map``
    -- confirm before using it with a pool.
    """
    def process_pathes_chunk(truth_pathes_chunk):
        print("enter in proecess_pathes_chunk")
        # The original iterated an undefined name (`truth_patches_chunk`)
        # instead of this parameter.
        all_closenest_patches = [
            find_closenest_path(truth_path, event_hits)
            for truth_path in truth_pathes_chunk
        ]
        print("process_patches_chunk: patches proceed")
        return all_closenest_patches
    return process_pathes_chunk

In [69]:
def dummy_count_patches_num(patches):
    """Count the items of ``patches`` by iterating over it, printing the counter before and after."""
    count = 0
    print("dummy_count, i before", count)
    count += sum(1 for _ in patches)
    print("dummy_count, i after", count)
    return count

In [56]:
def divide_pathces_in_chunks(patches_list, num_of_chunks):
    """Split ``patches_list`` into ``num_of_chunks`` consecutive slices covering every item.

    When the length is not divisible by ``num_of_chunks`` the first
    ``len % num_of_chunks`` chunks receive one extra item, so no item is
    dropped. For an evenly divisible length the result is the same as plain
    fixed-size slicing.

    Returns a tuple of slices of ``patches_list``. ``num_of_chunks == 0``
    raises ZeroDivisionError.
    """
    base_size, remainder = divmod(len(patches_list), num_of_chunks)
    chunks = []
    start = 0
    for chunk_index in range(num_of_chunks):
        stop = start + base_size + (1 if chunk_index < remainder else 0)
        chunks.append(patches_list[start:stop])
        start = stop
    return tuple(chunks)

In [31]:
# Index the files under ../input/train_1/ by the event id embedded in each file name.
event_filenames = read_dataset_filenames_from_dir('../input/train_1/')

In [32]:
# Take the first 4 event ids (offset 0). The key order follows os.listdir,
# which is arbitrary, so the selected events may differ between machines.
event_ids = offset_events_select(list(event_filenames.keys()), 4, 0)

In [33]:
# Display the selected event ids.
event_ids

('000001488', '000002134', '000001343', '000002668')

In [34]:
# Load the hits, cells, particles and truth tables of each selected event from train_1.
events = load_events(event_ids, 'train_1')

In [35]:
# Filter the particles table of every loaded event; the other tables are carried over.
cleaned_events = clean_events(events)

In [36]:
# First (event_id, event_data) pair of the cleaned events, in dict order.
event0 = list(cleaned_events.items())[0]

In [37]:
# Keep only the data dict of the pair (index 0 holds the event id).
event0_data = event0[1]

In [38]:
# Unpack the tables of the first event into separate names.
event0_truth = event0_data['truth']
event0_hits = event0_data['hits']
event0_particles = event0_data['particles']

In [39]:
# Build the truth path of every unique particle id found in the first event's truth table.
event0_all_pathes = extract_all_event_truth_patches(event0_truth)

In [70]:
#print(event0_all_pathes)

In [41]:
# Second (event_id, event_data) pair of the cleaned events, in dict order.
event1 = list(cleaned_events.items())[1]

In [42]:
# Keep only the data dict of the pair (index 0 holds the event id).
event1_data = event1[1]

In [43]:
# Unpack the tables of the second event into separate names.
event1_truth = event1_data['truth']
event1_hits = event1_data['hits']
event1_particles = event1_data['particles']

In [44]:
#all_closenest_patches = find_all_closenest_patches(event0_all_pathes, event1_hits)

In [48]:
# Use one worker process per CPU reported by the operating system.
pool_size = multiprocessing.cpu_count()

In [63]:
# Split the first event's truth paths into one chunk per worker process.
event0_patches_divided_to_chunks = divide_pathces_in_chunks(event0_all_pathes, pool_size)

In [71]:
# Smoke-test the pool: count the truth paths of each chunk in a worker process.
pool = multiprocessing.Pool(processes=pool_size)
try:
    pool_outputs = pool.map(dummy_count_patches_num, event0_patches_divided_to_chunks)
finally:
    # Always release the worker processes, even when a worker raised.
    pool.close()
    pool.join()

dummy_count, i before 0
dummy_count, i before 0
dummy_count, i after 2033
dummy_count, i after 2033
dummy_count, i before 0
dummy_count, i after 2033
dummy_count, i before 0
dummy_count, i after 2033


In [72]:
# Show the per-chunk counts returned by the workers.
print(pool_outputs)

[2033, 2033, 2033, 2033]


In [82]:
#closenest_patches_detector = create_closenest_patches_detector(event1_hits)
# Bind the second event's hits as the event_hits keyword, so the pool can call
# the detector with just a chunk of truth paths.
closenest_patches_detector = functools.partial(find_all_closenest_patches, event_hits=event1_hits)

In [84]:
# Match every chunk of the first event's truth paths against the bound hits, one chunk per worker.
pool = multiprocessing.Pool(processes=pool_size)
try:
    pool_outputs = pool.map(closenest_patches_detector, event0_patches_divided_to_chunks)
finally:
    # Always release the worker processes, even when a worker raised.
    pool.close()
    pool.join()