# About
This notebook shows how machine learning helps to **reduce the number of ghost tracks** produced by Hough-transform track pattern recognition.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import pandas
import numpy

# Input data

In [2]:
# Read the hit table; index_col=False keeps pandas from using the first
# CSV column as the row index.
data = pandas.read_csv(filepath_or_buffer='hits_1000.csv', index_col=False)

# Optional: for a quick dry run, keep only events with id below 100:
#     data = data[data.event.values < 100]

# Preview the first five rows.
data.head(n=5)

Unnamed: 0,event,particle,layer,iphi,x,y
0,0,0,5,39276,55.103343,-401.233874
1,0,6,5,22685,-381.682239,135.438799
2,0,3,3,6082,160.995866,139.460859
3,0,5,2,27787,-35.433651,-150.895515
4,0,5,1,15230,-19.62735,-82.702885


# Split Data into Train/Test Samples

In [3]:
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation was removed in 0.20. Fall back to the old module
# only when running on an older installation.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Split on event ids rather than on individual rows, so that all hits of a
# given event end up on the same side of the split.
event_ids = numpy.unique(data.event.values)

event_ids_train, event_ids_test = train_test_split(event_ids,
                                                   test_size=0.5,
                                                   random_state=42)

# Select the hits belonging to each half of the event ids.
data_train = data[data.event.isin(event_ids_train)]
data_test = data[data.event.isin(event_ids_test)]

# Hough Transform with Tracks Classification

## Data Preparation

In [4]:
# Columns passed to the track finder as the hit description; the particle id
# is kept aside as the label.
feature_columns = [u'event', u'layer', u'iphi', u'x', u'y']

# Training half: hit array and the matching particle labels.
X_train = data_train[feature_columns].values
y_train = data_train[u'particle'].values

# Test half, built the same way.
X_test = data_test[feature_columns].values
y_test = data_test[u'particle'].values

## Selection of a base track pattern recognition method and a classifier

In [5]:
from sklearn.ensemble import RandomForestClassifier
from hough import Hough

# Classifier later handed to HoughClassification. The seed is fixed (same
# value as the train/test split) so that the forest is rebuilt identically
# on a fresh "Restart & Run All".
clf = RandomForestClassifier(n_estimators=1000, random_state=42)

# Base track pattern recognition method. The binning and threshold
# parameters are interpreted by the project-local `hough` module.
base = Hough(n_theta_bins=5000, n_radius_bins=1000, min_radius=20., min_hits=4)

## Track Pattern Recognition

In [6]:
from hough_classification import HoughClassification

# Combine the base Hough track finder with the classifier; 0.8 is passed as
# the probability threshold.
mh = HoughClassification(
    base=base,
    classifier=clf,
    proba_threshold=0.8,
)

# Fit on the training events, then run the recognition on the test hits.
mh.fit(X_train, y_train)
y_reco = mh.predict(X_test)

## Quality metrics

In [7]:
from metrics import RecognitionQuality

# Quality calculator configured with a track-efficiency threshold of 0.8 and
# a minimum of 4 hits per track.
rq = RecognitionQuality(
    track_eff_threshold=0.8,
    min_hits_per_track=4,
)

# Compare true labels with reconstructed labels on the test hits; two
# reports come back, bound here as per-event and per-track.
report_event, report_tracks = rq.calculate(X_test, y_test, y_reco)

In [8]:
# Average every column of the per-event report over axis 0.
# NOTE(review): the "Event" entry in the result is presumably just the mean of
# the event ids and carries no quality information — confirm and ignore it.
report_event.mean(axis=0)

Event                       484.638000
ReconstructionEfficiency      0.940794
GhostRate                     0.012362
CloneRate                     0.000949
AvgTrackEfficiency            0.987292
dtype: float64