# Simple LSTM for track finding on 1D detector layers

In [1]:
# System imports
from __future__ import print_function

# External imports
import numpy as np
from keras import models
from keras import layers

# Local imports
from data import (generate_straight_track, generate_straight_tracks,
                  generate_uniform_noise, generate_track_bkg)
from drawing import draw_event, draw_input_and_pred

from matplotlib import pyplot as plt
%matplotlib notebook

Using TensorFlow backend.


## Data generation

In [2]:
# Config parameters
det_width = 50                      # size of one detector layer (model feature dimension)
det_depth = 50                      # number of detector layers (sequence length before shifting)
det_shape = (det_depth, det_width)
num_events = 100000                 # number of events requested from the generators

# Seed NumPy's global random state so that everything drawing from np.random
# is repeatable after a kernel restart.
# NOTE(review): assumes the helpers in `data` draw from np.random -- confirm.
random_seed = 42
np.random.seed(random_seed)

In [3]:
# Build the single-track sample. The model input is every layer except the
# last one; the target is the same event shifted forward by one layer.
tracks = generate_straight_tracks(num_events, det_shape)
train_input, train_target = tracks[:, :-1, :], tracks[:, 1:, :]
print(train_input.shape)

(100000, 49, 50)


## Simple LSTM track fitting model

In [4]:
def build_model(num_hidden=10, length=det_depth-1, dim=det_width,
                loss='categorical_crossentropy',
                optimizer='Nadam', metrics=['accuracy']):
    """Build and compile a sequence-to-sequence LSTM model.

    The network takes an input of shape (length, dim), runs it through one
    LSTM layer that returns its full output sequence, and applies the same
    softmax Dense layer of size `dim` at every step.

    Parameters
    ----------
    num_hidden : size of the LSTM output at each step.
    length : number of steps in the input sequence.
    dim : size of each input step and of each softmax output.
    loss, optimizer, metrics : forwarded unchanged to `model.compile`.

    Returns
    -------
    The compiled Keras model.
    """
    sequence_in = layers.Input(shape=(length, dim))

    # Recurrent layer; return_sequences=True keeps one output per step
    lstm_layer = layers.LSTM(output_dim=num_hidden, return_sequences=True)
    hidden_states = lstm_layer(sequence_in)

    # One shared softmax layer applied independently at each step
    softmax_head = layers.TimeDistributed(layers.Dense(dim, activation='softmax'))
    sequence_out = softmax_head(hidden_states)

    network = models.Model(input=sequence_in, output=sequence_out)
    network.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    return network

In [9]:
# Instantiate the model with the default settings of build_model
model1 = build_model()
# Train on the entire training set (%time reports how long the call takes)
%time model1.fit(train_input, train_target, batch_size=500, nb_epoch=10)
# Get all of the training data predictions (also timed)
%time train_pred = model1.predict(train_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 4min 37s, sys: 23.9 s, total: 5min
Wall time: 2min 48s
CPU times: user 33.6 s, sys: 838 ms, total: 34.4 s
Wall time: 35.5 s


In [33]:
# Display one sample from the training set: draw the input event together
# with the model prediction for that same event
display_idx = 72
draw_input_and_pred(train_input[display_idx], train_pred[display_idx])

<IPython.core.display.Javascript object>

## Two-track events

In [35]:
# Overlay two independently generated single tracks in one image and see how
# the single-track model copes, without any retraining
t1 = generate_straight_track(det_shape)
t2 = generate_straight_track(det_shape)
t3 = t1 + t2
# Prepend a batch axis and drop the last layer so the shape matches the
# model input
t3_input = t3[np.newaxis, :-1, :]
t3_pred = model1.predict(t3_input)

# Plot the event and the corresponding prediction
draw_input_and_pred(t3, t3_pred[0])

<IPython.core.display.Javascript object>

In [11]:
# Now let's build events with 2 tracks each, by summing two independently
# generated single-track samples of 50000 events, and train on those
events2 = sum(generate_straight_tracks(50000, det_shape) for _ in range(2))
train2_input, train2_target = events2[:, :-1, :], events2[:, 1:, :]

In [12]:
# Create a new model with a larger LSTM (20 hidden units instead of the
# default 10) for the two-track sample
model2 = build_model(num_hidden=20)
# Train on the two-track events (%time reports how long the call takes)
%time model2.fit(train2_input, train2_target, batch_size=500, nb_epoch=10)
# Predictions for the whole two-track training sample
train2_pred = model2.predict(train2_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 2min 13s, sys: 11.7 s, total: 2min 25s
Wall time: 1min 22s


In [36]:
# Display one sample from the training set.
# Requires `events2` and `train2_pred` from the two-track generation and
# training cells; run those first (in a fresh kernel this cell otherwise
# raises NameError).
display_idx = 666
draw_input_and_pred(events2[display_idx], train2_pred[display_idx])

NameError: name 'events2' is not defined

## Single tracks with uniform noise
In training, the inputs to the network are the events with the tracks and noise. However, in this case the target is not the same event, but rather the track by itself (still shifted by 1 index). Let's see if we can get this to work.

In [15]:
# Overlay uniform noise on the single-track sample.
# NOTE(review): skip_layers=10 presumably keeps the first 10 layers free of
# noise -- confirm in data.py.
noise = generate_uniform_noise(len(tracks), det_shape, skip_layers=10)
events3 = tracks + noise
# Cap cells where noise and track coincide so no cell exceeds 1 (in place)
np.minimum(events3, 1, out=events3)

# Inputs are the noisy events; targets are the clean tracks shifted by one layer
train3_input, train3_target = events3[:, :-1, :], tracks[:, 1:, :]

# Let's take a quick look at one of these noise events
plt.figure()
draw_event(events3[66], title='Track with uniform noise')

<IPython.core.display.Javascript object>

In [19]:
# Instantiate the model with 50 hidden units for the noisy sample
model3 = build_model(num_hidden=50)
# Train the network: noisy events as input, clean shifted tracks as target
%time model3.fit(train3_input, train3_target, batch_size=500, nb_epoch=10)
# Get the final predictions from the entire training set
train3_pred = model3.predict(train3_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 4min 30s, sys: 23.6 s, total: 4min 54s
Wall time: 2min 49s


In [37]:
# Draw one noisy event together with the model prediction for it
display_idx = 12050
draw_input_and_pred(events3[display_idx], train3_pred[display_idx])

<IPython.core.display.Javascript object>

Let's try to make it a little more difficult by decreasing the seed size and increasing the noise level.

In [23]:
# Harder variant of the noisy sample: noise generated with skip_layers=5
# and prob=0.2
noise = generate_uniform_noise(len(tracks), det_shape, skip_layers=5, prob=0.2)
events4 = tracks + noise
# Cap cells where a noise hit coincides with a track hit, so no cell exceeds 1.
# The other noisy samples in this notebook (events3, events5) apply the same
# cap; without it, overlapping hits reach the model as 2.
events4[events4 > 1] = 1
# Inputs are the noisy events; targets are the clean tracks shifted by one layer
train4_input = events4[:,:-1,:]
train4_target = tracks[:,1:,:]
# Quick look at one of the noisier events
plt.figure()
draw_event(events4[10])

<IPython.core.display.Javascript object>

In [24]:
# Fresh model with 50 hidden units for the harder noisy sample
model4 = build_model(num_hidden=50)
# Train on the noisier events (%time reports how long the call takes)
%time model4.fit(train4_input, train4_target, batch_size=500, nb_epoch=10)
# Predictions for the whole training sample
train4_pred = model4.predict(train4_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [43]:
# Draw two of the noisier events together with their predictions
for display_idx in (444, 10):
    draw_input_and_pred(events4[display_idx], train4_pred[display_idx])

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Multi-track background

Now let's try the scenario where each event has background tracks in addition to the signal track.

In [53]:
# Generate background events (tracks_per_event=2), one per signal event,
# and overlay them on the signal tracks
bkgs = generate_track_bkg(tracks.shape[0], det_shape, tracks_per_event=2)
events5 = bkgs + tracks
# Cap overlapping hits so no cell exceeds 1 (in place)
np.minimum(events5, 1, out=events5)
# Inputs are the full events; targets are the signal tracks shifted by one layer
train5_input, train5_target = events5[:, :-1, :], tracks[:, 1:, :]

In [62]:
# Fresh model with 50 hidden units for the track-background sample
model5 = build_model(num_hidden=50)
# Train with a batch size of 200 (the earlier models in this notebook use 500)
%time model5.fit(train5_input, train5_target, batch_size=200, nb_epoch=10)
# Predictions for the whole training sample
train5_pred = model5.predict(train5_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 9min 56s, sys: 58.6 s, total: 10min 55s
Wall time: 6min 11s


In [64]:
# Draw three events with background tracks together with their predictions
for display_idx in (1234, 5432, 666):
    draw_input_and_pred(events5[display_idx], train5_pred[display_idx])

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>