# Simple LSTM for track finding on 1D detector layers

In [12]:
# System imports
from __future__ import print_function

# External imports
import numpy as np
from keras import models
from keras import layers

# Local imports
from data import (generate_straight_track, generate_straight_tracks,
                  generate_uniform_noise, generate_track_bkg)
from drawing import draw_2d_event, draw_2d_input_and_pred

from matplotlib import pyplot as plt
%matplotlib notebook

## Data generation

In [14]:
# Config parameters
det_width = 50    # size of each 1D detector layer (per-step feature size of the models)
det_depth = 50    # number of detector layers (sequence length of the models)
det_shape = (det_depth, det_width)
num_events = 100000

# Seed NumPy's global generator so that Restart & Run All gives the same samples.
# NOTE(review): assumes the generators imported from `data` draw from np.random -- confirm.
random_seed = 42
np.random.seed(random_seed)

In [15]:
# Single-track sample. The model is trained to reproduce its own input,
# so one array serves as both the training input and the training target.
tracks = generate_straight_tracks(num_events, det_shape)
train_input = train_target = tracks
print(train_input.shape)

(100000, 50, 50)


## Simple LSTM track fitting model

In [22]:
def build_model(num_hidden=10, length=det_depth, dim=det_width,
                loss='categorical_crossentropy',
                optimizer='Nadam', metrics=['accuracy']):
    """Build and compile a single-LSTM model with a per-step softmax output.

    Architecture: Input(length, dim) -> LSTM(num_hidden) returning the whole
    sequence -> Dense(dim, softmax) applied independently at every step.

    Parameters
    ----------
    num_hidden : int
        Size of the LSTM hidden state.
    length : int
        Number of steps per input sequence. The default is the value that
        `det_depth` had when this function was defined.
    dim : int
        Size of each step's input vector and of each step's softmax output.
        The default is the value that `det_width` had when this function
        was defined.
    loss, optimizer, metrics
        Passed on to `Model.compile`.

    Returns
    -------
    The compiled Keras model.
    """
    # The default `metrics` list is created once, at definition time. Give
    # Keras a private copy so that models never share that single object.
    if isinstance(metrics, list):
        metrics = list(metrics)
    inputs = layers.Input(shape=(length, dim))
    hidden = layers.LSTM(output_dim=num_hidden, return_sequences=True)(inputs)
    outputs = layers.TimeDistributed(layers.Dense(dim, activation='softmax'))(hidden)
    model = models.Model(input=inputs, output=outputs)
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    return model

In [5]:
# Instantiate the model with the default settings of build_model
model1 = build_model()
# Train for 10 epochs on the entire single-track sample (the target is the input itself)
model1.fit(train_input, train_target, batch_size=500, nb_epoch=10)
# Get predictions for all of the training data (there is no held-out set here)
train_pred = model1.predict(train_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
# Display one sample from the training set: the input event together with
# the model's prediction for that same event
display_idx = 10
draw_2d_input_and_pred(train_input[display_idx], train_pred[display_idx])

<IPython.core.display.Javascript object>

## Two-track events

In [7]:
# Overlay two independently generated single tracks in one image and see how
# the single-track model does on it without any retraining
t1, t2 = (generate_straight_track(det_shape) for _ in range(2))
t3 = t1 + t2
# predict() works on batches, so wrap the single event in a batch of one
t3_input = np.expand_dims(t3, axis=0)
t3_pred = model1.predict(t3_input)

# Plot the event and the corresponding prediction
draw_2d_input_and_pred(t3, t3_pred[0])

<IPython.core.display.Javascript object>

In [8]:
# Now let's generate events with 2 tracks each and train on that.
# Each event is the sum of two independently generated single-track samples,
# and the events are again used as both input and target.
events2 = sum(generate_straight_tracks(50000, det_shape) for _ in range(2))
train2_input = train2_target = events2

In [9]:
# Create a new model with a larger hidden state (20 units instead of the default 10)
model2 = build_model(num_hidden=20)
# Train on the two-track events, with the events themselves as the target
model2.fit(train2_input, train2_target, batch_size=500, nb_epoch=10)
# Predictions for the same events that were used in training
train2_pred = model2.predict(train2_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Display one sample from the two-track training set together with the
# prediction of the retrained model for it
display_idx = 666
draw_2d_input_and_pred(events2[display_idx], train2_pred[display_idx])

<IPython.core.display.Javascript object>

## Single tracks with uniform noise
In training, the inputs to the network are the events with the tracks and noise. However, in this case the target is not the same event, but rather the track by itself (still shifted by 1 index). Let's see if we can get this to work.

In [14]:
# Overlay uniform noise on the single-track sample, one noise event per track
noise = generate_uniform_noise(len(tracks), det_shape, skip_layers=10)
events3 = tracks + noise
# Wherever track and noise coincide the sum exceeds 1; cap it back at 1
events3[events3 > 1] = 1

# Inputs are the noisy events, the target is the clean track on its own
train3_input, train3_target = events3, tracks

# Let's take a quick look at one of these noise events
plt.figure()
draw_2d_event(events3[66], title='Track with uniform noise')

<IPython.core.display.Javascript object>

In [15]:
# Instantiate the model with 50 hidden units
model3 = build_model(num_hidden=50)
# Train the network: noisy events as input, clean tracks as target
model3.fit(train3_input, train3_target, batch_size=500, nb_epoch=10)
# Get the final predictions from the entire training set
train3_pred = model3.predict(train3_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# Show one noisy training event together with the model's prediction for it
display_idx = 12050
draw_2d_input_and_pred(events3[display_idx], train3_pred[display_idx])

<IPython.core.display.Javascript object>

Let's try to make it a little more difficult by decreasing the seed size and increasing the noise level.

In [17]:
# Harder sample: skip_layers is lowered from 10 to 5 and the noise is
# generated with prob=0.2
noise = generate_uniform_noise(len(tracks), det_shape, skip_layers=5, prob=0.2)
events4 = tracks + noise
# Cap coinciding track/noise hits at 1, exactly as is done for events3 and
# events5; without this the model input would contain values of 2
events4[events4 > 1] = 1
# Inputs are the noisy events, the target is the clean track on its own
train4_input = events4
train4_target = tracks
# Quick look at one of the harder events
plt.figure()
draw_2d_event(events4[10])

<IPython.core.display.Javascript object>

In [18]:
# Same architecture and training settings as model3, now on the harder noise sample
model4 = build_model(num_hidden=50)
model4.fit(train4_input, train4_target, batch_size=500, nb_epoch=10)
# Predictions for the same events that were used in training
train4_pred = model4.predict(train4_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Show the input and the model's prediction for a couple of training events
for display_idx in (444, 10):
    draw_2d_input_and_pred(events4[display_idx], train4_pred[display_idx])

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Multi-track background

Now let's try the scenario where each event has background tracks in addition to the signal track.

In [None]:
# Generate the bkg data: one background event (5 tracks) per signal track
bkgs = generate_track_bkg(tracks.shape[0], det_shape, tracks_per_event=5)
events5 = bkgs + tracks
# Cap coinciding hits at 1
events5[events5 > 1] = 1
# Inputs are signal + background, the target is the signal track on its own
train5_input, train5_target = events5, tracks

In [None]:
# Larger model (100 hidden units) for the multi-track background sample
model5 = build_model(num_hidden=100)
# The target is the signal track alone; note the batch size of 200 here versus 500 in the earlier fits
model5.fit(train5_input, train5_target, batch_size=200, nb_epoch=10)
# Predictions for the same events that were used in training
train5_pred = model5.predict(train5_input)

In [None]:
# Show the input and the model's prediction for a few training events
for display_idx in (1234, 5432, 666):
    draw_2d_input_and_pred(events5[display_idx], train5_pred[display_idx])

## Smoother LSTM
Here's a model which does a forward LSTM pass followed by a reverse pass over the outputs.
The goal is to refine and smooth the estimate.

In [32]:
def reverse_func(x):
    """Return the 3D tensor `x` with the order of its second axis reversed.

    The first two axes are swapped, the (now leading) axis is reversed with
    a negative-step slice, and the axes are swapped back.
    """
    import keras.backend as K
    assert K.ndim(x) == 3, "Should be a 3D tensor."
    swap_first_two = (1, 0, 2)
    second_axis_first = K.permute_dimensions(x, swap_first_two)
    flipped = second_axis_first[::-1]
    return K.permute_dimensions(flipped, swap_first_two)

def build_smooth_model(num_hidden=10, length=det_depth, dim=det_width,
                       loss='categorical_crossentropy',
                       optimizer='Nadam', metrics=['accuracy']):
    """Build and compile a forward LSTM followed by a backward LSTM smoother.

    Architecture: Input(length, dim) -> LSTM(num_hidden) -> per-step
    Dense(dim, softmax) -> LSTM(num_hidden, go_backwards=True) ->
    Lambda(reverse_func) -> per-step Dense(dim, softmax). Both LSTMs return
    their full output sequence.

    Parameters
    ----------
    num_hidden : int
        Size of the hidden state of each of the two LSTMs.
    length : int
        Number of steps per input sequence. The default is the value that
        `det_depth` had when this function was defined.
    dim : int
        Size of each step's input vector and of each softmax output.
        The default is the value that `det_width` had when this function
        was defined.
    loss, optimizer, metrics
        Passed on to `Model.compile`.

    Returns
    -------
    The compiled Keras model.
    """
    # The default `metrics` list is created once, at definition time. Give
    # Keras a private copy so that models never share that single object.
    if isinstance(metrics, list):
        metrics = list(metrics)
    inputs = layers.Input(shape=(length, dim))
    # Forward pass LSTM + FC
    forward = layers.LSTM(output_dim=num_hidden, return_sequences=True)(inputs)
    forward_pred = layers.TimeDistributed(layers.Dense(dim, activation='softmax'))(forward)
    # Backwards pass LSTM + FC smoother, run over the forward predictions
    backward = layers.LSTM(output_dim=num_hidden, return_sequences=True,
                           go_backwards=True)(forward_pred)
    # Reverse sequence order to match input order
    backward = layers.Lambda(reverse_func)(backward)
    outputs = layers.TimeDistributed(layers.Dense(dim, activation='softmax'))(backward)
    model = models.Model(input=inputs, output=outputs)
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    return model

In [33]:
# Reuse the multi-track background events as input, with the signal track as target
train6_input, train6_target = events5, tracks

In [34]:
# Build the two-pass smoother with 100 hidden units in each LSTM and print its layers
model6 = build_smooth_model(num_hidden=100)
model6.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_4 (InputLayer)             (None, 50, 50)        0                                            
____________________________________________________________________________________________________
lstm_7 (LSTM)                    (None, 50, 100)       60400       input_4[0][0]                    
____________________________________________________________________________________________________
timedistributed_7 (TimeDistribute(None, 50, 50)        5050        lstm_7[0][0]                     
____________________________________________________________________________________________________
lstm_8 (LSTM)                    (None, 50, 100)       60400       timedistributed_7[0][0]          
___________________________________________________________________________________________

In [35]:
# Train the smoother on the multi-track background events, then predict on those same events
model6.fit(train6_input, train6_target, batch_size=200, nb_epoch=10)
train6_pred = model6.predict(train6_input)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [36]:
# Show the input and the smoother's prediction for a few training events
for display_idx in (1234, 5432, 666):
    draw_2d_input_and_pred(train6_input[display_idx], train6_pred[display_idx])

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>