In [1]:
# Standard imports
import sys
import itertools
import glob

# Package imports
import numpy as np
import pandas as pd
from keras import models, layers
import matplotlib.pyplot as plt

# Local imports
import utils
import drawing

%matplotlib inline
np.random.seed(2341)

Using Theano backend.
Using cuDNN version 5105 on context None
Mapped name None to device cuda0: GeForce GTX 1080 (0000:04:00.0)


In [2]:
hits_row_names = ["event_id", "track_id", "i_r", "i_phi", "x", "y"]
particles_row_names = ["event_id", "p_id", "pt", "phi", "vx", "vy"]
n_r_bins = 9
n_phi_bins = 1000
max_tracks = 25
pt_scale = 2*np.pi/20000 # for loss function

def get_phi(x, y):
    return np.arctan2(y, x)

def discretize(evt, phiwidth):
    """
    evt: pandas dataframe holding hit information for one event
    phiwidth: size of pixel in phi direction
    
    Converts the hit information in the event into a numpy array with
    hits represented by 1's.
    """
    
    r_grid = np.arange(n_r_bins)
    phi_grid = np.arange(0., 2*np.pi, phiwidth)
    image = np.zeros((1, len(r_grid),len(phi_grid)))
    try:
        for hit in evt.itertuples():
            ir = hit[hits_row_names.index('i_r')]
            x = hit[hits_row_names.index('x')]
            y = hit[hits_row_names.index('y')]
            phi = get_phi(x, y)
            iphi = int((phi/phiwidth))
            image[0,ir,iphi] = 1
    except AttributeError:
        # This occurs if the event has only one hit (rare), in which case evt
        # is a Series, not a DataFrame.  Deal with this separately.
        print "Encountered event with only one hit:",evt
        
    return image

def get_targets(evt, max_tracks):
    """
    evt: pandas dataframe holding particle info for one event
    
    Gets the pt and phi of each particle in the event and returns them in a numpy array,
    scaled to an appropriate scale.
    """
    particles = np.zeros((max_tracks, 2))
    weights = np.zeros(max_tracks)
    try:
        for i,particle in enumerate(evt.itertuples()):
            if i >= max_tracks:
                break
            particles[i, 0] = particle[particles_row_names.index('pt')]*pt_scale
            particles[i, 1] = particle[particles_row_names.index('phi')]
            weights[i] = 1
        particles[:i+1] = particles[particles[:i+1,1].argsort()] # sort by second column (phi)
    except AttributeError:
        # This occurs if the event has only one particle, in which case evt is
        # a Series, not a DataFrame.  Deal with this separately.
        particles[0, 0] = evt['pt']*pt_scale
        particles[0, 1] = evt['phi']
        weights[0] = 1
    
    return particles, weights

In [3]:
def gen_single_hits(hit_files):
    cur_file = 0
    num_files = len(hit_files)
    while True:
        df = pd.read_csv(hit_files[cur_file], header=None, names=hits_row_names, index_col=hits_row_names[0])
        event_nums = sorted(df.index.unique())
        for event_num in event_nums:
            yield discretize(df.loc[event_num], phiwidth=2*np.pi/n_phi_bins)
        cur_file += 1
        if cur_file >= num_files:
            cur_file = 0

def gen_single_particles(particle_files, max_tracks):
    cur_file = 0
    num_files = len(particle_files)
    while True:
        df = pd.read_csv(particle_files[cur_file], header=None, names=particles_row_names, index_col=particles_row_names[0])
        event_nums = sorted(df.index.unique())
        for event_num in event_nums:
            yield get_targets(df.loc[event_num], max_tracks)
        cur_file += 1
        if cur_file >= num_files:
            cur_file = 0
    
def generate_data(batch_size, hit_files, particle_files, max_tracks=max_tracks):
    gen_hits = gen_single_hits(hit_files)
    gen_particles = gen_single_particles(particle_files, max_tracks)
    while True:
        batch_events = np.array([evt for evt in itertools.islice(gen_hits, batch_size)])
        batch_targets = np.zeros((batch_size, max_tracks, 2))
        batch_weights = np.zeros((batch_size, max_tracks))
        for i,(particles, weights) in enumerate(itertools.islice(gen_particles, batch_size)):
            batch_targets[i] = particles
            batch_weights[i] = weights
        yield batch_events, batch_targets, batch_weights

In [4]:
hit_files = sorted(glob.glob("hits_*.csv"))
particle_files = sorted(glob.glob("particles_*.csv"))
generator = generate_data(256, hit_files, particle_files)

In [5]:
# Model
def build_model_1(max_tracks=max_tracks):
    input_layer = layers.Input(shape=(1, n_r_bins, n_phi_bins))
    
    layer1 = layers.Convolution2D(8, 2, 6, border_mode='same')(input_layer)
    layer1 = layers.Activation('relu')(layer1)
    layer1 = layers.Convolution2D(8, 2, 6, border_mode='same')(layer1)
    layer1 = layers.Activation('relu')(layer1)
    layer1 = layers.MaxPooling2D(pool_size=(1,4))(layer1)
    layer1 = layers.Convolution2D(16, 2, 6, border_mode='same')(layer1)
    layer1 = layers.Activation('relu')(layer1)
    layer1 = layers.Convolution2D(16, 2, 6, border_mode='same')(layer1)
    layer1 = layers.Activation('relu')(layer1)
    layer1 = layers.Flatten()(layer1)
    
    layer2 = layers.Convolution2D(8, 2, 12, border_mode='same')(input_layer)
    layer2 = layers.Activation('relu')(layer2)
    layer2 = layers.Convolution2D(8, 2, 12, border_mode='same')(layer2)
    layer2 = layers.Activation('relu')(layer2)
    layer2 = layers.MaxPooling2D(pool_size=(2,4))(layer2)
    layer2 = layers.Convolution2D(16, 2, 6, border_mode='same')(layer2)
    layer2 = layers.Activation('relu')(layer2)
    layer2 = layers.Convolution2D(16, 2, 6, border_mode='same')(layer2)
    layer2 = layers.Activation('relu')(layer2)
    layer2 = layers.Flatten()(layer2)
    
    layer3 = layers.Convolution2D(8, 4, 12, border_mode='same')(input_layer)
    layer3 = layers.Activation('relu')(layer3)
    layer3 = layers.Convolution2D(8, 4, 12, border_mode='same')(layer3)
    layer3 = layers.Activation('relu')(layer3)
    layer3 = layers.Flatten()(layer3)
    
    layer = layers.merge([layer1, layer2, layer3], mode='concat', concat_axis=1)
    layer = layers.Dense(1500)(layer)
    layer = layers.RepeatVector(max_tracks)(layer)
    layer = layers.LSTM(1500, return_sequences=True)(layer)
    output_layer = layers.TimeDistributed(layers.Dense(2))(layer)
    model = models.Model(input=input_layer, output=output_layer)
    model.compile(loss='mean_squared_error', optimizer='Adam', sample_weight_mode="temporal")
    return model

In [6]:
model_1 = build_model_1()
%time model_1.fit_generator(generator, samples_per_epoch=96000, nb_epoch=3)

  mode='max')
  mode='max')
  mode='max')


Epoch 1/3
i_r           0.000000
i_phi       472.000000
x            37.228531
y            11.620518
Name: 3426445, dtype: float64
Epoch 2/3
Epoch 3/3
CPU times: user 22min 10s, sys: 1min 22s, total: 23min 32s
Wall time: 15min 30s


<keras.callbacks.History at 0x7f7e234aca10>

In [10]:
%time model_1.fit_generator(generator, samples_per_epoch=96000, nb_epoch=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3
i_r           0.000000
i_phi       472.000000
x            37.228531
y            11.620518
Name: 3426445, dtype: float64
CPU times: user 20min 52s, sys: 1min 30s, total: 22min 22s
Wall time: 14min 22s


<keras.callbacks.History at 0x7f7dfddbfc10>

In [18]:
evt, pred, weight = generator.next()
print "Model prediction"
print model_1.predict(np.array([evt[0]]))
print "Target"
print pred[0]

Model prediction
[[[ 0.78820527 -2.595474  ]
  [ 0.21208265 -2.20827961]
  [ 1.12105155 -1.46318448]
  [ 1.29186463 -1.39742219]
  [-0.71272582 -1.16125846]
  [ 0.93078148 -0.84250575]
  [ 1.07210767 -0.02985069]
  [-1.75846767  0.25473168]
  [-0.40684488  0.45352206]
  [-0.77167994  1.13568866]
  [ 2.14367223  1.7237767 ]
  [ 2.2369647   2.4955287 ]
  [ 0.70575696  2.97177386]
  [ 0.38294891  3.02810431]
  [ 0.08146887  3.06465745]
  [ 0.14743239  3.08069038]
  [ 0.17670089  3.08407807]
  [ 0.19411695  3.08596921]
  [ 0.20119756  3.08418202]
  [ 0.20886905  3.07437778]
  [ 0.22853503  3.05222011]
  [ 0.22870113  3.0322566 ]
  [ 0.23092413  3.00956655]
  [ 0.22235233  2.98453021]
  [ 0.19834043  2.96092439]]]
Target
[[ 0.59165805 -2.65426334]
 [ 0.29430782 -2.27614785]
 [ 0.88598731 -1.57160843]
 [ 1.28811406 -1.48887173]
 [-0.70240604 -1.32850356]
 [ 0.65199057 -0.93561097]
 [ 1.89057963 -0.16840306]
 [-2.05525045  0.15693129]
 [-3.10455302  0.24736351]
 [-1.68845147  0.80363246]
 [ 0