## JLab ML Challenge 2

In [4]:
import os
import sys
import gzip
import pandas as pd
import numpy as np
import math

import tensorflow as tf
import tensorflow.keras as keras

from keras.models import load_model
from keras.models import Model
from keras.layers import Dense, Reshape, Flatten, Input, Lambda
from keras.optimizers import SGD, Adamax, Adadelta
from keras.initializers import glorot_normal
from keras.callbacks import Callback, TensorBoard
from keras.utils.training_utils import multi_gpu_model
import keras.backend as K
import keras.losses
import tensorflow as tf


EPOCHS = 1000
BS     = 5000
GPUS   = 0

# Open labels files so we can get number of samples and pass the
# data frames to the generators later
traindf = pd.read_csv('TRAIN/track_parms.csv')
validdf = pd.read_csv('VALIDATION/track_parms.csv')
STEP_SIZE_TRAIN = len(traindf)/BS
STEP_SIZE_VALID = len(validdf)/BS

#-----------------------------------------------------
# generate_arrays_from_file
#-----------------------------------------------------
# Create generator to read in images and labels
# (used for both training and validation samples)
def generate_arrays_from_file( path, labelsdf ):

        images_path = path+'/images.raw.gz'
        print( 'generator created for: ' + images_path)

        batch_input       = []
        batch_labels_tanl = []
        batch_labels_z    = []
        idx = 0
        ibatch = 0
        while True:  # loop forever, re-reading images from same file
                with gzip.open(images_path) as f:
                        while True: # loop over images in file

                                # Read in one image
                                bytes = f.read(width*height)
                                if len(bytes) != (width*height): break # break into outer loop so we can re-open file
                                data = np.frombuffer(bytes, dtype='B', count=width*height)
                                pixels = np.reshape(data, [width, height, 1], order='F')
                                pixels_norm = np.transpose(pixels.astype(np.float) / 255., axes=(1, 0, 2) )
                                pixels_norm = 1.0 - pixels_norm  # Invert color (make black be no hit and white be tdrift=0

                                # Read one label
                                tanl = labelsdf.tanl[idx]
                                z    = labelsdf.z[idx]
                                idx += 1

                                # Add to batch and check if it is time to yield
                                batch_input.append( pixels_norm )
                                batch_labels_tanl.append( tanl )
                                batch_labels_z.append( z )
                                if len(batch_input) == BS :
                                        ibatch += 1

                                        # Since we are training multiple loss functions we must
                                        # pass the labels back as a dictionary whose keys match
                                        # the layer their corresponding values are being applied
                                        # to.
                                        labels_dict = {
                                                'tanl_output' :  np.array(batch_labels_tanl ),
                                                'z_output'    :  np.array(batch_labels_z   ),
                                        }

                                        yield ( np.array(batch_input), labels_dict )
                                        batch_input       = []
                                        batch_labels_tanl = []
                                        batch_labels_z    = []

                        idx = 0
                        f.close()

#-----------------------------------------------------
# MyWeightedAvg
#
# This is used by the final Lambda layer in each branch
# of the network. It defines the formula for calculating
# the weighted average of the inputs from the previous
# layer.
#-----------------------------------------------------
def MyWeightedAvg(inputs, binsize, xmin):
        ones = K.ones_like(inputs[0,:])                       # [1, 1, 1, 1....]   (size Nouts)
        idx  = K.cumsum(ones)                                 # [1, 2, 3, 4....]   (size Nouts)
        norm = K.sum(inputs, axis=1, keepdims=True)           # normalization of all outputs by batch. shape is 1D array of size batch (n.b. keepdims=True is critical!)
        wsum = K.sum(idx*inputs, axis=1, keepdims=True)/norm  # array of size batch with weighted avg. of mean in units of bins (n.b. keepdims=True is critical!)
        output = (binsize*(wsum-0.5)) + xmin                  # convert from bins to physical units (shape batch,1)

        print('MyWeightedAvg:')
        print('       binsize = %f' % binsize)
        print('          xmin = %f' % xmin)
        print('   input shape = %s' % str(inputs.shape))
        print('  output shape = %s' % str(output.shape))

        return output

#-----------------------------------------------------
# DefineCommonModel
#-----------------------------------------------------
def DefineCommonModel(inputs):
        x = Flatten(name='top_layer1')(inputs)
        x = Dense(int(Nouts*5), name='common_layer1', activation='linear', kernel_initializer="glorot_uniform")(x)
        #x = Dense(Nouts, name='common_layer2', activation='linear', kernel_initializer="glorot_uniform")(x)
        return x

#-----------------------------------------------------
# DefineTanlModel
#-----------------------------------------------------
def DefineTanlModel(inputs):
        x = Dense(Nouts, name='tanl_output_dist', activation='relu', kernel_initializer="glorot_uniform")(inputs)
        x = Lambda(MyWeightedAvg, output_shape=(1,), name='tanl_output', arguments={'binsize':TANL_BINSIZE, 'xmin':TANLMIN})(x)
        return x

#-----------------------------------------------------
# DefineZModel
#-----------------------------------------------------
def DefineZModel(inputs):
        x = Dense(Nouts, name='z_hidden_layer1', activation='linear', kernel_initializer="glorot_uniform")(inputs)
        x = Dense(Nouts, name='z_output_dist', activation='relu', kernel_initializer="glorot_uniform")(x)
        x = Lambda(MyWeightedAvg, output_shape=(1,), name='z_output', arguments={'binsize':Z_BINSIZE, 'xmin':ZMIN})(x)
        return x

#-----------------------------------------------------
# DefineModel
#-----------------------------------------------------
# This is used to define the model. It is only called if no model
# file is found in the model_checkpoints directory.
def DefineModel():

        # If GPUS==0 this will force use of CPU, even if GPUs are present
        # If GPUS>1 this will force the CPU to serve as orchestrator
        # If GPUS==1 this will do nothing, allowing GPU to act as its own orchestrator
        if GPUS!=1: tf.device('/cpu:0')

        # Here we build the network model.
        # This model is made of multiple parts. The first handles the
        # inputs and identifies common features. The rest are branches with
        # each determining an output parameter from those features.
        inputs      = Input(shape=(height, width, 1), name='image_inputs')
        commonmodel = DefineCommonModel(inputs)
        tanlmodel   = DefineTanlModel( commonmodel )
        zmodel      = DefineZModel( commonmodel )
        model       = Model(inputs=inputs, outputs=[tanlmodel, zmodel])
        #model       = Model(inputs=inputs, outputs=[tanlmodel])
        model.summary()

        # Here we specify a different loss function for every output branch.
        # We also specify a weight for each branch. The weights allow us to 
        # specify that it is more important to minimize certain losses more
        # than others.
        sigma_tanl = 1.0  # estimated resolution 
        sigma_z    = 1.0  # estimated resolution
        losses = {
                'tanl_output'   :  'mean_squared_error',
                'z_output'      :  'mean_squared_error',
        }
        lossWeights = {
                'tanl_output'   :  1.0/(sigma_tanl*sigma_tanl),
                'z_output'      :  1.0/(sigma_z*sigma_z),
        }

        # Compile the model, possibly using multiple GPUs
        opt = Adadelta(clipnorm=1.0)
        if GPUS<=1 :
                final_model = model
        else:
                final_model = multi_gpu_model( model, gpus=GPUS )

        final_model.compile(loss=losses, loss_weights=lossWeights, optimizer=opt, metrics=['mae', 'mse', 'accuracy'])

        return final_model


#===============================================================================


# Here we want to check if a model has been saved due to previous training.
# If so, then we read it in and continue training where it left off. Otherwise,
# we define the model and start fresh. 

if not os.path.exists('model_checkpoints'): os.mkdir('model_checkpoints')

# Look for most recent saved epoch
epoch_loaded = 0
for f in os.listdir('model_checkpoints'):
        if f.startswith('model_epoch') and f.endswith('.h5'):
                e = int(f[11:-3])
                if e > epoch_loaded:
                        epoch_loaded = e
                        fname = 'model_checkpoints/model_epoch%03d.h5' % epoch_loaded

if epoch_loaded > 0:
        print('Loading model: ' + fname)
        model = load_model( fname )
else:
        print('Unable to find saved model. Will start from scratch')
        model = DefineModel()

# Print summary of model
model.summary()

#===============================================================================

# Create training and validation generators
train_generator = generate_arrays_from_file('TRAIN', traindf)
valid_generator = generate_arrays_from_file('VALIDATION', validdf)

# Use tensorboard to log training. To view the training log with the
# tensorboard gui you can run tensorboard to fire up a web server
# so you can use your browser to view the results.
#
# Note: you may need to move the log file to your
# local desktop and run tensorboard there.
#
#  tensorboard --logdir=./logs
tensorboard=TensorBoard(log_dir='./logs', histogram_freq=0, batch_size=BS*GPUS, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None, update_freq='epoch')

#-----------------------------------------------------
# class checkpointModel
#-----------------------------------------------------
# There is a bug in keras that causes an error when trying to save a model
# trained on multiple GPUs. The work around is to save the original model
# at the end of every epoch using a callback. See
#    https://github.com/keras-team/kersas/issues/8694
class checkpointModel(Callback):
        def __init__(self, model):
                self.model_to_save = model
        def on_epoch_end(self, epoch, logs=None):
                myepoch = epoch +1
                fname = 'model_checkpoints/model_epoch%03d.h5' % myepoch
                old_fname = 'model_checkpoints/model_epoch%03d.h5' % (myepoch-1)
                if os.path.exists( old_fname ):
                        print('removing old model: %s' % old_fname)
                        os.remove( old_fname )
                print('saving model: %s' % fname)
                self.model_to_save.save(fname)
cbk = checkpointModel( model )

cbk.on_epoch_end(-1)

# Fit the model
history = model.fit_generator(
  generator            = train_generator
  ,steps_per_epoch     = STEP_SIZE_TRAIN
  ,validation_data     = valid_generator
  ,validation_steps    = STEP_SIZE_VALID
  ,epochs              = EPOCHS
  ,initial_epoch       = epoch_loaded
  ,use_multiprocessing = False
  ,callbacks=[tensorboard, cbk]
)


In [10]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import sys
import gzip
import pandas as pd
import numpy as np
import math

import tensorflow as tf
import tensorflow.keras as keras

from keras.models import Model, Sequential
from keras.layers import Dense, TimeDistributed, Input, Lambda, LSTM

model = Sequential()
model.add( LSTM(units=500, return_sequences=True, input_shape=(None,7)) )
model.add( LSTM(units=500, return_sequences=True) )
model.add( LSTM(units=500, return_sequences=True) )
model.add( TimeDistributed(Dense(units=5)) )

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_10 (LSTM)               (None, None, 500)         1016000   
_________________________________________________________________
lstm_11 (LSTM)               (None, None, 500)         2002000   
_________________________________________________________________
lstm_12 (LSTM)               (None, None, 500)         2002000   
_________________________________________________________________
time_distributed_3 (TimeDist (None, None, 5)           2505      
Total params: 5,022,505
Trainable params: 5,022,505
Non-trainable params: 0
_________________________________________________________________


In [15]:
import csv

with open('MLchallenge2_training.csv') as csv_file:
    csv_file.readline() # discard header line
    csv_reader = csv.reader(csv_file, delimiter=',')
    
    x_all = []
    y_all = []
    for row in csv_reader:
        
        # Copy each state vector (6 values) into individual list with a 7th value being the z of the next detector plane
        statevectors = []
        for i in range(0, 24):
            idx = i*6
            statevectors.append(row[idx:idx+6]+[row[idx+6+2]])

        # Create 5-parameter labels for detector planes 7-24 and add them alomng with their corresponding features to the x_all and y_all lists
        for i in range(7,24):
            features = statevectors[0:i]
            label = statevectors[i][0:2] + statevectors[i][3:6]
            x_all.append(features)
            y_all.append(label)
            #print('FEATURES')
            #print(features)
            #print('LABEL')
            #print(label)
        
        # Limit how many samples we use
        if len(y_all) >=100000: break

TRAIN_FRACTION = 0.90
idx = int(len(x_all)*TRAIN_FRACTION)
x_train = np.array(x_all[0:idx])
y_train = np.array(y_all[0:idx])
x_test  = np.array(x_all[idx:])
y_test  = np.array(y_all[idx:])

print('Training samples: ' + str(len(x_train)) + ' (' + str(len(y_train)) +')')
print(' Testing samples: ' + str(len(x_test )) + ' (' + str(len(y_test )) +')')


Training samples: 90009 (90009)
 Testing samples: 10002 (10002)


In [23]:
# We want to train with batch size>1 in order to be efficient. A batch, however, needs all members to
# have the same number of time steps. It's OK for different batches to have a different number of time
# steps, but within one batch, they must be the same.
#
# Here, we use global variables to keep track of the number of time steps the next batch should have
# as well as the next index in the global training set for that particular number of steps. We need
# the second index in case we have a different number of samples with say 17 time steps than we do with
# 18 time steps.

BATCH_SIZE = 100

def my_generator(x_samples, y_samples):
    global BATCH_SIZE

    BATCH_NSTEPS = 7
    BATCH_STEP_INDEX = [0]*(24-7+1)

    print('my_generator: ' + str(len(x_samples)) + ' samples')
    
    x = []
    y = []
    while True:
        
        # Check if we have a full batch and yield if we do
        if len(x) == BATCH_SIZE:
            x_np = np.array(x)
            y_np = np.array(y)
            print('-- x : ' + str(len(x_np)) + ' samples   shape: ' + str(x_np.shape))
            yield x_np, y_np
            x = []
            y = []
            BATCH_NSTEPS = BATCH_NSTEPS+1
            if BATCH_NSTEPS > 23: BATCH_NSTEPS=7
        
        # Add next sample with BATCH_NSTEPS to training lists
        istart = BATCH_STEP_INDEX[BATCH_NSTEPS]
        for i in range(istart, istart+len(x_samples)):
            idx = i%len(x_samples)
            if len(x_samples[idx]) == BATCH_NSTEPS:
                x.append(x_samples[idx])
                y.append(y_samples[idx])
                BATCH_STEP_INDEX[BATCH_NSTEPS] = idx+1
                break

train_generator = my_generator(x_train, y_train)
valid_generator = my_generator(x_test, y_test)

In [24]:

EPOCHS = 10
STEP_SIZE_TRAIN = len(x_train)/(23.-7.)/BATCH_SIZE
STEP_SIZE_VALID = len(x_test)/(23.-7.)/BATCH_SIZE

epoch_loaded = 0

#model.fit_generator(train_generator(), steps_per_epoch=30, epochs=10, verbose=1)

history = model.fit_generator(
  generator            = train_generator
  ,steps_per_epoch     = STEP_SIZE_TRAIN
  ,validation_data     = valid_generator
  ,validation_steps    = STEP_SIZE_VALID
  ,epochs              = EPOCHS
  ,initial_epoch       = epoch_loaded
  ,use_multiprocessing = False
  #,callbacks=[tensorboard, cbk]
)


Epoch 1/10
my_generator: 90009 samplesmy_generator: 10002 samples

-- x : 100 samples   shape: (100, 7, 7)
-- x : 100 samples   shape: (100, 7, 7)
-- x : 100 samples   shape: (100, 8, 7)
-- x : 100 samples   shape: (100, 8, 7)
-- x : 100 samples   shape: (100, 9, 7)


ValueError: Error when checking target: expected time_distributed_3 to have 3 dimensions, but got array with shape (100, 5)