# Imports

In [1]:
import configparser
import numpy as np
import tensorflow as tf
from established.utils.helper_functions import get_all_file_names
from established.utils.data_loader import data_generator_classification, data_generator_identification
from established.utils.data_loader import data_loader_model_wrapper_classification
from established.utils.data_loader import data_loader_model_wrapper_identification
from established.model.losses import softmax_loss, triplet_loss_tf
from established.model.models import get_simple_LSTM_encoder, get_simple_LSTM_classification
from established.utils.trainer import BaseTrainer

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [11]:
# Task switch read by the cells below: True trains the classification model,
# False trains the identification model.
classification = False

# Preparation

In [12]:
# Read the project's INI configuration; the DATA and TRAINING sections of
# `config` are looked up further down in this notebook.
config = configparser.ConfigParser()
config_file = 'established/config.ini'
config.read(config_file)

['established/config.ini']

In [13]:
# load paths of data files
# Both tasks read the same train/validation file lists from the configured
# data root, so no branch on `classification` is needed here.
train_files, val_files = get_all_file_names(config['DATA']['data_root'])

In [14]:
# define parameters
training_cfg = config['TRAINING']
data_cfg = config['DATA']

# settings taken from the config file (stored as strings, hence the int casts)
batch_size = int(training_cfg['batch_size'])
encoding_dim = int(training_cfg['encoding_dim'])
num_classes = int(data_cfg['num_classes'])
time_steps = int(data_cfg['time_steps'])
features_dim = int(data_cfg['features_dim'])

# sample count per file is read from the first axis of the first training array;
# the step counts below apply that same count to every train and validation file
samples_per_file = np.load(train_files[0][0]).shape[0]
steps_per_epoch_train = int(len(train_files) * samples_per_file / batch_size)
steps_per_epoch_val = int(len(val_files) * samples_per_file / batch_size)

In [15]:
# Build the raw data generators and their model-specific wrappers for the selected task.
if classification:
    # get data loader
    train_gen = data_generator_classification(train_files, batch_size, steps_per_epoch_train, num_classes,
                                                mode='train', model='LSTM')
    val_gen = data_generator_classification(val_files, batch_size, steps_per_epoch_val, num_classes,
                                                mode='val', model='LSTM')

    # data loader wrapper -> reshape inputs, add state initialization and pad time and dimension to
    # multiple of 16
    train_gen_wrap = data_loader_model_wrapper_classification(train_gen, [batch_size, encoding_dim])
    # the validation wrapper must consume the validation generator, not the training one
    val_gen_wrap = data_loader_model_wrapper_classification(val_gen, [batch_size, encoding_dim])
else:
    # get data loader
    train_gen = data_generator_identification(train_files, batch_size, steps_per_epoch_train, mode='train', model='LSTM')
    val_gen = data_generator_identification(val_files, batch_size, steps_per_epoch_val, mode='val', model='LSTM')

    # data loader wrapper -> reshape inputs, add state initialization and pad time and dimension to
    # multiple of 16
    train_gen_wrap = data_loader_model_wrapper_identification(train_gen, [3*batch_size, encoding_dim], encoding_dim)
    # the validation wrapper must consume the validation generator, not the training one
    val_gen_wrap = data_loader_model_wrapper_identification(val_gen, [3*batch_size, encoding_dim], encoding_dim)

# Initialize model

In [16]:
# start from an empty default graph so that re-running this cell does not
# add a second model on top of an already defined one
tf.reset_default_graph()

# build the graph for the selected task; the builder's three return values
# are unpacked into the input, output and label dictionaries
if classification:
    model_nodes = get_simple_LSTM_classification(encoding_dim, time_steps, features_dim, num_classes)
else:
    model_nodes = get_simple_LSTM_encoder(encoding_dim, time_steps, features_dim)
input_dict, output_dict, label_dict = model_nodes

In [17]:
# Initialize trainer and losses

# one loss per output key: softmax loss on 'logits' for classification,
# triplet loss on 'encoding' for identification
losses_dict = {'logits': softmax_loss} if classification else {'encoding': triplet_loss_tf}

trainer = BaseTrainer(
    input_dict, output_dict, label_dict, losses_dict,
    train_gen_wrap, val_gen_wrap,
    steps_per_epoch_train, steps_per_epoch_val,
    config, out_scope='Encoding',
)

# Run training

In [None]:
# Start the training loop; progress is printed to the cell output.
trainer.train()

Epoch 0
Positive samples cache full!
Step 0 | Training loss: 0.5204478


In [None]:
import datetime
import numpy as np
import tensorflow as tf

class BaseTrainer():
    """
    class for handling the training procedure

    Builds the loss and Adam optimisation op from the supplied graph dictionaries,
    runs the epoch loop with early stopping, logs the per-epoch losses to
    TensorBoard and saves a checkpoint plus a frozen graph whenever the
    validation loss improves.
    """

    def __init__(self, input_dict, output_dict, label_dict, losses_dict, train_gen, val_gen,
                 steps_per_epoch_train, steps_per_epoch_val, config_file, out_scope: str='Encoding'):
        """
        initialize trainer

        args:
            input_dict: dictionary with graph input nodes
            output_dict: dictionary with graph output nodes
            label_dict: dictionary with graph label input nodes
            losses_dict: dictionary with losses. each key in dict need corresponding key in output_dict and label_dict
            train_gen: generator for training data
            val_gen: generator for validation data
            steps_per_epoch_train: int, number of batches per epoch
            steps_per_epoch_val: int, number of steps per loop over validation generator
            config_file: parsed config object (configparser.ConfigParser) containing a 'TRAINING' section
            out_scope: str, name of scope of desired graph output

        returns:
            None
        """

        self.input_dict = input_dict
        self.output_dict = output_dict
        self.label_dict = label_dict
        self.losses_dict = losses_dict
        self.train_gen = train_gen
        self.val_gen = val_gen
        self.steps_per_epoch_train = steps_per_epoch_train
        self.steps_per_epoch_val = steps_per_epoch_val
        self.out_scope = out_scope

        # Read the TRAINING options from the config object that was passed in,
        # not from a module/notebook level global that may not exist.
        # The values are the raw strings stored by the parser, which is why
        # they are cast with int()/float() at the point of use.
        self.params = config_file._sections['TRAINING']


    def train(self, checkpoint=None):
        """
        Perform training of model

        args:
            checkpoint: optional checkpoint path; if given, the variable values are
                restored from it before training starts. The epoch counter and the
                best validation loss are NOT restored, they always start fresh.

        returns:
            None
        """

        # compute costs and define training optimization operation
        # (every option of the TRAINING section is forwarded to each loss as keyword argument)
        costs = [self.losses_dict[key](self.label_dict[key], self.output_dict[key], **self.params)
                 for key in self.output_dict.keys()]
        cost = tf.reduce_sum(tf.concat(costs, axis=0))
        train_ops = tf.train.AdamOptimizer(float(self.params['lr'])).minimize(cost)

        # create summaries for tensorboard
        with tf.name_scope('summary'):
            train_summary = tf.placeholder(tf.float32, shape=None, name='train_loss_placeholder')
            train_loss_summary = tf.summary.scalar('train_loss', train_summary)
            val_summary = tf.placeholder(tf.float32, shape=None, name='val_loss_placeholder')
            val_loss_summary = tf.summary.scalar('val_loss', val_summary)
        loss_summaries = tf.summary.merge([train_loss_summary, val_loss_summary])

        # get name of output node: last operation whose name contains the output
        # scope and that is not part of the gradient computation
        out_node_name = [op.name for op in tf.get_default_graph().get_operations()
                        if self.out_scope in op.name and 'gradients' not in op.name][-1]

        # initialize tf session; it is closed in the finally block below so that
        # its resources are released even if training fails or is interrupted
        sess = tf.Session()
        writer = None
        try:
            saver = tf.train.Saver()

            # create run folder and initialize summary writer
            run_folder = 'runs/' + 'tf13' + str(datetime.datetime.now())
            writer = tf.summary.FileWriter(run_folder, sess.graph)

            # initialize variables first ...
            sess.run(tf.global_variables_initializer())

            # ... and only then restore the checkpoint, otherwise the initializer
            # would overwrite the restored values again
            if checkpoint is not None:
                saver.restore(sess, checkpoint)

            # start the training
            best_val_loss = np.inf
            patience_counter = 0
            for i in range(int(self.params['epochs'])):
                print('Epoch ' + str(i))

                # only the losses evaluated at printing steps are collected here
                train_loss = []
                for j in range(self.steps_per_epoch_train):

                    # get batch and run optimization operation
                    batch = next(self.train_gen)
                    inp = self._build_input(batch)

                    # if time for printing, evaluate cost and training optimization operation
                    # else just evaluate training optimization operation
                    if j % int(self.params['print_every']) == 0:
                        _, current_cost = sess.run([train_ops, cost], inp)
                        train_loss.append(current_cost)
                        print('Step ' + str(j) + ' | Training loss: ' + str(current_cost))
                    else:
                        sess.run(train_ops, inp)

                # collect validation losses
                val_loss = []
                for j in range(self.steps_per_epoch_val):
                    batch = next(self.val_gen)
                    inp = self._build_input(batch)
                    val_loss.append(sess.run(cost, inp))

                # average training and validation losses, write them to tensorboard and print
                val_loss = np.mean(val_loss)
                train_loss = np.mean(train_loss)
                summ = sess.run(loss_summaries, feed_dict={train_summary: train_loss, val_summary: val_loss})
                writer.add_summary(summ, i+1)
                writer.flush()
                print('Epoch ' + str(i) + ' | Training loss: ' + str(train_loss) + ' | Validation loss: ' + str(val_loss))

                # update best validation loss if improvement and save checkpoint and model
                # if no improvement increment patience counter
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0

                    # save session
                    saver.save(sess, run_folder + '/class_ckpt', i)
                    output_graph_def = tf.graph_util.convert_variables_to_constants(
                        sess,
                        tf.get_default_graph().as_graph_def(),
                        [out_node_name]
                        )

                    # save graph
                    with tf.gfile.GFile(run_folder + '/best_model.pb', "wb") as f:
                        f.write(output_graph_def.SerializeToString())
                else:
                    patience_counter += 1
                    if patience_counter >= int(self.params['patience']):
                        print('Training done!')
                        break
        finally:
            # release the event file handle and the session resources
            if writer is not None:
                writer.close()
            sess.close()


    def _build_input(self, batch):
        """
        builds input for session evaluation from batch

        args:
            batch: mapping that holds one entry for every key of input_dict and label_dict

        returns:
            dict mapping each graph input / label node to the matching batch entry
        """

        inp = {}
        for key in self.input_dict.keys():
            inp[self.input_dict[key]] = batch[key]
        for key in self.label_dict.keys():
            inp[self.label_dict[key]] = batch[key]
        return inp