In [0]:
import tensorflow as tf
import numpy as np

In [0]:
class Constants(object):
    """ Constant strings to be used in the code. """
    SEED = 1234

    # Models.
    MODEL_RNN = "rnn"
    MODEL_VRNN = "vrnn"
    MODEL_TCN = "tcn"

    # Negative log-likelihood losses.
    NLL_BERNOULLI = 'nll_bernoulli'
    NLL_NORMAL = 'nll_normal'
    NLL_BINORMAL = 'nll_binormal'
    NLL_GMM = 'nll_gmm'
    NLL_BIGMM = 'nll_bigmm'
    NLL_CENT = 'nll_cent'  # Cross-entropy.
    NLL_CENT_BINARY = 'nll_cent_binary'  # Cross-entropy for binary outputs.
    KLD = 'kld'
    L1 = 'l1'
    MSE = 'mse'

    # RNN cells and layer types.
    LSTM = 'lstm'
    GRU = 'gru'
    DENSE = "dense"  # Fully connected layer.
    TCN = "tcn"  # Temporal convolutional layer, i.e., causal 1D convolution.

    # Activation functions.
    RELU = 'relu'
    ELU = 'elu'
    SIGMOID = 'sigmoid'
    SOFTPLUS = 'softplus'
    TANH = 'tanh'
    SOFTMAX = 'softmax'
    LRELU = 'lrelu'
    CLRELU = 'clrelu'  # Clamped leaky relu.

    # Learning rate scheduler types.
    LR_EXP = "exponential"
    LR_CONSTANT = "constant"

    # Loss reduce function types.
    R_MEAN_STEP = 'mean_step_loss'  # Take average of average step loss per sample over batch. Uses sequence length.
    R_MEAN_SEQUENCE = 'mean_sequence_loss'  # Take average of sequence loss (summation of all steps) over batch. Uses sequence length.
    R_MEAN = 'mean'  # Calculate average of the whole loss tensor.
    R_SUM = 'sum'  # Sum all entries in the loss tensor.
    B_MEAN_STEP = 'batch_mean_step_loss'  # Keep the loss per sample. Uses sequence length.
    R_IDENTITY = 'identity'  # No effect.

    # Optimizers
    OPTIMIZER_ADAM = "adam"

# Tensorflow Model Utils

In [0]:
def get_activation_fn(activation=C.RELU):
    """
    Return tensorflow activation function given string name.
    Args:
        activation (str): name of the activation function.
    Returns:
        TF activation function or None.
    """
    # Check if the activation is already callable.
    if callable(activation):
        return activation
    # Check if the activation is a built-in or custom function.
    if activation == C.RELU:
        return tf.nn.relu
    elif activation == C.ELU:
        return tf.nn.elu
    elif activation == C.TANH:
        return tf.nn.tanh
    elif activation == C.SIGMOID:
        return tf.nn.sigmoid
    elif activation == C.SOFTPLUS:
        return tf.nn.softplus
    elif activation == C.SOFTMAX:
        return tf.nn.softmax
    elif activation == C.LRELU:
        return lambda x: tf.nn.leaky_relu(x, alpha=1. / 3.)
    elif activation == C.CLRELU:
        with tf.name_scope('ClampedLeakyRelu'):
            return lambda x: tf.clip_by_value(tf.nn.leaky_relu(x, alpha=1. / 3.), -3.0, 3.0)
    elif activation is None:
        return None
    else:
        raise Exception("Unknown activation function")

class CausalConv1D(tf.layers.Conv1D):
    def __init__(self, filters, kernel_size, dilation_rate=1,
                 activation=None, trainable=True, name=None, **kwargs):
        super(CausalConv1D, self).__init__(filters=filters,
                                           kernel_size=kernel_size,
                                           padding='valid',
                                           dilation_rate=dilation_rate,
                                           activation=activation,
                                           trainable=trainable,
                                           name=name, **kwargs)

    def call(self, inputs):
        padding = (self.kernel_size[0] - 1) * self.dilation_rate[0]
        inputs = tf.pad(inputs, tf.constant([(0, 0,), (1, 0), (0, 0)]) * padding)
        return super(CausalConv1D, self).call(inputs)


class DenseLayer(tf.keras.models.Sequential):
    """
    Stacks a number of dense layers by allowing applying dropout on the inputs and activation function on the outputs
    of every dense layer.
    """
    def __init__(self, units, num_layers, activation_fn, dropout_rate=0, **kwargs):
        super(DenseLayer, self).__init__(**kwargs)

        self.units = units if isinstance(units, list) else [units] * self.num_layers
        self.num_layers = num_layers
        self.dropout_rate = dropout_rate if isinstance(dropout_rate, list) else [dropout_rate] * self.num_layers
        self.activation_fn = get_activation_fn(activation_fn)

        for idx in range(self.num_layers):
            if self.dropout_rate[idx] > 0:
                self.add(tf.keras.layers.Dropout(self.dropout_rate[idx], name=self.name + "_dropout" + str(idx)))
            self.add(tf.keras.layers.Dense(self.units[idx], self.activation_fn, name=self.name + "_" + str(idx)))

    def call(self, inputs, training=None, mask=None):
        out = super(DenseLayer, self).call(inputs, training=training, mask=mask)
        return out

    def get_config(self):
        base_config = super(DenseLayer, self).get_config()
        return base_config

# Configuration Class

In [0]:
import json
import os

class AttrDict(dict):
    def __init__(self, **kwargs):
        super(AttrDict, self).__init__(**kwargs)

    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__


class DenseLayerConfig(AttrDict):
    """ A template configuration for dense network. """
    def __init__(self, layers=None, units=None, activation=None, dropout_rate=0.0, **kwargs):
        super(DenseLayerConfig, self).__init__(**kwargs)
        self.type = C.DENSE
        self.layers = layers
        self.units = units
        self.activation = activation
        self.dropout_rate = dropout_rate


class TCNLayerConfig(AttrDict):
    """ A template configuration for temporal convolutional network. """
    def __init__(self, layers=None, units=None, activation=None, filters=None, kernel_width=2, strides=1, dilation=1, **kwargs):
        super(TCNLayerConfig, self).__init__(**kwargs)
        self.type = C.TCN
        self.layers = layers
        self.units = units
        self.activation = activation
        self.filters = filters
        self.kernel_width = kernel_width
        self.strides = strides
        self.dilation = dilation


class RNNLayerConfig(AttrDict):
    """ A template configuration for RNN network. """
    def __init__(self, type, layers, units, activation, **kwargs):
        super(RNNLayerConfig, self).__init__(**kwargs)
        self.type = type
        self.layers = layers
        self.units = units
        self.activation = activation


class LossConfig(AttrDict):
    """ A template configuration for defining loss terms. """
    def __init__(self, type=None, out_key=None, target_key=None, weight=1, **kwargs):
        super(LossConfig, self).__init__(**kwargs)
        self.type = None  # see constants for the options.
        self.out_key = None  # key/name of the tensorflow op. looks for <out_key> in model outputs.
        self.target_key = None  # key/name of the target data placeholder.
        self.weight = weight


class Configuration(AttrDict):
    """ Main configuration class for defining models and experiments. """
    def __init__(self, **kwargs):
        super(Configuration, self).__init__(**kwargs)
        self.loss = AttrDict()

    def dump(self, path):
        json.dump(self, open(os.path.join(path, 'config.json'), 'w'), indent=4, sort_keys=True)

#Model Class

In [0]:
def build_tf_estimator(inputs, targets, mode, config):
    model = None
    if config.model == C.MODEL_RNN:
      model = RNN(inputs, targets, mode, config)
    else:
      raise Exception("Unknown model type.")
    
    model.build_graph()
    
    return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=model.dict_predictions,
      loss=model.training_objective,
      train_op=model.training_op,
      eval_metric_ops=None)
  

class BaseTemporalModel(object):
  def __init__(self, inputs, targets, mode, config, **kwargs):
    self.config = None
    self.mode = None
    self.inputs = None
    self.targets = None
    
    # Total loss constructed by build_objective method.
    self.training_objective = None
    # Tensorflow training op constructed by build_optimizer method.
    self.training_op = None
    
    # A container for model outputs.
    self.dict_predictions = dict()
    
    # A container for loss terms.
    self.dict_loss_terms = dict()
    
  def build_graph(self):
    """
    Composing parts of model and builds the final tensorflow computational 
    graph by building the model and loss.
    """
    self.build_network()
    self.build_loss_terms()
    self.build_objective()
    self.build_optimizer()

  def build_network(self):
    """
    Builds internal dynamics of the model.
    """
    raise NotImplementedError('Subclasses must override.')
  
  def build_input_layer(self):
    """Maps the data inputs to an intermediate representation."""
    pass
  
  def build_output_layer(self):
    """Builds the model predictions based on the loss configuration"""
    pass
      
  def build_loss_terms(self):
    """
    Builds loss terms for training objective or monitoring.
    """
    raise NotImplementedError('Subclasses must override.')
    
  def build_objective(self):
    """
    Builds training objective.
    """
    raise NotImplementedError('Subclasses must override.')
    
  def build_optimizer(self):
    """
    Builds optimizer and training op.
    """
    train_op = tf.contrib.layers.optimize_loss(
      loss=self.training_objective,
      global_step=tf.train.get_global_step(),
      learning_rate=self.config.experiment.learning_rate,
      optimizer=self.config.experiment.optimizer,
      clip_gradients=self.config.experiment.grad_clip_by_norm,
      summaries=["learning_rate", "loss", "global_gradient_norm"])
  
    return train_op
  

class RNN(BaseTemporalModel):
  """A standard Recurrent Neural Network model. """
  def __init__(self, inputs, targets, mode, config, **kwargs):
    super(VHRED, self).__init__(inputs, targets, mode, config, **kwargs)

class VHRED(BaseTemporalModel):
  """Latent Variable Hierarchical Recurrent Encoder-Decoder model.
  https://arxiv.org/abs/1605.06069
  """
  def __init__(self, inputs, targets, mode, config, **kwargs):
    super(VHRED, self).__init__(inputs, targets, mode, config, **kwargs)
  
  def build_encoder(self, inputs, **kwargs):
    pass
  
  def build_decoder(self, inputs, **kwargs):
    pass
  
  def build_latent_space(self, inputs, **kwargs):
    
  
  

# Model and Experiment

In [0]:
def define_config(dump_path=None):
    config = Configuration()
    
    config.input_layer = FCLayerConfig()
    config.input_layer.dropout_rate = 0.5
    config.input_layer.num_layers = 2
    config.input_layer.units = 256
    config.input_layer.activation = C.RELU

    config.rnn_layer = FCLayerConfig()
    config.rnn_layer.type = C.LSTM
    config.rnn_layer.num_layers = 2
    config.rnn_layer.units = 256
    config.rnn_layer.activation = C.RELU
    
    config.output_layer = FCLayerConfig()
    config.output_layer.num_layers = 1
    config.output_layer.units = 256
    config.output_layer.activation_fn = C.RELU
    
    config.experiment = AttrDict()
    config.experiment.optimizer = C.OPTIMIZER_ADAM
    config.experiment.learning_rate = 1e-3
    config.experiment.learning_rate_type = C.LR_CONSTANT
    config.experiment.learning_rate_decay_steps = 1000
    config.experiment.learning_rate_decay_rate = 0.95
    config.experiment.batch_size = 20
    config.experiment.num_epochs = 100
    config.experiment.loss_reduce_type = C.R_MEAN_SEQUENCE
    config.experiment.grad_clip_by_norm = 1
    
    config.loss = AttrDict()
    config.loss.stroke = LossConfig(type=C.NLL_GMM, out_key="out", weight=1, target_key="stroke", num_components=20)
    config.loss.pen = LossConfig(type=C.NLL_BERNOULLI, out_key="out", weight=1, target_key="pen") 
    
    if dump_path is not None:
      config.dump(dump_path)
    
    return config

In [0]:
test_config = define_config()
print(test_config.input_layer)

{'type': 'fc', 'num_layers': 2, 'size': None, 'activation': 'relu', 'dropout_rate': 0.5, 'units': 256}


In [0]:
dense_network = FCLayers(test_config.input_layer, name="input_layer")

np_data = np.random.normal(0, 1, (64, 4))
tf_input = tf.Variable(np_data, dtype=tf.float32)
tf_output = dense_network(tf_input)
dense_network.summary()
# tf_output = dense_network(tf_input)
# dense_network.summary()
"""
dense_network = DenseLayersTF(test_config.input_layer, name="input_layer")
np_data = np.random.normal(0, 1, (64, 4))
tf_input = tf.Variable(np_data, dtype=tf.float32)
tf_output = dense_network(tf_input)
dense_network.get_weights()
"""

AttributeError: ignored

In [0]:
encoder_model = tf.keras.models.Sequential(name="encoder")
enc_dense1 = tf.keras.layers.Dense(10, name="enc_dense1")
encoder_model.add(enc_dense1)
encoder_model.add(tf.keras.layers.Dense(32, name="enc_dense2"))
encoder_model.add(tf.keras.layers.Dropout(0.5, name="enc_drop"))

decoder_model = tf.keras.models.Sequential(name="decoder")
decoder_model.add(tf.keras.layers.Dropout(0.5, name="dec_drop"))
decoder_model.add(tf.keras.layers.Dense(32, name="dec_dense1"))
decoder_model.add(tf.keras.layers.Dense(10, name="dec_dense2"))

model = tf.keras.models.Sequential()
model.add(encoder_model)
model.add(decoder_model)

np_data = np.random.normal(0, 1, (64, 4))
tf_input = tf.Variable(np_data, dtype=tf.float32)

In [0]:
tf_output = model(tf_input)

In [0]:
encoder_model.summary()
decoder_model.summary()
model.summary()
model.compute_output_shape(tf_input.shape)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
enc_dense1 (Dense)           multiple                  50        
_________________________________________________________________
enc_dense2 (Dense)           multiple                  352       
_________________________________________________________________
enc_drop (Dropout)           multiple                  0         
Total params: 402
Trainable params: 402
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dec_drop (Dropout)           multiple                  0         
_________________________________________________________________
dec_dense1 (Dense)           multiple                  1056      
_________________________________________________________________
dec_dense2 (

TensorShape([Dimension(64), Dimension(10)])