In [1]:
# Building the MDN-RNN model with TensorFlow
 
# Importing the libraries
 
import numpy as np
import tensorflow as tf
 
# Building the MDN-RNN model within a class
 
class MDNRNN(object):
    """MDN-RNN built with the TensorFlow 1.x graph API.

    A LayerNormBasicLSTMCell is unrolled over a fixed-length sequence and its
    outputs are linearly projected to the parameters (log mixture weights,
    means and log standard deviations) of one-dimensional Gaussian mixtures.
    The model is built inside its own ``tf.Graph`` (``self.g``) and owns a
    session (``self.sess``) bound to that graph.
    """

    # Initializing all the parameters and variables of the MDNRNN class
    def __init__(self, hps, reuse=False, gpu_mode=False):
        """Build the graph and open a session with initialised variables.

        Args:
            hps: hyper-parameter object. The attributes read by this class are
                num_mixture, input_seq_width, output_seq_width, max_seq_len,
                batch_size, rnn_size, is_training, use_recurrent_dropout,
                recurrent_dropout_prob, use_input_dropout, input_dropout_prob,
                use_output_dropout, output_dropout_prob, use_layer_norm,
                learning_rate and grad_clip.
            reuse: forwarded to ``tf.variable_scope('mdn_rnn', reuse=reuse)``.
            gpu_mode: when False the graph is built under ``tf.device('/cpu:0')``.
        """
        self.hps = hps
        # NOTE(review): the variable scope and device contexts are entered
        # before self.g is created; in TF1 both appear to be tied to the graph
        # that is the default at that moment, so they presumably do not affect
        # ops built inside self.g - confirm before relying on the 'mdn_rnn'
        # prefix or on CPU pinning.
        with tf.variable_scope('mdn_rnn', reuse=reuse):
            if not gpu_mode:
                with tf.device('/cpu:0'):
                    tf.logging.info('Model using cpu.')
                    self.g = tf.Graph()
                    with self.g.as_default():
                        self.build_model(hps)
            else:
                tf.logging.info('Model using gpu.')
                self.g = tf.Graph()
                with self.g.as_default():
                    self.build_model(hps)
        self._init_session()

    # Making a method that creates the MDN-RNN model architecture itself
    def build_model(self, hps):
        """Create the placeholders, the RNN, the MDN head, the loss and the training ops.

        Sets, among others, ``input_x``/``output_x`` (placeholders),
        ``initial_state``/``final_state``, ``out_logmix``/``out_mean``/
        ``out_logstd``, ``cost`` and ``init``. ``global_step`` is created when
        ``hps.is_training`` is truthy; ``lr``, ``optimizer`` and ``train_op``
        are created when ``hps.is_training == 1``.
        """
        # Building the RNN
        self.num_mixture = hps.num_mixture
        KMIX = self.num_mixture
        INWIDTH = hps.input_seq_width
        OUTWIDTH = hps.output_seq_width
        LENGTH = self.hps.max_seq_len
        if hps.is_training:
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
        cell_fn = tf.contrib.rnn.LayerNormBasicLSTMCell
        # The hyper-parameter flags are stored as numbers; 0 means "disabled"
        use_recurrent_dropout = False if self.hps.use_recurrent_dropout == 0 else True
        use_input_dropout = False if self.hps.use_input_dropout == 0 else True
        use_output_dropout = False if self.hps.use_output_dropout == 0 else True
        use_layer_norm = False if self.hps.use_layer_norm == 0 else True
        if use_recurrent_dropout:
            cell = cell_fn(hps.rnn_size, layer_norm=use_layer_norm, dropout_keep_prob=self.hps.recurrent_dropout_prob)
        else:
            cell = cell_fn(hps.rnn_size, layer_norm=use_layer_norm)
        if use_input_dropout:
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=self.hps.input_dropout_prob)
        if use_output_dropout:
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=self.hps.output_dropout_prob)
        self.cell = cell
        self.sequence_lengths = LENGTH
        # Inputs and targets both have shape [batch_size, max_seq_len, width]
        self.input_x = tf.placeholder(dtype=tf.float32, shape=[self.hps.batch_size, self.hps.max_seq_len, INWIDTH])
        self.output_x = tf.placeholder(dtype=tf.float32, shape=[self.hps.batch_size, self.hps.max_seq_len, OUTWIDTH])
        actual_input_x = self.input_x
        self.initial_state = cell.zero_state(batch_size=hps.batch_size, dtype=tf.float32)
        # Three parameters (logmix, mean, logstd) per mixture component and per output dimension
        NOUT = OUTWIDTH * KMIX * 3
        with tf.variable_scope('RNN'):
            output_w = tf.get_variable("output_w", [self.hps.rnn_size, NOUT])
            output_b = tf.get_variable("output_b", [NOUT])
        output, last_state = tf.nn.dynamic_rnn(cell=cell,
                                               inputs=actual_input_x,
                                               initial_state=self.initial_state,
                                               dtype=tf.float32,
                                               swap_memory=True,
                                               scope="RNN")
        # Building the MDN
        output = tf.reshape(output, [-1, hps.rnn_size])
        output = tf.nn.xw_plus_b(output, output_w, output_b)
        # One row per scalar target value, holding its KMIX * 3 mixture parameters
        output = tf.reshape(output, [-1, KMIX * 3])
        self.final_state = last_state
        def get_mdn_coef(output):
            """Split each row into (logmix, mean, logstd) and normalise logmix in log space."""
            logmix, mean, logstd = tf.split(output, 3, 1)
            logmix = logmix - tf.reduce_logsumexp(logmix, 1, keepdims=True)
            return logmix, mean, logstd
        out_logmix, out_mean, out_logstd = get_mdn_coef(output)
        self.out_logmix = out_logmix
        self.out_mean = out_mean
        self.out_logstd = out_logstd
        # Implementing the training operations
        logSqrtTwoPI = np.log(np.sqrt(2.0 * np.pi))
        def tf_lognormal(y, mean, logstd):
            """Log-density of y under a normal distribution with the given mean and log-std."""
            return -0.5 * ((y - mean) / tf.exp(logstd)) ** 2 - logstd - logSqrtTwoPI
        def get_lossfunc(logmix, mean, logstd, y):
            """Mean negative log-likelihood of y under the Gaussian mixture."""
            v = logmix + tf_lognormal(y, mean, logstd)
            v = tf.reduce_logsumexp(v, 1, keepdims=True)
            return -tf.reduce_mean(v)
        # Targets are flattened to one scalar per row so that they line up with the rows of `output`
        flat_target_data = tf.reshape(self.output_x,[-1, 1])
        lossfunc = get_lossfunc(out_logmix, out_mean, out_logstd, flat_target_data)
        self.cost = tf.reduce_mean(lossfunc)
        if self.hps.is_training == 1:
            self.lr = tf.Variable(self.hps.learning_rate, trainable=False)
            self.optimizer = tf.train.AdamOptimizer(self.lr)
            gvs = self.optimizer.compute_gradients(self.cost)
            # Element-wise gradient clipping to [-grad_clip, grad_clip]
            capped_gvs = [(tf.clip_by_value(grad, -self.hps.grad_clip, self.hps.grad_clip), var) for grad, var in gvs]
            self.train_op = self.optimizer.apply_gradients(capped_gvs, global_step=self.global_step, name='train_step')
        self.init = tf.global_variables_initializer()

    # Making a method that launches the session used to run the model
    def _init_session(self):
        """Open a session bound to this model's graph and run the variable initializer."""
        self.sess = tf.Session(graph=self.g)
        self.sess.run(self.init)

    # Making a method that releases the session once the model is no longer needed
    def close_sess(self):
        """Close the session opened by ``_init_session``."""
        self.sess.close()

In [2]:
# Building the MDN-RNN model with Keras
 
# Importing the libraries
import math
import numpy as np
from keras.layers import Input, LSTM, Dense
from keras.models import Model
from keras import backend as K
from keras.callbacks import EarlyStopping
 
# Size of each latent vector z
Z_DIM = 32

# Size of the action vector
ACTION_DIM = 3

# Number of units in the LSTM layer
HIDDEN_UNITS = 256

# Number of Gaussian components in each mixture
GAUSSIAN_MIXTURES = 5

# Training batch size and number of epochs
BATCH_SIZE = 32
EPOCHS = 20
 
# Getting the Gaussian mixture coefficients
def get_mixture_coef(y_pred):
    """Split the raw network output into Gaussian mixture parameters.

    The last axis of ``y_pred`` is read as three consecutive groups of
    ``GAUSSIAN_MIXTURES * Z_DIM`` values (mixture logits, means and log standard
    deviations). Each group is reshaped to
    ``(-1, rollout_length, GAUSSIAN_MIXTURES, Z_DIM)``.

    Returns:
        A tuple ``(pi, mu, sigma)`` where ``pi`` is the softmax of the logits
        over the mixture axis (axis 2), ``mu`` holds the means unchanged and
        ``sigma`` is the exponential of the log standard deviations.
    """
    d = GAUSSIAN_MIXTURES * Z_DIM
    rollout_length = K.shape(y_pred)[1]
    mixture_shape = [-1, rollout_length, GAUSSIAN_MIXTURES, Z_DIM]
    pi = K.reshape(y_pred[:,:,:d], mixture_shape)
    mu = K.reshape(y_pred[:,:,d:(2*d)], mixture_shape)
    log_sigma = K.reshape(y_pred[:,:,(2*d):(3*d)], mixture_shape)
    # Softmax over the mixture axis. Subtracting the per-mixture maximum leaves
    # the result unchanged but keeps exp() from overflowing on large logits,
    # which would otherwise produce inf / inf = NaN.
    pi = pi - K.max(pi, axis=2, keepdims=True)
    exp_pi = K.exp(pi)
    pi = exp_pi / K.sum(exp_pi, axis=2, keepdims=True)
    sigma = K.exp(log_sigma)
    return pi, mu, sigma
 
# Computing the probability density of the target values under the Gaussian mixture
def tf_normal(y_true, mu, sigma, pi):
    """Evaluate the Gaussian mixture density at the target values.

    ``y_true`` is tiled ``GAUSSIAN_MIXTURES`` times along its last axis and
    reshaped to ``(-1, rollout_length, GAUSSIAN_MIXTURES, Z_DIM)`` so that it can
    be compared with ``mu``, ``sigma`` and ``pi``. The per-component normal
    densities are weighted by ``pi`` and summed over the mixture axis (axis 2).
    """
    steps = K.shape(y_true)[1]
    targets = K.tile(y_true, (1, 1, GAUSSIAN_MIXTURES))
    targets = K.reshape(targets, [-1, steps, GAUSSIAN_MIXTURES, Z_DIM])
    oneDivSqrtTwoPI = 1 / math.sqrt(2*math.pi)
    # Standardised residual; the small epsilon guards against division by zero
    standardized = (targets - mu) * (1 / (sigma + 1e-8))
    exponent = -K.square(standardized)/2
    density = K.exp(exponent) * (1/(sigma + 1e-8))*oneDivSqrtTwoPI
    weighted = density * pi
    return K.sum(weighted, axis=2)
 
# Building the MDN-RNN model within a class
 
class MDNRNN():
    """Keras MDN-RNN: an LSTM layer followed by a dense mixture-density head.

    Attributes set in ``__init__``:
        models: the tuple ``(model, forward)`` returned by ``_build``.
        model: compiled training model mapping an input sequence to the raw
            mixture parameters.
        forward: inference model mapping (input sequence, hidden state, cell
            state) to the new hidden state and cell state.
    """

    # Initializing all the parameters and variables of the MDNRNN class
    def __init__(self):
        self.models = self._build()
        self.model = self.models[0]
        self.forward = self.models[1]
        self.z_dim = Z_DIM
        self.action_dim = ACTION_DIM
        self.hidden_units = HIDDEN_UNITS
        self.gaussian_mixtures = GAUSSIAN_MIXTURES

    # Building the model
    def _build(self):
        """Create and compile the training model and the state-forwarding model.

        Returns:
            A tuple ``(rnn, forward)``; both models share the same LSTM layer.
        """
        # Defining the Inputs of the RNN (latent vector space + action space)
        rnn_x = Input(shape=(None, Z_DIM + ACTION_DIM))
        # Defining the LSTM layer that returns the output sequence plus the final hidden and cell states
        lstm = LSTM(HIDDEN_UNITS, return_sequences=True, return_state = True)
        # Getting the real outputs from the LSTM
        lstm_output, _ , _ = lstm(rnn_x)
        # Getting the gaussian mixture outputs
        mdn = Dense(GAUSSIAN_MIXTURES * (3*Z_DIM))(lstm_output)
        # Getting the training model
        rnn = Model(rnn_x, mdn)
        # Getting the hidden state and cell state inputs
        state_input_h = Input(shape=(HIDDEN_UNITS,))
        state_input_c = Input(shape=(HIDDEN_UNITS,))
        # Grouping them
        state_inputs = [state_input_h, state_input_c]
        # Getting outputs new state and new cell state from the LSTM
        _ , state_h, state_c = lstm(rnn_x, initial_state = [state_input_h, state_input_c])
        # Defining the forward propagation for inference only
        forward = Model([rnn_x] + state_inputs, [state_h, state_c])
        # Implementing the training operations
        def rnn_r_loss(y_true, y_pred):
            # Defining the negative log loss over all the gaussian mixtures
            pi, mu, sigma = get_mixture_coef(y_pred)
            result = tf_normal(y_true, mu, sigma, pi)
            # The epsilon keeps the log finite when the density underflows to zero
            result = -K.log(result + 1e-8)
            result = K.mean(result, axis = (1,2))
            return result
        # Defining the KL divergence loss, the same as in the VAE, only over normalized outputs
        def rnn_kl_loss(y_true, y_pred):
            pi, mu, sigma = get_mixture_coef(y_pred)
            kl_loss = - 0.5 * K.mean(1 + K.log(K.square(sigma)) - K.square(mu) - K.square(sigma), axis = [1,2,3])
            return kl_loss
        # Defining the RNN loss (only the reconstruction term is optimised; the KL term is reported as a metric)
        def rnn_loss(y_true, y_pred):
            return rnn_r_loss(y_true, y_pred)
        # Compiling the RNN model with the RNN loss and the RMSProp optimizer
        rnn.compile(loss=rnn_loss, optimizer='rmsprop', metrics = [rnn_r_loss, rnn_kl_loss])
        return (rnn,forward)

    # Loading the weights of the model
    def set_weights(self, filepath):
        """Load the training model's weights from ``filepath``."""
        self.model.load_weights(filepath)

    # Training the model with an early stopping callback to prevent overfitting
    def train(self, rnn_input, rnn_output, validation_split = 0.2, weights_path = 'rnn/weights.h5'):
        """Fit the training model and save its weights.

        Args:
            rnn_input: inputs passed to ``self.model.fit``.
            rnn_output: targets passed to ``self.model.fit``.
            validation_split: fraction of the data held out for validation.
            weights_path: file the weights are saved to once training is done.
                Its parent directory is created if it does not exist.
        """
        import os
        # Creating the output directory up front so that a long training run
        # is not lost to a missing folder when the weights are saved
        weights_dir = os.path.dirname(weights_path)
        if weights_dir:
            os.makedirs(weights_dir, exist_ok=True)
        earlystop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=5, verbose=1, mode='auto')
        callbacks_list = [earlystop]
        # Fitting the model to the RNN inputs and targets
        self.model.fit(rnn_input, rnn_output,
            shuffle=True,
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            validation_split=validation_split,
            callbacks=callbacks_list)
        # Saving the model after the training is done
        self.model.save_weights(weights_path)

    # Separating the function used to save the model (useful if the model is retrained)
    def save_weights(self, filepath):
        """Save the training model's weights to ``filepath``."""
        self.model.save_weights(filepath)

Using TensorFlow backend.
