# Generating handwriting with Clockwork-RNN
This is an implementation of handwriting generation using a Clockwork RNN (CWRNN) combined with a Mixture Density Network (MDN).

The main idea behind MDN is to use neural network outputs to parameterise a mixture distribution.

The idea behind CWRNN is to use more RNN cells but not to update all of them at every timestep. Each cell in a CWRNN has a period which determines at which timesteps the cell is active.

This implementation is heavily influenced by these projects:
*  https://github.com/aidangomez/models/tree/684883e4d4f310e59da109ba28cda8ba5f7785c9/clockwork_rnn
*  https://github.com/hardmaru/write-rnn-tensorflow

And these papers:
*  https://arxiv.org/pdf/1308.0850.pdf
*  https://arxiv.org/pdf/1402.3511.pdf

The data is loaded from XML files, which can be found at http://www.fki.inf.unibe.ch/databases/iam-on-line-handwriting-database. To keep the running time manageable, I used only 300 of the 12000 files in total. The path to the data is stored in the variable `path`.

In [None]:
import os
import sys
import xml.etree.ElementTree as ET
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import math
import random
import time

# Root directory of the stroke files; the files themselves sit exactly two
# directory levels below it.
path = "C:/Users/Jirka/Downloads/lineStrokes"
filenames = [
    path + "/" + dir1 + "/" + dir2 + "/" + filename
    for dir1 in os.listdir(path)
    for dir2 in os.listdir(path + "/" + dir1)
    for filename in os.listdir(path + "/" + dir1 + "/" + dir2)
]
            
# Every entry is one parsed file, stored as a list of
# [dx, dy, end_of_stroke] triples.
trainSequences = []

for filename in filenames:
    root = ET.parse(filename).getroot()

    # Collect the absolute pen positions of all strokes in document order.
    # The third component is 1 on the last point of a stroke and 0 elsewhere.
    line = []
    for stroke in root.iter('Stroke'):
        points = [[int(point.get('x')), int(point.get('y')), 0]
                  for point in stroke.iter('Point')]
        if not points:
            # A stroke without points has no last point to flag.
            continue
        points[-1][2] = 1
        line.extend(points)

    if not line:
        # A file without any points yields no sequence at all.
        continue

    # Replace absolute positions by offsets from the previous point. The
    # first point has no predecessor and becomes the all-zero triple.
    deltas = [[0, 0, 0]]
    deltas.extend(
        [cur[0] - prev[0], cur[1] - prev[1], cur[2]]
        for prev, cur in zip(line, line[1:])
    )
    trainSequences.append(deltas)

# One int16 array with a row of three values per point for every parsed file.
npTrainSequences = [np.array(line, dtype=np.int16) for line in trainSequences]
    
    
valid_data = []
train_data = []
counter = 0           # number of (300+2)-point windows contained in train_data
cur_data_counter = 0  # number of sequences that passed the length filter

for data in npTrainSequences:
    # Only sequences longer than one full window (300+2 points) are usable.
    if len(data) <= (300+2):
        continue

    # removes large gaps from the data
    data = np.clip(data, -500, 500).astype(np.float32)
    # Scale the two offset columns down; the end-of-stroke column is untouched.
    data[:, 0:2] /= 10

    cur_data_counter += 1
    # Every 20th accepted sequence is held out for validation.
    if cur_data_counter % 20 == 0:
        valid_data.append(data)
        continue

    train_data.append(data)
    counter += len(data) // (300+2)  # number of equiv batches this datapoint is worth

print("train data: {}, valid data: {}".format(len(train_data), len(valid_data)))
    
# Sum of the sizes in bytes that sys.getsizeof reports for the parsed arrays.
size = sum(sys.getsizeof(a) for a in npTrainSequences)

# Data loader prepared for batch fetching to neural network.
class DataLoader():
    """Serve random fixed-length windows cut out of the training sequences.

    Args:
        train_data: list of 2-D arrays, one per training sequence. Every
            sequence must hold at least ``seq_length + 2`` rows.
        valid_data: list of validation sequences; stored but not used by
            this class.
        batch_size: number of windows returned by one ``next_batch`` call.

    Attributes set by the constructor: ``seq_length`` (window length, 300),
    ``pointer`` (index of the sequence sampled next) and ``num_batches``
    (number of batches that make up one pass over the training data).
    """

    def __init__(self, train_data, valid_data, batch_size=50):
        self.seq_length = 300
        self.train_data = train_data
        self.valid_data = valid_data
        self.batch_size = batch_size
        self.reset_batch_pointer()
        # Derived from train_data itself instead of a module-level counter,
        # so the loader is correct for whatever data it is handed.
        total_windows = sum(self._num_windows(data) for data in self.train_data)
        self.num_batches = int(total_windows / self.batch_size)

    def _num_windows(self, data):
        """Return how many (seq_length + 2)-row windows fit into ``data``."""
        return int(len(data) / (self.seq_length + 2))

    def next_batch(self):
        """Return ``(x, y)`` arrays holding ``batch_size`` random windows.

        ``y`` is the same window as ``x`` shifted one row forward, i.e. the
        next-step target for every input row.
        """
        # returns a randomised, seq_length sized portion of the training data
        x_batch = []
        y_batch = []
        for _ in range(self.batch_size):
            data = self.train_data[self.pointer]
            n_batch = self._num_windows(data)  # number of equiv batches this datapoint is worth
            idx = random.randint(0, len(data)-self.seq_length-2)
            x_batch.append(np.copy(data[idx:idx+self.seq_length]))
            y_batch.append(np.copy(data[idx+1:idx+self.seq_length+1]))
            # Advance to the next sequence with probability 1/n_batch, so a
            # sequence holding more windows is sampled from more often.
            if random.random() < (1.0/float(n_batch)):
                self.pointer += 1
                if self.pointer >= len(self.train_data):
                    self.pointer = 0
        return np.array(x_batch), np.array(y_batch)

    def reset_batch_pointer(self):
        """Restart sampling at the first training sequence."""
        self.pointer = 0

Setting up the network model

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from clockwork.cwrnn import CWRNNCell

# Dimensions of one mini-batch: sequences per batch, timesteps per sequence
# and values per timestep.
batch_size = 50
seq_len = 300
num_components = 3

# Number of RNN modules and the total hidden size shared between them.
NCELLS = 8
NHIDDEN = 128 * NCELLS

# Inputs and targets share one shape.
io_shape = [batch_size, seq_len, num_components]
inputs = tf.placeholder(dtype=tf.float32, shape=io_shape)
targets = tf.placeholder(dtype=tf.float32, shape=io_shape)

# CWRNN part: NCELLS basic RNN cells of equal size, the i-th one paired with
# the period 2**i.
cells = [
    tf.contrib.rnn.BasicRNNCell(num_units=NHIDDEN // NCELLS)
    for _ in range(NCELLS)
]
periods = [2 ** i for i in range(NCELLS)]

cell = CWRNNCell(cells, periods, state_is_tuple=False)

# LSTM part (disabled alternatives to the CWRNN cell above)
#cell = tf.contrib.rnn.BasicLSTMCell(NHIDDEN, state_is_tuple=False)

#cell = tf.contrib.rnn.MultiRNNCell(
#            [tf.contrib.rnn.BasicLSTMCell(NHIDDEN, state_is_tuple=False) for _ in range(2)],
#            state_is_tuple=False
#)

#cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob = 0.8)

# The initial state is exposed under a fixed graph name so that a state can be
# fed in through `prev_state`.
zero_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
prev_state = tf.identity(zero_state, name='prev_state')

# Width of the network output per timestep: one end-of-stroke value plus six
# parameters (pi, mu1, mu2, sigma1, sigma2, corr) for each of 20 mixture
# components.
NOUT = 1 + 20 * 6

# Weights and bias of the linear layer applied on top of the RNN output.
with tf.variable_scope('rnnlm'):
    output_w = tf.get_variable("output_w", [NHIDDEN, NOUT])
    output_b = tf.get_variable("output_b", [NOUT])

# Forward pass: the decoder takes one input tensor per timestep.
inputs2 = tf.unstack(inputs, axis=1)
outputs, state_out = tf.contrib.legacy_seq2seq.rnn_decoder(
    inputs2, prev_state, cell, loop_function=None, scope='rnnlm')

# Join the per-timestep outputs and reshape to rows of NHIDDEN values before
# applying the linear output layer.
joined_outputs = tf.concat(axis=1, values=outputs)
output = tf.reshape(joined_outputs, [-1, NHIDDEN])
output = tf.nn.xw_plus_b(output, output_w, output_b)
state_out = tf.identity(state_out, name='state_out')

# Disabled experiments with alternative decoder / output-layer wiring

#outputs, _ = tf.contrib.legacy_seq2seq.rnn_decoder(cell=cell, decoder_inputs=x, initial_state=zero_state)
#outputs, out_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=prev_state, time_major=False)

#NOUT = 1 + 20 * 6 # end_of_stroke + prob + 2*(mu + sig) + corr

#W_out = tf.Variable(tf.random_normal([NHIDDEN,NOUT], stddev=1.0, dtype=tf.float32))
#b_out = tf.Variable(tf.random_normal([NOUT], stddev=1.0, dtype=tf.float32))

#output = tf.reshape(outputs, [-1, NHIDDEN])
#output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, NHIDDEN])
#print(output.shape)
#output = tf.nn.xw_plus_b(output, W_out, b_out)

# End of disabled experiments


# Flatten the targets to rows of num_components values and split them into
# their three columns.
flat_targets = tf.reshape(targets, [-1, num_components])
x1_data, x2_data, eos_data = tf.split(flat_targets, num_or_size_splits=3, axis=1)

#MDN Part. Mainly copied from https://github.com/hardmaru/write-rnn-tensorflow.
def tf_2d_normal(x1, x2, mu1, mu2, s1, s2, rho):
    """Evaluate a bivariate normal density.

    Implements eq. 24 and 25 of http://arxiv.org/abs/1308.0850.

    Args:
        x1, x2: coordinates at which the density is evaluated.
        mu1, mu2: means of the two coordinates.
        s1, s2: standard deviations of the two coordinates.
        rho: correlation between the two coordinates.

    Returns:
        Tensor holding the density values.
    """
    dx1 = tf.subtract(x1, mu1)
    dx2 = tf.subtract(x2, mu2)
    sigma_product = tf.multiply(s1, s2)
    one_minus_rho_sq = 1-tf.square(rho)

    # Z of eq. 25: two squared normalised offsets minus the correlation term.
    correlation_term = tf.div(tf.multiply(rho, tf.multiply(dx1, dx2)), sigma_product)
    z = tf.square(tf.div(dx1, s1))+tf.square(tf.div(dx2, s2))-2*correlation_term

    # eq. 24: exponential divided by the normalising constant.
    numerator = tf.exp(tf.div(-z, 2*one_minus_rho_sq))
    normaliser = 2*np.pi*tf.multiply(sigma_product, tf.sqrt(one_minus_rho_sq))
    return tf.div(numerator, normaliser)

def get_lossfunc(z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr, z_eos, x1_data, x2_data, eos_data):
    """Return the summed negative log-likelihood of the targets.

    Implements eq. 26 of http://arxiv.org/abs/1308.0850.

    Args:
        z_pi: mixture weights.
        z_mu1, z_mu2: component means.
        z_sigma1, z_sigma2: component standard deviations.
        z_corr: component correlations.
        z_eos: predicted end-of-stroke probability.
        x1_data, x2_data: target offsets.
        eos_data: target end-of-stroke flags.

    Returns:
        Scalar tensor: the loss summed over all rows.
    """
    # Lower bound applied before every log, so that a likelihood which
    # underflows to exactly zero gives a large finite loss instead of inf.
    epsilon = 1e-20

    # Likelihood of the offsets under the mixture: weight every component
    # density and sum over the components (axis 1).
    component_density = tf_2d_normal(x1_data, x2_data, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr)
    mixture_density = tf.reduce_sum(tf.multiply(component_density, z_pi), 1, keep_dims=True)
    offset_loss = -tf.log(tf.maximum(mixture_density, epsilon))

    # Likelihood of the end-of-stroke flag: z_eos where the flag is 1 and
    # 1-z_eos where it is 0. A saturated sigmoid makes this exactly zero, so
    # it needs the same lower bound as the mixture term.
    eos_likelihood = tf.multiply(z_eos, eos_data) + tf.multiply(1-z_eos, 1-eos_data)
    eos_loss = -tf.log(tf.maximum(eos_likelihood, epsilon))

    return tf.reduce_sum(offset_loss + eos_loss)

# below is where we need to do MDN splitting of distribution params
def get_mixture_coef(output):
    """Turn the raw network output into mixture density parameters.

    Implements eq. 18 -> 23 of http://arxiv.org/abs/1308.0850.

    Args:
        output: 2-D tensor whose first column is the end-of-stroke value and
            whose remaining columns split evenly into six parameter groups.

    Returns:
        The list ``[pi, mu1, mu2, sigma1, sigma2, corr, eos]``.
    """
    eos_raw = output[:, 0:1]
    pi_raw, mu1, mu2, sigma1_raw, sigma2_raw, corr_raw = tf.split(
        axis=1, num_or_size_splits=6, value=output[:, 1:])

    # end of stroke signal
    eos = tf.sigmoid(eos_raw) # should be negated, but doesn't matter.

    # Softmax over the mixture weights; the row maximum is subtracted before
    # exponentiating.
    shifted_pi = tf.subtract(pi_raw, tf.reduce_max(pi_raw, 1, keep_dims=True))
    exp_pi = tf.exp(shifted_pi)
    inverse_sum = tf.reciprocal(tf.reduce_sum(exp_pi, 1, keep_dims=True))
    pi = tf.multiply(inverse_sum, exp_pi)

    # exponentiate the sigmas and also make corr between -1 and 1.
    sigma1 = tf.exp(sigma1_raw)
    sigma2 = tf.exp(sigma2_raw)
    corr = tf.tanh(corr_raw)

    return [pi, mu1, mu2, sigma1, sigma2, corr, eos]

# Distribution parameters computed from the network output
o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr, o_eos = get_mixture_coef(output)

# Identity ops that give every parameter tensor a fixed name in the graph
data_out_pi = tf.identity(o_pi, name="data_out_pi")
data_out_mu1 = tf.identity(o_mu1, name="data_out_mu1")
data_out_mu2 = tf.identity(o_mu2, name="data_out_mu2")
data_out_sigma1 = tf.identity(o_sigma1, name="data_out_sigma1")
data_out_sigma2 = tf.identity(o_sigma2, name="data_out_sigma2")
data_out_corr = tf.identity(o_corr, name="data_out_corr")
data_out_eos = tf.identity(o_eos, name="data_out_eos")

# Loss function: the summed loss divided by the number of timesteps in a batch
lossfunc = get_lossfunc(o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr, o_eos, x1_data, x2_data, eos_data)
cost = lossfunc / (batch_size * seq_len)

# Train the network with AdamOptimizer. The learning rate lives in a
# non-trainable variable, and the gradients are clipped to a global norm of
# 10.0 before being applied.
lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
raw_grads = tf.gradients(cost, tvars)
grads, _ = tf.clip_by_global_norm(raw_grads, 10.0)
optimizer = tf.train.AdamOptimizer(lr)
train_op = optimizer.apply_gradients(zip(grads, tvars))

In [None]:
model_path = "C:\\Users\\Jirka\\Desktop\\CWRNN_LONG\\"
# The saver does not create missing directories, so make sure the target
# exists before the first checkpoint is written.
os.makedirs(model_path, exist_ok=True)
checkpoint_path = os.path.join(model_path, 'model.ckpt')

sess = tf.InteractiveSession()
try:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    # Build the learning-rate update once and feed the value per epoch,
    # instead of adding a new assign op to the graph in every epoch.
    lr_value = tf.placeholder(tf.float32, shape=[])
    lr_update = tf.assign(lr, lr_value)

    data_loader = DataLoader(train_data, valid_data, batch_size)
    NEPOCH = 40
    total_steps = NEPOCH * data_loader.num_batches
    last_step = None        # global index of the most recent training step
    last_saved_step = None  # global index of the most recent checkpoint

    for e in range(NEPOCH):
        # Exponentially decaying learning rate: 0.005 * 0.95^epoch.
        sess.run(lr_update, {lr_value: 0.005 * (0.95 ** e)})
        data_loader.reset_batch_pointer()

        # Start every epoch from the initial state; within an epoch the state
        # returned by one batch is fed into the next one.
        state = prev_state.eval()
        for b in range(data_loader.num_batches):
            i = e * data_loader.num_batches + b
            start = time.time()
            x, y = data_loader.next_batch()
            feed = {inputs: x, targets: y, prev_state: state}
            train_loss, state, _ = sess.run([cost, state_out, train_op], feed)

            end = time.time()
            print(
                "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"  \
                .format(
                    i,
                    total_steps,
                    e,
                    train_loss, end - start))
            # Checkpoint every 100 steps, skipping step 0.
            if i % 100 == 0 and i > 0:
                saver.save(sess, checkpoint_path, global_step=i)
                print("model saved to {}".format(checkpoint_path))
                last_saved_step = i
            last_step = i

    # Keep the result of the steps trained after the last periodic checkpoint.
    if last_step is not None and last_step != last_saved_step:
        saver.save(sess, checkpoint_path, global_step=last_step)
        print("model saved to {}".format(checkpoint_path))
finally:
    # Release the session even when training is interrupted or fails.
    sess.close()