### importing required data

In [None]:
# Mount Google Drive at /content/drive; the copy cells below read the project
# files from "MyDrive/Colab Notebooks/Handwriting Generation".
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# %rm -rf data
# %rm -rf logstrain_scribe.txt
# %rm -rf model.py
# %rm -rf utils.py

In [None]:
#!cp -r "/content/model" "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/model"

cp: cannot stat '/content/model': No such file or directory


In [None]:
# Stage the two dataset archives from Drive onto the VM's local disk.
# `-p` keeps this cell re-runnable: plain `mkdir` errors once /content/data exists.
!mkdir -p '/content/data'
!cp -r "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/data/ascii-all.tar.gz" "/content/data/ascii-all.tar.gz"
!cp -r "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/data/lineStrokes-all.tar.gz" "/content/data/lineStrokes-all.tar.gz"

In [None]:
# Copy model.py from Drive into the current working directory.
!cp "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/model.py" "model.py"

In [None]:
# Fetch sample.py from Drive; the sampling cell does `from sample import *`.
# (The commented line is the reverse direction: push a locally edited sample.py back to Drive.)
#!cp "/content/sample.py" "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/sample.py"
!cp "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/sample.py" "/content/sample.py"

In [None]:
# Fetch utils.py from Drive; the imports cell does `from utils import *`.
!cp "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/utils.py" "utils.py"

In [None]:
# Copy the already-preprocessed stroke pickle. When this file exists in data_dir,
# DataLoader skips parsing the raw XML. The destination is relative to the
# current working directory and needs the data/ folder created above.
!cp "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/data/strokes_training_data.cpkl" "data/strokes_training_data.cpkl"

In [None]:
# Copy a previously saved checkpoint directory from Drive to /content/model1.
# NOTE(review): Args.save_path below points at ./model2/savedModel.ckpt, so this
# checkpoint does not appear to be the one training tries to resume from
# (the captured log reports "no saved model to load") -- confirm the intended directory.
#!cp -r "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/model" "/content/model"
!cp -r "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/model" "/content/model1"

In [None]:
# Unpack the transcription archive straight into data/ (yields data/ascii).
# Extracting with -C replaces the former "extract here, then mv" pair, which on a
# re-run moved the fresh folder *inside* the existing data/ascii directory.
!tar -xf data/ascii-all.tar.gz -C data

In [None]:
# Unpack the pen-stroke archive straight into data/ (yields data/lineStrokes).
# Extracting with -C replaces the former "extract here, then mv" pair, which on a
# re-run moved the fresh folder *inside* the existing data/lineStrokes directory.
!tar -xf data/lineStrokes-all.tar.gz -C data

In [None]:
# %pip targets the running kernel's environment; the version is pinned to the one
# this notebook was last run with so a re-run installs the same package.
%pip install -q svgwrite==1.4.1

Collecting svgwrite
  Downloading svgwrite-1.4.1-py3-none-any.whl (66 kB)
[?25l[K     |█████                           | 10 kB 21.0 MB/s eta 0:00:01[K     |█████████▉                      | 20 kB 10.8 MB/s eta 0:00:01[K     |██████████████▊                 | 30 kB 9.0 MB/s eta 0:00:01[K     |███████████████████▋            | 40 kB 8.4 MB/s eta 0:00:01[K     |████████████████████████▌       | 51 kB 5.2 MB/s eta 0:00:01[K     |█████████████████████████████▍  | 61 kB 5.6 MB/s eta 0:00:01[K     |████████████████████████████████| 66 kB 2.9 MB/s 
[?25hInstalling collected packages: svgwrite
Successfully installed svgwrite-1.4.1


# imports


In [None]:
import numpy as np
import numpy.matlib
import math
import random
import os
import xml.etree.ElementTree as ET
import importlib
import pickle

%tensorflow_version 1.x
import tensorflow as tf
import time
from utils import *

TensorFlow 1.x selected.


# data loading

In [None]:

class DataLoader():
    """Loads handwriting stroke data and serves training / validation batches.

    On construction the loader looks for ``strokes_training_data.cpkl`` inside
    ``args.data_dir``. If it is missing, the raw XML stroke files
    (``<data_dir>/lineStrokes``) and their transcriptions (``<data_dir>/ascii``)
    are parsed and the pickle is created first.

    Args read from ``args``: ``data_dir``, ``alphabet``, ``batch_size``,
    ``tsteps``, ``data_scale``, ``tsteps_per_ascii``.

    Parameters
    ----------
    args   : object exposing the attributes listed above.
    logger : object with a ``write(str)`` method.
    limit  : absolute clipping value applied to every entry of a stroke array
             (removes large noisy gaps in the data).
    """
    def __init__(self, args, logger, limit = 500):
        self.data_dir = args.data_dir
        self.alphabet = args.alphabet
        self.batch_size = args.batch_size
        self.tsteps = args.tsteps
        self.data_scale = args.data_scale # scale data down by this factor
        # number of characters covered by one tsteps-long window; kept as an int
        # (true division would give a float under Python 3)
        self.ascii_steps = int(args.tsteps // args.tsteps_per_ascii)
        self.logger = logger
        self.limit = limit # removes large noisy gaps in the data

        data_file = os.path.join(self.data_dir, "strokes_training_data.cpkl")
        stroke_dir = self.data_dir + "/lineStrokes"
        ascii_dir = self.data_dir + "/ascii"

        if not (os.path.exists(data_file)) :
            self.logger.write("\tcreating training data cpkl file from raw source")
            self.preprocess(stroke_dir, ascii_dir, data_file)

        self.load_preprocessed(data_file)
        self.reset_batch_pointer()

    def preprocess(self, stroke_dir, ascii_dir, data_file):
        """Parse the raw XML stroke files and write ``[strokes, asciis]`` to ``data_file``.

        Every ``*xml`` file found under ``stroke_dir`` is converted to an int16
        array of (dx, dy, end_of_stroke) rows. Its text line is looked up in the
        matching ``.txt`` file, whose path is derived from the stroke file path
        by replacing "lineStrokes" with "ascii". Lines whose text is 10
        characters or shorter are dropped.

        ``ascii_dir`` is accepted for interface compatibility but is not read here.
        """
        self.logger.write("\tparsing dataset...")

        # collect every file below stroke_dir (filtered by extension further down)
        filelist = []
        for dirName, subdirList, fileList in os.walk(stroke_dir):
            for fname in fileList:
                filelist.append(dirName+"/"+fname)

        def getStrokes(filename):
            """Read one XML file; return a list of strokes, each a list of [x, y] points."""
            root = ET.parse(filename).getroot()

            # children 1..3 of the first element give the reference corner used as origin
            x_offset = 1e20
            y_offset = 1e20
            for i in range(1, 4):
                x_offset = min(x_offset, float(root[0][i].attrib['x']))
                y_offset = min(y_offset, float(root[0][i].attrib['y']))
            x_offset -= 100
            y_offset -= 100

            result = []
            for stroke in root[1].findall('Stroke'):
                points = [[float(point.attrib['x'])-x_offset, float(point.attrib['y'])-y_offset]
                          for point in stroke.findall('Point')]
                result.append(points)
            return result

        def getAscii(filename, line_number):
            """Return text line ``line_number`` (0-based) of a transcription file, or ""."""
            with open(filename, "r") as f:
                s = f.read()
            # the text lines start two lines below the "CSR" marker
            s = s[s.find("CSR"):]
            lines = s.split("\n")
            if len(lines) > line_number+2:
                return lines[line_number+2]
            return ""

        def convert_stroke_to_array(stroke):
            """Flatten a list of strokes into an int16 array of (dx, dy, end_of_stroke) rows."""
            n_point = sum(len(single_stroke) for single_stroke in stroke)
            stroke_data = np.zeros((n_point, 3), dtype=np.int16)

            prev_x = 0
            prev_y = 0
            counter = 0
            for single_stroke in stroke:
                last_k = len(single_stroke) - 1
                for k, point in enumerate(single_stroke):
                    cur_x = int(point[0])
                    cur_y = int(point[1])
                    stroke_data[counter, 0] = cur_x - prev_x
                    stroke_data[counter, 1] = cur_y - prev_y
                    prev_x = cur_x
                    prev_y = cur_y
                    # the last point of each stroke carries the pen-up flag
                    stroke_data[counter, 2] = 1 if k == last_k else 0
                    counter += 1
            return stroke_data

        # build stroke database of every xml file inside iam database
        strokes = []
        asciis = []
        for stroke_file in filelist:
            if not stroke_file.endswith('xml'):
                continue
            stroke = convert_stroke_to_array(getStrokes(stroke_file))

            # "<...>/lineStrokes/<...>-NN.xml" -> "<...>/ascii/<...>.txt", line NN (1-based)
            ascii_file = stroke_file.replace("lineStrokes","ascii")[:-7] + ".txt"
            line_number = int(stroke_file[-6:-4]) - 1
            ascii_line = getAscii(ascii_file, line_number)
            if len(ascii_line) > 10:
                strokes.append(stroke)
                asciis.append(ascii_line)
            else:
                self.logger.write("\tline length was too short. line was: " + ascii_line)

        assert(len(strokes)==len(asciis)), "There should be a 1:1 correspondence between stroke data and ascii labels."
        with open(data_file,"wb") as f:
            pickle.dump([strokes,asciis], f, protocol=2)
        self.logger.write("\tfinished parsing dataset. saved {} lines".format(len(strokes)))


    def load_preprocessed(self, data_file):
        """Load the pickled data, clip/scale it and split it into train and validation sets.

        Only lines with more than ``tsteps + 2`` points are kept. Every 20th kept
        line (5%) goes to the validation set, the rest to the training set.
        """
        # SECURITY: pickle.load can execute arbitrary code -- only load files you created yourself.
        with open(data_file,"rb") as f:
            [self.raw_stroke_data, self.raw_ascii_data] = pickle.load(f)

        self.stroke_data = []
        self.ascii_data = []
        self.valid_stroke_data = []
        self.valid_ascii_data = []

        cur_data_counter = 0
        for i in range(len(self.raw_stroke_data)):
            data = self.raw_stroke_data[i]
            if len(data) > (self.tsteps+2):
                # removes large gaps from the data, then converts to float for scaling
                data = np.clip(data, -self.limit, self.limit).astype(np.float32)
                data[:,0:2] /= self.data_scale
                cur_data_counter = cur_data_counter + 1
                if cur_data_counter % 20 == 0:
                    self.valid_stroke_data.append(data)
                    self.valid_ascii_data.append(self.raw_ascii_data[i])
                else:
                    self.stroke_data.append(data)
                    self.ascii_data.append(self.raw_ascii_data[i])

        # number of full batches that fit into one pass over the training lines
        self.num_batches = int(len(self.stroke_data) / self.batch_size)
        self.logger.write("\tloaded dataset:")
        self.logger.write("\t\t{} train individual data points".format(len(self.stroke_data)))
        self.logger.write("\t\t{} valid individual data points".format(len(self.valid_stroke_data)))
        self.logger.write("\t\t{} batches".format(self.num_batches))

    def validation_data(self):
        """Return one fixed validation batch ``(x_batch, y_batch, ascii_list, one_hots)``.

        The validation lines are cycled if there are fewer than ``batch_size`` of
        them. ``y`` is ``x`` shifted by one time step.

        Raises:
            ValueError: if the validation set is empty.
        """
        if not self.valid_stroke_data:
            raise ValueError("validation set is empty; cannot build a validation batch")
        x_batch = []
        y_batch = []
        ascii_list = []
        for i in range(self.batch_size):
            valid_ix = i%len(self.valid_stroke_data)
            data = self.valid_stroke_data[valid_ix]
            x_batch.append(np.copy(data[:self.tsteps]))
            y_batch.append(np.copy(data[1:self.tsteps+1]))
            ascii_list.append(self.valid_ascii_data[valid_ix])
        one_hots = [to_one_hot(s, self.ascii_steps, self.alphabet) for s in ascii_list]
        return x_batch, y_batch, ascii_list, one_hots

    def next_batch(self):
        """Return the next training batch ``(x_batch, y_batch, ascii_list, one_hots)``.

        Lines are visited in the current random permutation. Each sample is the
        FIRST ``tsteps`` points of its line, and the one-hot text is likewise cut
        from the first character. ``y`` is ``x`` shifted by one time step.
        """
        x_batch = []
        y_batch = []
        ascii_list = []
        for i in range(self.batch_size):
            line_ix = self.idx_perm[self.pointer]
            data = self.stroke_data[line_ix]
            x_batch.append(np.copy(data[:self.tsteps]))
            y_batch.append(np.copy(data[1:self.tsteps+1]))
            ascii_list.append(self.ascii_data[line_ix])
            self.tick_batch_pointer()
        one_hots = [to_one_hot(s, self.ascii_steps, self.alphabet) for s in ascii_list]
        return x_batch, y_batch, ascii_list, one_hots

    def tick_batch_pointer(self):
        """Advance to the next training line; reshuffle once every line has been used."""
        self.pointer += 1
        if (self.pointer >= len(self.stroke_data)):
            self.reset_batch_pointer()

    def reset_batch_pointer(self):
        """Draw a fresh random visiting order over the training lines and rewind."""
        self.idx_perm = np.random.permutation(len(self.stroke_data))
        self.pointer = 0

In [None]:
# utility function for converting input ascii characters into vectors the network can understand.
# index position 0 means "unknown"
def to_one_hot(s, ascii_steps, alphabet):
    """Encode string ``s`` as a one-hot matrix of shape (ascii_steps, len(alphabet) + 1).

    Column 0 means "unknown": it is used for characters that are not in
    ``alphabet`` and for the padding rows of strings shorter than ``ascii_steps``.
    Column ``k + 1`` stands for ``alphabet[k]``. Strings longer than
    ``ascii_steps`` are truncated.

    Args:
        s: text to encode.
        ascii_steps: number of rows (characters) in the result; cast to int.
        alphabet: string of known characters.

    Returns:
        numpy float array with exactly one 1 per row.
    """
    steplimit = 3000
    s = s[:steplimit] # clip super-long strings (slice bound must be an int, not 3e3)
    seq = [alphabet.find(char) + 1 for char in s] # find() == -1 for unknown chars -> index 0
    ascii_steps = int(ascii_steps)
    # truncate to ascii_steps, or right-pad with the "unknown" index
    seq = seq[:ascii_steps] + [0] * max(0, ascii_steps - len(seq))
    one_hot = np.zeros((ascii_steps, len(alphabet) + 1))
    one_hot[np.arange(ascii_steps), seq] = 1
    return one_hot


# model definitions

In [None]:
class Model():
	"""Handwriting synthesis network: three stacked LSTMs, a Gaussian character window and an MDN head.

	The constructor builds the complete TF1 graph, opens a ``tf.InteractiveSession``
	and initialises all variables. In training mode (``args.train`` truthy) the graph
	is unrolled over ``args.tsteps`` steps with batch size ``args.batch_size``;
	otherwise it is built for a single step and a batch of one (used for sampling).

	Fields read from ``args``: rnn_size, train, nmixtures, kmixtures, batch_size,
	tsteps, alphabet, dropout, grad_clip, tsteps_per_ascii, data_dir, optimizer.
	``logger`` must provide a ``write(str)`` method.
	"""
	def __init__(self, args, logger):
		self.logger = logger

		# ----- transfer some of the args params over to the model

		# model params
		self.rnn_size = args.rnn_size
		self.train = args.train
		self.nmixtures = args.nmixtures
		self.kmixtures = args.kmixtures
		self.batch_size = args.batch_size if self.train else 1 # training/sampling specific
		self.tsteps = args.tsteps if self.train else 1 # training/sampling specific
		self.alphabet = args.alphabet
		# training params
		self.dropout = args.dropout
		self.grad_clip = args.grad_clip
		# misc
		self.tsteps_per_ascii = args.tsteps_per_ascii
		self.data_dir = args.data_dir

		self.graves_initializer = tf.truncated_normal_initializer(mean=0., stddev=.075, seed=None, dtype=tf.float32)
		self.window_b_initializer = tf.truncated_normal_initializer(mean=-3.0, stddev=.25, seed=None, dtype=tf.float32) # hacky initialization

		self.logger.write('\tusing alphabet{}'.format(self.alphabet))
		self.char_vec_len = len(self.alphabet) + 1 #plus one for <UNK> token
		# integer number of characters per window; a float here becomes a placeholder
		# dimension below and is rejected when tsteps is not a multiple of tsteps_per_ascii
		self.ascii_steps = int(args.tsteps // args.tsteps_per_ascii)


		# ----- build the basic recurrent network architecture
		cell_func = tf.contrib.rnn.LSTMCell # could be GRUCell or RNNCell
		self.cell0 = cell_func(args.rnn_size, state_is_tuple=True, initializer=self.graves_initializer)
		self.cell1 = cell_func(args.rnn_size, state_is_tuple=True, initializer=self.graves_initializer)
		self.cell2 = cell_func(args.rnn_size, state_is_tuple=True, initializer=self.graves_initializer)

		if (self.train and self.dropout < 1): # training mode
			# self.dropout is passed as the KEEP probability of each cell's output
			self.cell0 = tf.contrib.rnn.DropoutWrapper(self.cell0, output_keep_prob = self.dropout)
			self.cell1 = tf.contrib.rnn.DropoutWrapper(self.cell1, output_keep_prob = self.dropout)
			self.cell2 = tf.contrib.rnn.DropoutWrapper(self.cell2, output_keep_prob = self.dropout)

		self.input_data = tf.placeholder(dtype=tf.float32, shape=[None, self.tsteps, 3])
		self.target_data = tf.placeholder(dtype=tf.float32, shape=[None, self.tsteps, 3])
		self.istate_cell0 = self.cell0.zero_state(batch_size=self.batch_size, dtype=tf.float32)
		self.istate_cell1 = self.cell1.zero_state(batch_size=self.batch_size, dtype=tf.float32)
		self.istate_cell2 = self.cell2.zero_state(batch_size=self.batch_size, dtype=tf.float32)

		#slice the input volume into separate vols for each tstep
		inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(self.input_data, self.tsteps, 1)]
		#build cell0 computational graph
		outs_cell0, self.fstate_cell0 = tf.contrib.legacy_seq2seq.rnn_decoder(inputs, self.istate_cell0, self.cell0, loop_function=None, scope='cell0')


		# ----- build the gaussian character window
		def get_window(alpha, beta, kappa, c):
			"""Return the soft window over the character sequence ``c`` and its weights phi."""
			# phi -> [? x 1 x ascii_steps] and is a tf matrix
			# c -> [? x ascii_steps x alphabet] and is a tf matrix
			ascii_steps = c.get_shape()[1].value #number of items in sequence
			phi = get_phi(ascii_steps, alpha, beta, kappa)
			window = tf.matmul(phi,c)
			window = tf.squeeze(window, [1]) # window ~ [?,alphabet]
			return window, phi

		#get phi for all t,u (returns a [1 x tsteps] matrix) that defines the window
		def get_phi(ascii_steps, alpha, beta, kappa):
			"""Sum of kmixtures Gaussian bumps evaluated at every character position u."""
			# alpha, beta, kappa -> [?,kmixtures,1] and each is a tf variable
			u = np.linspace(0,ascii_steps-1,ascii_steps) # weight all the U items in the sequence
			kappa_term = tf.square( tf.subtract(kappa,u))
			exp_term = tf.multiply(-beta,kappa_term)
			phi_k = tf.multiply(alpha, tf.exp(exp_term))
			phi = tf.reduce_sum(phi_k,1, keep_dims=True)
			return phi # phi ~ [?,1,ascii_steps]

		def get_window_params(i, out_cell0, kmixtures, prev_kappa, reuse=True):
			"""Dense layer on the first LSTM's output -> (alpha, beta, kappa) of the window.

			``i`` is the time-step index and is not used in the computation.
			kappa is accumulated on top of ``prev_kappa``; since the increment is an
			exp() it is always positive.
			"""
			hidden = out_cell0.get_shape()[1]
			n_out = 3*kmixtures
			with tf.variable_scope('window',reuse=reuse):
				window_w = tf.get_variable("window_w", [hidden, n_out], initializer=self.graves_initializer)
				window_b = tf.get_variable("window_b", [n_out], initializer=self.window_b_initializer)
			abk_hats = tf.nn.xw_plus_b(out_cell0, window_w, window_b) # abk_hats ~ [?,n_out]
			abk = tf.exp(tf.reshape(abk_hats, [-1, 3*kmixtures,1])) # abk_hats ~ [?,n_out] = "alpha, beta, kappa hats"

			alpha, beta, kappa = tf.split(abk, 3, 1) # alpha_hat, etc ~ [?,kmixtures]
			kappa = kappa + prev_kappa
			return alpha, beta, kappa # each ~ [?,kmixtures,1]

		self.init_kappa = tf.placeholder(dtype=tf.float32, shape=[None, self.kmixtures, 1])
		self.char_seq = tf.placeholder(dtype=tf.float32, shape=[None, self.ascii_steps, self.char_vec_len])
		prev_kappa = self.init_kappa
		prev_window = self.char_seq[:,0,:]

		#add gaussian window result
		reuse = False # window variables are created on the first step and shared afterwards
		for i in range(len(outs_cell0)):
			[alpha, beta, new_kappa] = get_window_params(i, outs_cell0[i], self.kmixtures, prev_kappa, reuse=reuse)
			window, phi = get_window(alpha, beta, new_kappa, self.char_seq)
			outs_cell0[i] = tf.concat((outs_cell0[i],window), 1) #concat outputs
			outs_cell0[i] = tf.concat((outs_cell0[i],inputs[i]), 1) #concat input data
			prev_kappa = new_kappa
			prev_window = window
			reuse = True
		#save some attention mechanism params (useful for sampling/debugging later)
		self.window = window
		self.phi = phi
		self.new_kappa = new_kappa
		self.alpha = alpha


		# ----- finish building LSTMs 2 and 3
		outs_cell1, self.fstate_cell1 = tf.contrib.legacy_seq2seq.rnn_decoder(outs_cell0, self.istate_cell1, self.cell1, loop_function=None, scope='cell1')

		outs_cell2, self.fstate_cell2 = tf.contrib.legacy_seq2seq.rnn_decoder(outs_cell1, self.istate_cell2, self.cell2, loop_function=None, scope='cell2')

		# ----- start building the Mixture Density Network on top (start with a dense layer to predict the MDN params)
		n_out = 1 + self.nmixtures * 6 # params = end_of_stroke + 6 parameters per Gaussian
		with tf.variable_scope('mdn_dense'):
			mdn_w = tf.get_variable("output_w", [self.rnn_size, n_out], initializer=self.graves_initializer)
			mdn_b = tf.get_variable("output_b", [n_out], initializer=self.graves_initializer)

		out_cell2 = tf.reshape(tf.concat(outs_cell2, 1), [-1, args.rnn_size]) #concat outputs for efficiency
		output = tf.nn.xw_plus_b(out_cell2, mdn_w, mdn_b) #data flows through dense nn


		# ----- build mixture density cap on top of the last recurrent cell (cell2)
		def gaussian2d(x1, x2, mu1, mu2, s1, s2, rho):
			"""Bivariate normal density of (x1, x2) for every mixture component."""
			# define gaussian mdn (eq 24, 25 from http://arxiv.org/abs/1308.0850)
			x_mu1 = tf.subtract(x1, mu1)
			x_mu2 = tf.subtract(x2, mu2)
			Z = tf.square(tf.div(x_mu1, s1)) + \
			    tf.square(tf.div(x_mu2, s2)) - \
			    2*tf.div(tf.multiply(rho, tf.multiply(x_mu1, x_mu2)), tf.multiply(s1, s2))
			rho_square_term = 1-tf.square(rho)
			power_e = tf.exp(tf.div(-Z,2*rho_square_term))
			regularize_term = 2*np.pi*tf.multiply(tf.multiply(s1, s2), tf.sqrt(rho_square_term))
			gaussian = tf.div(power_e, regularize_term)
			return gaussian

		def get_loss(pi, x1_data, x2_data, eos_data, mu1, mu2, sigma1, sigma2, rho, eos):
			"""Summed negative log-likelihood of the targets under the predicted mixture."""
			# define loss function (eq 26 of http://arxiv.org/abs/1308.0850)
			gaussian = gaussian2d(x1_data, x2_data, mu1, mu2, sigma1, sigma2, rho)
			term1 = tf.multiply(gaussian, pi)
			term1 = tf.reduce_sum(term1, 1, keep_dims=True) #do inner summation
			term1 = -tf.log(tf.maximum(term1, 1e-20)) # some errors are zero -> numerical errors.

			term2 = tf.multiply(eos, eos_data) + tf.multiply(1-eos, 1-eos_data) #modified Bernoulli -> eos probability
			term2 = -tf.log(term2) #negative log error gives loss

			return tf.reduce_sum(term1 + term2) #do outer summation

		# now transform dense NN outputs into params for MDN
		def get_mdn_coef(Z):
			"""Split the dense output ``Z`` into constrained MDN parameters."""
			# returns the tf slices containing mdn dist params (eq 18...23 of http://arxiv.org/abs/1308.0850)
			eos_hat = Z[:, 0:1] #end of sentence tokens
			pi_hat, mu1_hat, mu2_hat, sigma1_hat, sigma2_hat, rho_hat = tf.split(Z[:, 1:], 6, 1)
			self.pi_hat, self.sigma1_hat, self.sigma2_hat = \
										pi_hat, sigma1_hat, sigma2_hat # these are useful for bias method during sampling

			eos = tf.sigmoid(-1*eos_hat) # technically we gained a negative sign
			pi = tf.nn.softmax(pi_hat) # softmax z_pi:
			mu1 = mu1_hat; mu2 = mu2_hat # leave mu1, mu2 as they are
			sigma1 = tf.exp(sigma1_hat); sigma2 = tf.exp(sigma2_hat) # exp for sigmas
			rho = tf.tanh(rho_hat) # tanh for rho (squish between -1 and 1)

			return [eos, pi, mu1, mu2, sigma1, sigma2, rho]

		# reshape target data (as we did the input data)
		flat_target_data = tf.reshape(self.target_data,[-1, 3])
		[x1_data, x2_data, eos_data] = tf.split(flat_target_data, 3, 1) #we might as well split these now

		[self.eos, self.pi, self.mu1, self.mu2, self.sigma1, self.sigma2, self.rho] = get_mdn_coef(output)

		loss = get_loss(self.pi, x1_data, x2_data, eos_data, self.mu1, self.mu2, self.sigma1, self.sigma2, self.rho, self.eos)
		self.cost = loss / (self.batch_size * self.tsteps) # average per point

		# ----- bring together all variables and prepare for training
		# non-trainable variables so the training loop can assign the values at run time
		self.learning_rate = tf.Variable(0.0, trainable=False)
		self.decay = tf.Variable(0.0, trainable=False)
		self.momentum = tf.Variable(0.0, trainable=False)

		tvars = tf.trainable_variables()
		grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.grad_clip)

		if args.optimizer == 'adam':
			self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
		elif args.optimizer == 'rmsprop':
			self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate, decay=self.decay, momentum=self.momentum)
		else:
			raise ValueError("Optimizer type not recognized")
		self.train_op = self.optimizer.apply_gradients(zip(grads, tvars))

		# ----- some TensorFlow I/O
		self.sess = tf.InteractiveSession()
		self.saver = tf.train.Saver(tf.global_variables())
		self.sess.run(tf.global_variables_initializer())

	# ----- for restoring previous models
	def try_load_model(self, save_path):
		"""Restore the latest checkpoint stored next to ``save_path``, if there is one.

		Loading is best-effort: any failure is logged and training/sampling starts
		from the freshly initialised variables.

		Args:
			save_path: checkpoint path prefix; its parent directory is searched.

		Returns:
			(load_was_success, global_step) -- ``global_step`` is parsed from the
			"-<step>" suffix of the restored checkpoint path and is 0 when nothing
			was loaded.
		"""
		load_path = None
		try:
			save_dir = '/'.join(save_path.split('/')[:-1])
			ckpt = tf.train.get_checkpoint_state(save_dir)
			if ckpt is not None and ckpt.model_checkpoint_path:
				self.saver.restore(self.sess, ckpt.model_checkpoint_path)
				load_path = ckpt.model_checkpoint_path
		except Exception as err:
			# `except Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
			# through; the reason is logged so a broken or mismatched checkpoint is not
			# mistaken for a missing one.
			self.logger.write("could not restore saved model ({}: {})".format(type(err).__name__, err))
			load_path = None

		if load_path is None:
			self.logger.write("no saved model to load. starting new session")
			return False, 0

		self.logger.write("loaded model: {}".format(load_path))
		self.saver = tf.train.Saver(tf.global_variables())
		global_step = int(load_path.split('-')[-1])
		return True, global_step

# model training

In [None]:
def train_model(args):
	"""Train the handwriting model, resuming from the latest checkpoint when one exists.

	Runs ``args.nepochs`` epochs of ``args.nbatches`` batches each. The learning
	rate for epoch ``e`` is ``args.learning_rate * args.lr_decay ** e``. A
	checkpoint is written to ``args.save_path`` every ``args.save_every`` steps and
	a progress line is logged every 10 steps.

	Args:
		args: configuration object; besides the fields named above this reads
			decay, momentum, batch_size and kmixtures, and is passed on to
			Logger, DataLoader and Model.
	"""
	logger = Logger(args) # make logging utility
	logger.write("\nTRAINING MODE...")
	logger.write("{}\n".format(args))
	logger.write("loading data...")
	data_loader = DataLoader(args, logger=logger)

	logger.write("building model...")
	model = Model(args, logger=logger)

	logger.write("attempt to load saved model...")
	load_was_success, global_step = model.try_load_model(args.save_path)

	# the TF1 Saver refuses to write when the parent directory is missing, so create
	# it up front rather than failing at the first checkpoint after save_every steps
	save_dir = os.path.dirname(args.save_path)
	if save_dir:
		os.makedirs(save_dir, exist_ok=True)

	v_x, v_y, v_s, v_c = data_loader.validation_data()
	valid_inputs = {model.input_data: v_x, model.target_data: v_y, model.char_seq: v_c}

	logger.write("training...")
	model.sess.run(tf.assign(model.decay, args.decay ))
	model.sess.run(tf.assign(model.momentum, args.momentum ))
	running_average = 0.0 ; remember_rate = 0.99
	# when resuming, start in the epoch/batch the restored step count points at
	for e in range(global_step//args.nbatches, args.nepochs):
		model.sess.run(tf.assign(model.learning_rate, args.learning_rate * (args.lr_decay ** e)))
		logger.write("learning rate: {}".format(model.learning_rate.eval()))

		# every batch starts from zero LSTM states and a zero window position
		c0, c1, c2 = model.istate_cell0.c.eval(), model.istate_cell1.c.eval(), model.istate_cell2.c.eval()
		h0, h1, h2 = model.istate_cell0.h.eval(), model.istate_cell1.h.eval(), model.istate_cell2.h.eval()
		kappa = np.zeros((args.batch_size, args.kmixtures, 1))

		for b in range(global_step%args.nbatches, args.nbatches):

			i = e * args.nbatches + b
			# first step after a resume: move past the restored step so the checkpoint
			# is not immediately re-saved, then forget the offset for later epochs.
			# (`!=` instead of `is not`: ints must be compared by value, not identity.)
			if global_step != 0:
				i += 1
				global_step = 0

			if i % args.save_every == 0 and (i > 0):
				model.saver.save(model.sess, args.save_path, global_step = i) ; logger.write('SAVED MODEL')

			start = time.time()
			x, y, s, c = data_loader.next_batch()

			feed = {model.input_data: x, model.target_data: y, model.char_seq: c, model.init_kappa: kappa, \
					model.istate_cell0.c: c0, model.istate_cell1.c: c1, model.istate_cell2.c: c2, \
					model.istate_cell0.h: h0, model.istate_cell1.h: h1, model.istate_cell2.h: h2}

			[train_loss, _] = model.sess.run([model.cost, model.train_op], feed)
			running_average = running_average*remember_rate + train_loss*(1-remember_rate)

			if i % 10 == 0:
				# the validation loss is only reported here, so it is only computed here
				# instead of paying for an extra forward pass on every batch
				feed.update(valid_inputs)
				feed[model.init_kappa] = np.zeros((args.batch_size, args.kmixtures, 1))
				[valid_loss] = model.sess.run([model.cost], feed)

				end = time.time()
				logger.write("{}/{}, loss = {:.3f}, regloss = {:.5f}, valid_loss = {:.3f}, time = {:.3f}" \
					.format(i, args.nepochs * args.nbatches, train_loss, running_average, valid_loss, end - start) )

# sample creation

In [None]:
from sample import *
def sample_model(args, logger=None):
	"""Generate handwriting for ``args.text`` (or a set of test strings) from the saved model.

	Each round builds a fresh single-step model, restores the latest checkpoint
	next to ``args.save_path`` and, on success, writes a window plot, a heatmap and
	a line plot per string below ``<args.log_dir>/figures``. Rounds repeat every
	``args.sleep_time`` seconds so samples can be produced while training writes new
	checkpoints.

	Args:
		args: configuration object. Reads text, save_path, log_dir,
			tsteps_per_ascii and sleep_time. An optional boolean ``repeat``
			attribute (default True, i.e. loop forever) turns the repetition off.
		logger: object with ``write(str)``; a new ``Logger(args)`` is created when None.
	"""
	if args.text == '':
		strings = ['call me ishmael some years ago', 'A project by Sam Greydanus', 'mmm mmm mmm mmm mmm mmm mmm', \
			'What I cannot create I do not understand', 'You know nothing Jon Snow'] # test strings
	else:
		strings = [args.text]

	logger = Logger(args) if logger is None else logger # instantiate logger
	repeat = getattr(args, 'repeat', True)

	# a loop rather than self-recursion: repeating forever must not grow the call stack
	while True:
		logger.write("\nSAMPLING MODE...")
		logger.write("loading data...")

		logger.write("building model...")
		model = Model(args, logger)

		logger.write("attempt to load saved model...")
		load_was_success, global_step = model.try_load_model(args.save_path)

		if load_was_success:
			# os.path.join gives the same paths as before when log_dir ends with '/',
			# and a proper "<log_dir>/figures" folder when it does not
			fig_dir = os.path.join(args.log_dir, 'figures')
			os.makedirs(fig_dir, exist_ok=True)
			for s in strings:
				strokes, phis, windows, kappas = sample(s, model, args)

				tag = s[:10].replace(' ', '_')
				w_save_path = os.path.join(fig_dir, 'iter-{}-w-{}'.format(global_step, tag))
				g_save_path = os.path.join(fig_dir, 'iter-{}-g-{}'.format(global_step, tag))
				l_save_path = os.path.join(fig_dir, 'iter-{}-l-{}'.format(global_step, tag))

				window_plots(phis, windows, save_path=w_save_path)
				gauss_plot(strokes, 'Heatmap for "{}"'.format(s), figsize = (2*len(s),4), save_path=g_save_path)
				line_plot(strokes, 'Line plot for "{}"'.format(s), figsize = (len(s),2), save_path=l_save_path)

				# make sure that kappas are reasonable
				logger.write( "kappas: \n{}".format(str(kappas[min(kappas.shape[0]-1, args.tsteps_per_ascii),:])) )
		else:
			logger.write("load failed, sampling canceled")

		# release this round's session before the graph is thrown away
		model.sess.close()
		if not repeat:
			break
		tf.reset_default_graph()
		time.sleep(args.sleep_time)

In [None]:
# Clear the default TF graph so re-running this cell builds the model from scratch
# instead of adding to the graph left behind by a previous run.
tf.reset_default_graph()
# To pick up edits to utils.py without restarting the kernel, reload the module
# object (note: `utils`, not `utils.py`) and re-import its names:
#importlib.reload(utils.py)
#from utils import *
class Args():
  """Plain container for every hyper-parameter and path used by this notebook.

  Stands in for a command-line argument namespace: DataLoader, Model,
  train_model and sample_model all read their settings from an instance.
  """
  def __init__(self):
    # general model params
    self.train = True       # True: build the unrolled training graph; False: single-step sampling graph
    self.rnn_size = 100     # units per LSTM layer
    self.tsteps = 150       # stroke points per training sequence
    self.nmixtures = 8      # Gaussian components in the MDN output

    # window params
    self.kmixtures = 1      # Gaussian components in the character window
    self.alphabet = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    self.tsteps_per_ascii = 25  # stroke points budgeted per character

    # training params
    self.batch_size = 32
    self.nbatches = int(500)    # batches per epoch
    self.nepochs = int(250)
    self.dropout = 0.85         # keep probability of the LSTM outputs
    self.grad_clip = 10.        # global-norm gradient clipping threshold
    self.optimizer = 'rmsprop'  # 'adam' or 'rmsprop'
    self.learning_rate = 1e-4
    self.lr_decay = 1.0         # per-epoch multiplicative learning-rate decay
    self.decay = 0.95           # RMSProp decay
    self.momentum = 0.9         # RMSProp momentum

    # I/O params
    # NOTE(review): log_dir has no trailing slash; code that builds file names by
    # plain concatenation with it would produce names like "./logstrain_scribe.txt"
    # -- confirm against utils.Logger before changing.
    self.log_dir = './logs'
    self.data_dir = './data'
    self.data_scale = 50        # stroke offsets are divided by this factor
    self.save_path = './model2/savedModel.ckpt'
    self.save_every = 1000      # checkpoint interval in training steps

    # sampling params
    self.text = "Hello this is Anu"  # text to render; '' selects the built-in test strings
    self.bias = 1.0
    self.sleep_time = 60*5      # seconds between sampling rounds
    self.style = -1



# Build the configuration and start training (interrupt the cell to stop).
# To sample instead, set `self.train = False` in Args and call sample_model(args).
args = Args()
train_model(args)
#sample_model(args)


TRAINING MODE...
<__main__.Args object at 0x7f1ad473c190>

loading data...
	loaded dataset:
		11262 train individual data points
		592 valid individual data points
		351 batches
building model...
	using alphabet abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ




attempt to load saved model...
no saved model to load. starting new session
training...
learning rate: 9.999999747378752e-05
0/125000, loss = 3.090, regloss = 0.03090, valid_loss = 3.061, time = 46.975
10/125000, loss = 3.106, regloss = 0.31705, valid_loss = 3.056, time = 0.446
20/125000, loss = 3.039, regloss = 0.57752, valid_loss = 3.046, time = 0.454
30/125000, loss = 2.912, regloss = 0.81331, valid_loss = 3.033, time = 0.446
40/125000, loss = 3.039, regloss = 1.02522, valid_loss = 3.014, time = 0.823
50/125000, loss = 2.982, regloss = 1.21226, valid_loss = 2.990, time = 0.444
60/125000, loss = 3.012, regloss = 1.37867, valid_loss = 2.960, time = 0.446
70/125000, loss = 2.879, regloss = 1.52687, valid_loss = 2.917, time = 0.447
80/125000, loss = 2.799, regloss = 1.65591, valid_loss = 2.855, time = 0.459
90/125000, loss = 2.761, regloss = 1.76637, valid_loss = 2.763, time = 0.446
100/125000, loss = 2.564, regloss = 1.85541, valid_loss = 2.612, time = 0.449
110/125000, loss = 2.413, r

KeyboardInterrupt: ignored

In [None]:
# Back up the checkpoint directory written during training to Drive.
# Caution: if the destination folder already exists, `cp -r` places the copy
# inside it (.../model2/model2) instead of replacing its contents.
!cp -r '/content/model2' "/content/drive/MyDrive/Colab Notebooks/Handwriting Generation/model2"