Commit

Trainable model. Some updates and fixes.
emreaksan committed Jul 16, 2018
1 parent 691d126 commit d6a26c5
Showing 13 changed files with 284 additions and 291 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -106,3 +106,4 @@ ENV/
# Training
runs/
runs_evaluation/
data/
9 changes: 6 additions & 3 deletions README.md
@@ -10,16 +10,19 @@ We collected data from 94 authors by using [IAMOnDB](http://www.fki.inf.unibe.ch

## Pretrained Model
1. You can download a pretrained model from [our project page](https://ait.ethz.ch/projects/2018/deepwriting/downloads/tf-1514981744-deepwriting_synthesis_model.tar.gz).
2. Either move it under `<repository_path>/runs/` or update the `validation_data` path in `config.json`.
3. You can run:
```
python tf_evaluate_hw.py -S <path_to_model_folder> -M tf-1514981744-deepwriting_synthesis_model -QL
```


## Dependencies
1. Numpy
1. numpy
2. TensorFlow 1.2+ (earlier versions are untested)
3. Matplotlib
3. matplotlib
4. OpenCV (pip install opencv-python is enough)
5. svgwrite
6. scipy
7. sklearn
8. Pillow
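
For a quick environment sanity check, the following minimal Python snippet (not part of the repository) verifies that the packages listed above import and that TensorFlow meets the 1.2 requirement:

```
import numpy, scipy, sklearn, svgwrite, matplotlib, PIL
import cv2                      # provided by the opencv-python package
import tensorflow as tf
from distutils.version import LooseVersion

# TensorFlow 1.2+ is expected; earlier versions are untested.
assert LooseVersion(tf.__version__) >= LooseVersion('1.2'), tf.__version__
print('TensorFlow', tf.__version__, '- all dependencies import fine.')
```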
53 changes: 28 additions & 25 deletions config.py
@@ -1,23 +1,25 @@
import tensorflow as tf
import numpy as np


def main():
config = {}
config = dict()
config['seed'] = 17
tf.set_random_seed(17)

config['training_data'] = './data/dataset_scaled_word_300_eoc_split_old_training.npz'
config['validation_data'] = './data/dataset_scaled_word_300_eoc_split_old_validation.npz'
config['training_data'] = './data/deepwriting_training.npz'
config['validation_data'] = './data/deepwriting_validation.npz'
config['validate_model'] = False

config['model_save_dir'] = './runs/'

config['checkpoint_every_step'] = 1000
config['validate_every_step'] = 25 # validation performance
config['img_summary_every_step'] = 100 # tf_summary
config['print_every_step'] = 2 # print
config['validate_every_step'] = 25 # validation performance
# Model predictions are converted into images and displayed in Tensorboard. Set 0 to disable image summaries.
config['img_summary_every_step'] = 0
config['print_every_step'] = 2 # print

config['reduce_loss'] = "mean_per_step" # "mean" "sum_mean", "mean", "sum".
config['reduce_loss'] = "mean_per_step" # "mean" "sum_mean", "mean", "sum".
config['batch_size'] = 64
config['num_epochs'] = 200
config['learning_rate'] = 1e-3
@@ -26,7 +28,7 @@ def main():
config['learning_rate_decay_rate'] = 0.96

config['create_timeline'] = False
config['tensorboard_verbose'] = 0 # 1 for histogram summaries and 2 for latent space norms.
config['tensorboard_verbose'] = 0 # 1 for histogram summaries and 2 for latent space norms.
config['use_dynamic_rnn'] = True
config['use_bucket_feeder'] = True
config['use_staging_area'] = True
@@ -43,7 +45,7 @@ def main():
#
config['output'] = {}
config['output']['keys'] = ['out_mu', 'out_sigma', 'out_rho', 'out_pen', 'out_eoc']
config['output']['dims'] = [2, 2, 1, 1, 1] # Ideally these should be set by the model.
config['output']['dims'] = [2, 2, 1, 1, 1] # Ideally these should be set by the model.
config['output']['activation_funcs'] = [None, 'softplus', 'tanh', 'sigmoid', 'sigmoid']

config['latent_rnn'] = {} # See get_rnn_cell function in tf_model_utils.
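
The `config['output']` block above (keys, dims, activation functions) describes the model's prediction heads. The code consuming it is not part of this commit; as a purely illustrative sketch (the helper name `build_output_heads` and the use of `tf.layers.dense` are assumptions, not the repository's implementation), such a spec could be turned into layers like this:

```
import tensorflow as tf

def build_output_heads(features, output_config):
    """features: [batch, time, hidden]; returns {key: tensor} following the spec."""
    act_fns = {None: None, 'softplus': tf.nn.softplus,
               'tanh': tf.nn.tanh, 'sigmoid': tf.nn.sigmoid}
    heads = {}
    for key, dim, act in zip(output_config['keys'],
                             output_config['dims'],
                             output_config['activation_funcs']):
        heads[key] = tf.layers.dense(features, dim,
                                     activation=act_fns[act], name=key)
    return heads
```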
@@ -85,48 +87,49 @@ def main():
config['latent_hidden_size'] = 512
config['latent_size'] = 32

config['num_gmm_components'] = 70 # We have 70 characters in our alphabet.
config['num_gmm_components'] = 70 # We have 70 characters in our alphabet.
config['gmm_component_size'] = 32

config['reconstruction_loss'] = "nll_normal" # "mse", "l1"
config['loss_weights'] = {'reconstruction_loss': 1, 'kld_loss': 1, 'pen_loss': 1, 'eoc_loss': 1, 'gmm_sigma_regularizer':None, 'classification_loss':1}
config['reconstruction_loss'] = "nll_normal_bi" # "nll_normal_diag", "nll_normal_bi", "mse", "l1"
config['loss_weights'] = {'reconstruction_loss': 1, 'kld_loss': 1, 'pen_loss': 1, 'eoc_loss': 1, 'gmm_sigma_regularizer': None, 'classification_loss': 1}

config['experiment_name'] = "deepwriting-synthesis_model"
config['experiment_name'] = "deepwriting-synthesis_model-bivariate"

return config
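
A note on the five output heads configured above (`out_mu`, `out_sigma`, `out_rho`, `out_pen`, `out_eoc`): together they parameterize a bivariate Gaussian over the pen offset plus Bernoulli pen/end-of-character events, which is why `reconstruction_loss` is set to `nll_normal_bi`. A NumPy sketch of one sampling step, for illustration only (the function name and argument layout are assumptions, not code from this commit):

```
import numpy as np

def sample_stroke_point(out_mu, out_sigma, out_rho, out_pen, rng=np.random):
    """out_mu, out_sigma: length-2 arrays; out_rho in [-1, 1]; out_pen in [0, 1]."""
    sx, sy = out_sigma
    cov = np.array([[sx * sx,           out_rho * sx * sy],
                    [out_rho * sx * sy, sy * sy]])
    dx, dy = rng.multivariate_normal(out_mu, cov)   # pen offset sample
    pen_up = rng.binomial(1, out_pen)               # pen lift event
    return dx, dy, pen_up

print(sample_stroke_point(np.zeros(2), np.ones(2), 0.5, 0.1))
```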


def classifier():
config = {}
config = dict()
config['seed'] = 17
tf.set_random_seed(17)

config['training_data'] = './data/dataset_scaled_word_300_eoc_split_old_training.npz'
config['validation_data'] = './data/dataset_scaled_word_300_eoc_split_old_validation.npz'
config['training_data'] = './data/deepwriting_training.npz'
config['validation_data'] = './data/deepwriting_validation.npz'
config['validate_model'] = True

config['model_save_dir'] = './runs/'

config['checkpoint_every_step'] = 1000
config['validate_every_step'] = 100 # validation performance
config['print_every_step'] = 2 # print
config['validate_every_step'] = 100 # validation performance
config['print_every_step'] = 2 # print

config['reduce_loss'] = "mean_per_step" #"mean_per_step" "sum_mean", "mean", "sum".
config['reduce_loss'] = "mean_per_step" # "mean_per_step" "sum_mean", "mean", "sum".
config['batch_size'] = 64
config['num_epochs'] = 15
config['learning_rate'] = 9e-4
config['learning_rate_type'] = 'exponential' # 'fixed' # 'exponential'
config['learning_rate_decay_steps'] = 1000
config['learning_rate_decay_rate'] = 0.93

config['tensorboard_verbose'] = 1 # 1 for histogram summaries and 2 for latent space norms.
config['tensorboard_verbose'] = 1 # 1 for histogram summaries and 2 for latent space norms.
config['use_dynamic_rnn'] = True
config['use_bucket_feeder'] = True
config['use_staging_area'] = True

config['grad_clip_by_norm'] = 1 # If it is 0, then gradient clipping will not be applied.
config['grad_clip_by_value'] = 0 # If it is 0, then gradient clipping will not be applied.

config['model_cls'] = 'BiDirectionalRNNClassifier' #'RNNClassifier', 'BiDirectionalRNNClassifier
config['model_cls'] = 'BiDirectionalRNNClassifier' # 'RNNClassifier', 'BiDirectionalRNNClassifier
config['dataset_cls'] = 'HandWritingClassificationDataset'

config['use_bow_labels'] = True
@@ -136,7 +139,7 @@ def classifier():
if config['input_layer'] == {}:
config['input_layer']['num_layers'] = 1 # number of fully connected (FC) layers on top of RNN.
config['input_layer']['size'] = 256 # number of FC neurons.
config['input_layer']['activation_fn'] = 'relu' # type of activation function after each FC layer.
config['input_layer']['activation_fn'] = 'relu' # type of activation function after each FC layer.

config['rnn_layer'] = {} # See get_rnn_cell function in tf_model_utils.
config['rnn_layer']['num_layers'] = 4 # (default: 1)
@@ -145,13 +148,13 @@ def classifier():
config['rnn_layer']['stack_fw_bw_cells'] = True # (default: True). Only used in bidirectional models.

config['output_layer'] = {}
config['output_layer']['num_layers'] = 1 # number of FC layers on top of RNN.
config['output_layer']['size'] = 256 # number of FC neurons.
config['output_layer']['num_layers'] = 1 # number of FC layers on top of RNN.
config['output_layer']['size'] = 256 # number of FC neurons.
config['output_layer']['activation_fn'] = 'relu' # type of activation function after each FC layer.
# Predictions, i.e., outputs of the model.
config['output_layer'] = {}
config['output_layer']['out_keys'] = ['out_char', 'out_eoc', 'out_bow']
config['output_layer']['out_dims'] = None # If set None, then dataset.target_dims will be used.
config['output_layer']['out_dims'] = None # If set None, then dataset.target_dims will be used.
config['output_layer']['out_activation_fn'] = [None, 'sigmoid', 'sigmoid']

config['loss_weights'] = {'classification_loss': 1, 'eoc_loss': 1, 'bow_loss': 1}
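
Both configurations rely on an exponential learning-rate schedule (the classifier sets `learning_rate_type = 'exponential'` explicitly). The optimizer code is not part of this commit; in TF 1.x such a schedule typically looks like the following sketch, using the classifier's values (the optimizer choice is an assumption):

```
import tensorflow as tf

global_step = tf.Variable(0, trainable=False, name='global_step')
learning_rate = tf.train.exponential_decay(
    learning_rate=9e-4,   # config['learning_rate']
    global_step=global_step,
    decay_steps=1000,     # config['learning_rate_decay_steps']
    decay_rate=0.93)      # config['learning_rate_decay_rate']
optimizer = tf.train.AdamOptimizer(learning_rate)  # optimizer type assumed
```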
Binary file not shown.
4 changes: 1 addition & 3 deletions dataset_hw.py
@@ -376,13 +376,12 @@ def __init__(self, data_path, var_len_seq=None, use_bow_labels=False, data_augme
if self.bow_target:
self.target_dims = [self.alphabet_size, 1, 1] # char_labels, end-of-character, sow
else:
self.target_dims = [self.alphabet_size, 1] #char_labels, end-of-character
self.target_dims = [self.alphabet_size, 1] # char_labels, end-of-character

# sequence length, strokes, targets
# The dimensions with None will be padded if sequence_length isn't passed.
self.sample_shape = [[], [self.sequence_length, sum(self.input_dims)], [self.sequence_length, sum(self.target_dims)]]


def sample_generator(self):
"""
Creates a generator object which returns one data sample at a time. It is used by DataFeeder objects.
@@ -407,7 +406,6 @@ def sample_generator(self):
stroke_augmented = stroke
yield [stroke.shape[0], stroke_augmented, np.float32(np.hstack([char_label, np.expand_dims(eoc_label,-1)]))]


def fetch_sample(self, sample_idx):
"""
Prepares one data sample (i.e. return of sample_generator) given index.
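
For context, each item yielded by `sample_generator()` above is the triple `[sequence_length, stroke_array, target_array]`. A hypothetical usage sketch (the class and module names are assumptions taken from `config.py`, not code shown in this commit):

```
from dataset_hw import HandWritingClassificationDataset  # assumed class/module names

dataset = HandWritingClassificationDataset('./data/deepwriting_training.npz',
                                           use_bow_labels=True)
for seq_len, strokes, targets in dataset.sample_generator():
    # strokes: [seq_len, sum(input_dims)], targets: [seq_len, sum(target_dims)]
    print(seq_len, strokes.shape, targets.shape)
    break  # inspect the first sample only
```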
7 changes: 3 additions & 4 deletions source/tf_data_feeder.py
@@ -43,7 +43,6 @@ def __init__(self, dataset, num_epochs, batch_size=16, queue_capacity=512, shuff
self.enqueue_op = self.input_queue.enqueue(self.queue_placeholders)
self.dequeue_op = self.input_queue.dequeue()


def batch_queue(self, dynamic_pad=True, queue_capacity=512, queue_threads=4, name="batch_generator"):
"""
A plain feeder is used and range of sequence lengths in a batch will be arbitrary.
@@ -95,7 +94,6 @@ def batch_queue_bucket(self, buckets, dynamic_pad=True, queue_capacity=128, queu
name=name)
return self.batch


def __enqueue(self, tf_session, tf_coord):
"""
while (self.epoch < self.num_epochs) and (not self.terminated):
@@ -116,11 +114,12 @@ def __enqueue(self, tf_session, tf_coord):
except tf.errors.CancelledError:
pass


def init(self, tf_session, tf_coord):
# TODO: it is not multi-threaded.
self.enqueue_threads = threading.Thread(target=self.__enqueue, args=[tf_session, tf_coord])
self.enqueue_threads.start()


class TFStagingArea(object):

def __init__(self, tensors, device_name=None):
@@ -141,4 +140,4 @@ def preload_op(self):

@property
def tensors(self):
return self._tensors
return self._tensors
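
Taken together, the intended usage of this file is roughly: start the feeder's background enqueue thread via `init()`, wrap the dequeued batch in a `TFStagingArea`, and run `preload_op` alongside each training step so the next batch is already staged on the device. A rough sketch only; `DataFeederTF`, `dataset`, `train_op` and `num_steps` are placeholder names assumed from context, not code shown in this commit:

```
import tensorflow as tf
from source.tf_data_feeder import DataFeederTF, TFStagingArea  # class names assumed

feeder = DataFeederTF(dataset, num_epochs=200, batch_size=64)
batch = feeder.batch_queue(dynamic_pad=True)
staging_area = TFStagingArea(batch)        # holds the next batch on the device
coord = tf.train.Coordinator()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feeder.init(sess, coord)               # starts the background enqueue thread
    for step in range(num_steps):
        # one optimization step plus a prefetch of the following batch
        sess.run([train_op, staging_area.preload_op])
    coord.request_stop()
```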
12 changes: 6 additions & 6 deletions source/tf_loss.py
@@ -1,6 +1,7 @@
import tensorflow as tf
import numpy as np


def logli_normal_bivariate(x, mu, sigma, rho, reduce_sum=False):
"""
Bivariate Gaussian log-likelihood. Rank of arguments is expected to be 3.
@@ -36,9 +37,10 @@ def logli_normal_bivariate(x, mu, sigma, rho, reduce_sum=False):
else:
return tf.reduce_sum(result, reduce_sum)

def logli_normal_isotropic(x, mu, sigma, reduce_sum=False):

def logli_normal_diag_cov(x, mu, sigma, reduce_sum=False):
"""
Isotropic Gaussian log-likelihood.
Log-likelihood of Gaussian with diagonal covariance matrix.
Args:
x:
@@ -49,7 +51,7 @@ def logli_normal_isotropic(x, mu, sigma, reduce_sum=False):
Returns:
"""
with tf.name_scope('logli_normal_isotropic'):
with tf.name_scope('logli_normal_diag_cov'):
ssigma2 = tf.maximum(1e-6, tf.square(sigma)*2)
denom_log = tf.log(tf.sqrt(np.pi * ssigma2))
norm = tf.square(tf.subtract(x, mu))
@@ -75,9 +77,7 @@ def logli_bernoulli(x, theta, reduce_sum=False):
"""
with tf.name_scope('logli_bernoulli'):
# return tf.reduce_sum(tf.reduce_mean(tf.multiply(y, tf.log(tf.maximum(1e-9, theta))) + tf.multiply((1-y), tf.log(tf.maximum(1e-9, 1-theta))), 2), 1)
result = (
tf.multiply(x, tf.log(tf.maximum(1e-9, theta))) + tf.multiply((1 - x), tf.log(tf.maximum(1e-9, 1 - theta))))
result = (tf.multiply(x, tf.log(tf.maximum(1e-9, theta))) + tf.multiply((1 - x), tf.log(tf.maximum(1e-9, 1 - theta))))

if reduce_sum is False:
return result
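
The rename from `logli_normal_isotropic` to `logli_normal_diag_cov` better matches what the function computes: the log-density of a Gaussian with a diagonal covariance matrix, evaluated per dimension. A standalone NumPy illustration of the same quantity (a sketch, not the repository code):

```
import numpy as np

def log_normal_diag_cov(x, mu, sigma, eps=1e-6):
    """Sum over the last axis of log N(x_d; mu_d, sigma_d^2)."""
    ssigma2 = np.maximum(eps, 2.0 * np.square(sigma))  # 2 * sigma^2
    log_density = -np.log(np.sqrt(np.pi * ssigma2)) - np.square(x - mu) / ssigma2
    return np.sum(log_density, axis=-1)

x = np.array([[0.1, -0.2]])
print(log_normal_diag_cov(x, mu=np.zeros((1, 2)), sigma=np.ones((1, 2))))
```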
32 changes: 14 additions & 18 deletions source/tf_models.py
@@ -14,6 +14,7 @@
be implemented by inheriting from the vanilla architecture.
"""


class VRNN():
def __init__(self, config, input_op, input_seq_length_op, target_op, input_dims, target_dims, reuse, batch_size=-1, mode="training"):

@@ -139,21 +140,21 @@ def build_loss(self):
# TODO: Use dataset object to parse the concatenated targets.
targets_mu = self.target_pieces[0]

if not self.reconstruction_loss_key in self.ops_loss:
if self.reconstruction_loss_key not in self.ops_loss:
with tf.name_scope('reconstruction_loss'):
if self.reconstruction_loss == 'nll_normal':
# Gaussian log likelihood loss.
self.ops_loss[self.reconstruction_loss_key] = -self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.logli_normal_isotropic(targets_mu, self.out_mu, self.out_sigma, reduce_sum=False))
# Gaussian log likelihood loss.
if self.reconstruction_loss == 'nll_normal_iso':
self.ops_loss[self.reconstruction_loss_key] = -self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.logli_normal_diag_cov(targets_mu, self.out_mu, self.out_sigma, reduce_sum=False))
# L1 norm.
elif self.reconstruction_loss == "l1":
# L1 norm.
self.ops_loss[self.reconstruction_loss_key] = self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf.losses.absolute_difference(targets_mu, self.out_mu, reduction='none'))
# Mean-squared error.
elif self.reconstruction_loss == "mse":
# Mean-squared error.
self.ops_loss[self.reconstruction_loss_key] = self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf.losses.mean_squared_error(targets_mu, self.out_mu, reduction='none'))
else:
raise Exception("Undefined loss.")

if not "loss_kld" in self.ops_loss:
if "loss_kld" not in self.ops_loss:
with tf.name_scope('kld_loss'):
self.ops_loss['loss_kld'] = self.kld_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.kld_normal_isotropic(self.q_mu, self.q_sigma, self.p_mu, self.p_sigma, reduce_sum=False))

@@ -165,7 +166,6 @@ def accumulate_loss(self):
self.loss += loss_op
self.ops_loss['total_loss'] = self.loss


def log_loss(self, eval_loss, step=0, epoch=0, time_elapsed=None, prefix=""):
loss_format = prefix + "{}/{} \t Total: {:.4f} \t"
loss_entries = [step, epoch, eval_loss['total_loss']]
@@ -181,7 +181,6 @@ def log_loss(self, eval_loss, step=0, epoch=0, time_elapsed=None, prefix=""):
else:
print(loss_format.format(*loss_entries))


def log_num_parameters(self):
num_param = 0
for v in tf.global_variables():
@@ -190,7 +189,6 @@ def log_num_parameters(self):
self.num_parameters = num_param
print("# of parameters: " + str(num_param))


def create_summary_plots(self):
"""
Creates scalar summaries for loss plots. Iterates through `ops_loss` member and create a summary entry.
@@ -319,7 +317,6 @@ def sample_unbiased(self, session, seq_len=500, ops_eval=None, **kwargs):
eval_results = session.run(eval_op_list, feed)
return eval_results


def sample_biased(self, session, seq_len, prev_state, prev_sample=None, ops_eval=None, **kwargs):
"""
Initializes the model by using state of a real sample.
@@ -441,22 +438,21 @@ def build_predictions_layer(self):
# Mask for precise loss calculation.
self.seq_loss_mask = tf.expand_dims(tf.sequence_mask(lengths=self.input_seq_length, maxlen=tf.reduce_max(self.input_seq_length), dtype=tf.float32), -1)


def build_loss(self):
if self.is_training or self.is_validation:
# TODO: Use dataset object to parse the concatenated targets.
targets_mu = self.target_pieces[0]

if not self.reconstruction_loss_key in self.ops_loss:
if self.reconstruction_loss_key not in self.ops_loss:
with tf.name_scope('reconstruction_loss'):
if self.reconstruction_loss == 'nll_normal':
# Gaussian log likelihood loss.
self.ops_loss[self.reconstruction_loss_key] = -self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.logli_normal_isotropic(targets_mu, self.out_mu, self.out_sigma, reduce_sum=False))
# Gaussian log likelihood loss.
if self.reconstruction_loss == 'nll_normal_iso':
self.ops_loss[self.reconstruction_loss_key] = -self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.logli_normal_diag_cov(targets_mu, self.out_mu, self.out_sigma, reduce_sum=False))
# L1 norm.
elif self.reconstruction_loss == "l1":
# L1 norm.
self.ops_loss[self.reconstruction_loss_key] = self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf.losses.absolute_difference(targets_mu, self.out_mu, reduction='none'))
# Mean-squared error.
elif self.reconstruction_loss == "mse":
# Mean-squared error.
self.ops_loss[self.reconstruction_loss_key] = self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf.losses.mean_squared_error(targets_mu, self.out_mu, reduction='none'))
else:
raise Exception("Undefined loss.")
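
One detail worth noting in `build_predictions_layer()` above is `seq_loss_mask`, which zeroes out loss terms belonging to padded time steps of variable-length sequences. A small standalone illustration of that mask (not repository code):

```
import tensorflow as tf

lengths = tf.constant([3, 1, 2])
seq_loss_mask = tf.expand_dims(
    tf.sequence_mask(lengths=lengths, maxlen=tf.reduce_max(lengths),
                     dtype=tf.float32), -1)

with tf.Session() as sess:
    print(sess.run(seq_loss_mask)[:, :, 0])
    # [[1. 1. 1.]
    #  [1. 0. 0.]
    #  [1. 1. 0.]]
```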
1 change: 1 addition & 0 deletions tf_dataset_hw.py
@@ -11,6 +11,7 @@ def __init__(self, data_path, var_len_seq=None):
# Add tensorflow data types.
self.sample_tf_type = [tf.int32, tf.float32, tf.float32]


class HandWritingDatasetConditionalTF(HandWritingDatasetConditional):
"""
Tensorflow extension to HandWritingDataset class.
