Commit

Trainable model. Some updates and fixes.
emreaksan committed Jul 16, 2018
1 parent 691d126 commit d6a26c5
Showing 13 changed files with 284 additions and 291 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -106,3 +106,4 @@ ENV/
# Training
runs/
runs_evaluation/
data/
9 changes: 6 additions & 3 deletions README.md
@@ -10,16 +10,19 @@ We collected data from 94 authors by using [IAMOnDB](http://www.fki.inf.unibe.ch

## Pretrained Model
1. You can download a pretrained model from [our project page](https://ait.ethz.ch/projects/2018/deepwriting/downloads/tf-1514981744-deepwriting_synthesis_model.tar.gz).
2. Either move it under `<repository_path>/runs/` or update the `validation_data` path in `config.json`.
3. You can run:
```
python tf_evaluate_hw.py -S <path_to_model_folder> -M tf-1514981744-deepwriting_synthesis_model -QL
```


## Dependencies
1. Numpy
1. numpy
2. TensorFlow 1.2+ (earlier versions are untested)
3. Matplotlib
3. matplotlib
4. OpenCV (pip install opencv-python is enough)
5. svgwrite
6. scipy
7. sklearn
8. Pillow
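
For a quick environment sanity check, the following minimal Python snippet (not part of the repository) verifies that the packages listed above import and that TensorFlow meets the 1.2 requirement:

```
import numpy, scipy, sklearn, svgwrite, matplotlib, PIL
import cv2                      # provided by the opencv-python package
import tensorflow as tf
from distutils.version import LooseVersion

# TensorFlow 1.2+ is expected; earlier versions are untested.
assert LooseVersion(tf.__version__) >= LooseVersion('1.2'), tf.__version__
print('TensorFlow', tf.__version__, '- all dependencies import fine.')
```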
53 changes: 28 additions & 25 deletions config.py
@@ -1,23 +1,25 @@
import tensorflow as tf
import numpy as np


def main():
config = {}
config = dict()
config['seed'] = 17
tf.set_random_seed(17)

config['training_data'] = './data/dataset_scaled_word_300_eoc_split_old_training.npz'
config['validation_data'] = './data/dataset_scaled_word_300_eoc_split_old_validation.npz'
config['training_data'] = './data/deepwriting_training.npz'
config['validation_data'] = './data/deepwriting_validation.npz'
config['validate_model'] = False

config['model_save_dir'] = './runs/'

config['checkpoint_every_step'] = 1000
config['validate_every_step'] = 25 # validation performance
config['img_summary_every_step'] = 100 # tf_summary
config['print_every_step'] = 2 # print
config['validate_every_step'] = 25 # validation performance
# Model predictions are converted into images and displayed in Tensorboard. Set 0 to disable image summaries.
config['img_summary_every_step'] = 0
config['print_every_step'] = 2 # print

config['reduce_loss'] = "mean_per_step" # "mean" "sum_mean", "mean", "sum".
config['reduce_loss'] = "mean_per_step" # "mean" "sum_mean", "mean", "sum".
config['batch_size'] = 64
config['num_epochs'] = 200
config['learning_rate'] = 1e-3
@@ -26,7 +28,7 @@ def main():
config['learning_rate_decay_rate'] = 0.96

config['create_timeline'] = False
config['tensorboard_verbose'] = 0 # 1 for histogram summaries and 2 for latent space norms.
config['tensorboard_verbose'] = 0 # 1 for histogram summaries and 2 for latent space norms.
config['use_dynamic_rnn'] = True
config['use_bucket_feeder'] = True
config['use_staging_area'] = True
@@ -43,7 +45,7 @@ def main():
#
config['output'] = {}
config['output']['keys'] = ['out_mu', 'out_sigma', 'out_rho', 'out_pen', 'out_eoc']
config['output']['dims'] = [2, 2, 1, 1, 1] # Ideally these should be set by the model.
config['output']['dims'] = [2, 2, 1, 1, 1] # Ideally these should be set by the model.
config['output']['activation_funcs'] = [None, 'softplus', 'tanh', 'sigmoid', 'sigmoid']

config['latent_rnn'] = {} # See get_rnn_cell function in tf_model_utils.
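
The `config['output']` block above (keys, dims, activation functions) describes the model's prediction heads. The code consuming it is not part of this commit; as a purely illustrative sketch (the helper name `build_output_heads` and the use of `tf.layers.dense` are assumptions, not the repository's implementation), such a spec could be turned into layers like this:

```
import tensorflow as tf

def build_output_heads(features, output_config):
    """features: [batch, time, hidden]; returns {key: tensor} following the spec."""
    act_fns = {None: None, 'softplus': tf.nn.softplus,
               'tanh': tf.nn.tanh, 'sigmoid': tf.nn.sigmoid}
    heads = {}
    for key, dim, act in zip(output_config['keys'],
                             output_config['dims'],
                             output_config['activation_funcs']):
        heads[key] = tf.layers.dense(features, dim,
                                     activation=act_fns[act], name=key)
    return heads
```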
@@ -85,48 +87,49 @@ def main():
config['latent_hidden_size'] = 512
config['latent_size'] = 32

config['num_gmm_components'] = 70 # We have 70 characters in our alphabet.
config['num_gmm_components'] = 70 # We have 70 characters in our alphabet.
config['gmm_component_size'] = 32

config['reconstruction_loss'] = "nll_normal" # "mse", "l1"
config['loss_weights'] = {'reconstruction_loss': 1, 'kld_loss': 1, 'pen_loss': 1, 'eoc_loss': 1, 'gmm_sigma_regularizer':None, 'classification_loss':1}
config['reconstruction_loss'] = "nll_normal_bi" # "nll_normal_diag", "nll_normal_bi", "mse", "l1"
config['loss_weights'] = {'reconstruction_loss': 1, 'kld_loss': 1, 'pen_loss': 1, 'eoc_loss': 1, 'gmm_sigma_regularizer': None, 'classification_loss': 1}

config['experiment_name'] = "deepwriting-synthesis_model"
config['experiment_name'] = "deepwriting-synthesis_model-bivariate"

return config
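
A note on the five output heads configured above (`out_mu`, `out_sigma`, `out_rho`, `out_pen`, `out_eoc`): together they parameterize a bivariate Gaussian over the pen offset plus Bernoulli pen/end-of-character events, which is why `reconstruction_loss` is set to `nll_normal_bi`. A NumPy sketch of one sampling step, for illustration only (the function name and argument layout are assumptions, not code from this commit):

```
import numpy as np

def sample_stroke_point(out_mu, out_sigma, out_rho, out_pen, rng=np.random):
    """out_mu, out_sigma: length-2 arrays; out_rho in [-1, 1]; out_pen in [0, 1]."""
    sx, sy = out_sigma
    cov = np.array([[sx * sx,           out_rho * sx * sy],
                    [out_rho * sx * sy, sy * sy]])
    dx, dy = rng.multivariate_normal(out_mu, cov)   # pen offset sample
    pen_up = rng.binomial(1, out_pen)               # pen lift event
    return dx, dy, pen_up

print(sample_stroke_point(np.zeros(2), np.ones(2), 0.5, 0.1))
```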


def classifier():
config = {}
config = dict()
config['seed'] = 17
tf.set_random_seed(17)

config['training_data'] = './data/dataset_scaled_word_300_eoc_split_old_training.npz'
config['validation_data'] = './data/dataset_scaled_word_300_eoc_split_old_validation.npz'
config['training_data'] = './data/deepwriting_training.npz'
config['validation_data'] = './data/deepwriting_validation.npz'
config['validate_model'] = True

config['model_save_dir'] = './runs/'

config['checkpoint_every_step'] = 1000
config['validate_every_step'] = 100 # validation performance
config['print_every_step'] = 2 # print
config['validate_every_step'] = 100 # validation performance
config['print_every_step'] = 2 # print

config['reduce_loss'] = "mean_per_step" #"mean_per_step" "sum_mean", "mean", "sum".
config['reduce_loss'] = "mean_per_step" # "mean_per_step" "sum_mean", "mean", "sum".
config['batch_size'] = 64
config['num_epochs'] = 15
config['learning_rate'] = 9e-4
config['learning_rate_type'] = 'exponential' # 'fixed' # 'exponential'
config['learning_rate_decay_steps'] = 1000
config['learning_rate_decay_rate'] = 0.93

config['tensorboard_verbose'] = 1 # 1 for histogram summaries and 2 for latent space norms.
config['tensorboard_verbose'] = 1 # 1 for histogram summaries and 2 for latent space norms.
config['use_dynamic_rnn'] = True
config['use_bucket_feeder'] = True
config['use_staging_area'] = True

config['grad_clip_by_norm'] = 1 # If it is 0, then gradient clipping will not be applied.
config['grad_clip_by_value'] = 0 # If it is 0, then gradient clipping will not be applied.

config['model_cls'] = 'BiDirectionalRNNClassifier' #'RNNClassifier', 'BiDirectionalRNNClassifier
config['model_cls'] = 'BiDirectionalRNNClassifier' # 'RNNClassifier', 'BiDirectionalRNNClassifier
config['dataset_cls'] = 'HandWritingClassificationDataset'

config['use_bow_labels'] = True
@@ -136,7 +139,7 @@ def classifier():
if config['input_layer'] == {}:
config['input_layer']['num_layers'] = 1 # number of fully connected (FC) layers on top of RNN.
config['input_layer']['size'] = 256 # number of FC neurons.
config['input_layer']['activation_fn'] = 'relu' # type of activation function after each FC layer.
config['input_layer']['activation_fn'] = 'relu' # type of activation function after each FC layer.

config['rnn_layer'] = {} # See get_rnn_cell function in tf_model_utils.
config['rnn_layer']['num_layers'] = 4 # (default: 1)
@@ -145,13 +148,13 @@ def classifier():
config['rnn_layer']['stack_fw_bw_cells'] = True # (default: True). Only used in bidirectional models.

config['output_layer'] = {}
config['output_layer']['num_layers'] = 1 # number of FC layers on top of RNN.
config['output_layer']['size'] = 256 # number of FC neurons.
config['output_layer']['num_layers'] = 1 # number of FC layers on top of RNN.
config['output_layer']['size'] = 256 # number of FC neurons.
config['output_layer']['activation_fn'] = 'relu' # type of activation function after each FC layer.
# Predictions, i.e., outputs of the model.
config['output_layer'] = {}
config['output_layer']['out_keys'] = ['out_char', 'out_eoc', 'out_bow']
config['output_layer']['out_dims'] = None # If set None, then dataset.target_dims will be used.
config['output_layer']['out_dims'] = None # If set None, then dataset.target_dims will be used.
config['output_layer']['out_activation_fn'] = [None, 'sigmoid', 'sigmoid']

config['loss_weights'] = {'classification_loss': 1, 'eoc_loss': 1, 'bow_loss': 1}
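
Both configurations rely on an exponential learning-rate schedule (the classifier sets `learning_rate_type = 'exponential'` explicitly). The optimizer code is not part of this commit; in TF 1.x such a schedule typically looks like the following sketch, using the classifier's values (the optimizer choice is an assumption):

```
import tensorflow as tf

global_step = tf.Variable(0, trainable=False, name='global_step')
learning_rate = tf.train.exponential_decay(
    learning_rate=9e-4,   # config['learning_rate']
    global_step=global_step,
    decay_steps=1000,     # config['learning_rate_decay_steps']
    decay_rate=0.93)      # config['learning_rate_decay_rate']
optimizer = tf.train.AdamOptimizer(learning_rate)  # optimizer type assumed
```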
Binary file not shown.
4 changes: 1 addition & 3 deletions dataset_hw.py
@@ -376,13 +376,12 @@ def __init__(self, data_path, var_len_seq=None, use_bow_labels=False, data_augme
if self.bow_target:
self.target_dims = [self.alphabet_size, 1, 1] # char_labels, end-of-character, sow
else:
self.target_dims = [self.alphabet_size, 1] #char_labels, end-of-character
self.target_dims = [self.alphabet_size, 1] # char_labels, end-of-character

# sequence length, strokes, targets
# The dimensions with None will be padded if sequence_length isn't passed.
self.sample_shape = [[], [self.sequence_length, sum(self.input_dims)], [self.sequence_length, sum(self.target_dims)]]


def sample_generator(self):
"""
Creates a generator object which returns one data sample at a time. It is used by DataFeeder objects.
@@ -407,7 +406,6 @@ def sample_generator(self):
stroke_augmented = stroke
yield [stroke.shape[0], stroke_augmented, np.float32(np.hstack([char_label, np.expand_dims(eoc_label,-1)]))]


def fetch_sample(self, sample_idx):
"""
Prepares one data sample (i.e. return of sample_generator) given index.
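
For context, each item yielded by `sample_generator()` above is the triple `[sequence_length, stroke_array, target_array]`. A hypothetical usage sketch (the class and module names are assumptions taken from `config.py`, not code shown in this commit):

```
from dataset_hw import HandWritingClassificationDataset  # assumed class/module names

dataset = HandWritingClassificationDataset('./data/deepwriting_training.npz',
                                           use_bow_labels=True)
for seq_len, strokes, targets in dataset.sample_generator():
    # strokes: [seq_len, sum(input_dims)], targets: [seq_len, sum(target_dims)]
    print(seq_len, strokes.shape, targets.shape)
    break  # inspect the first sample only
```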
7 changes: 3 additions & 4 deletions source/tf_data_feeder.py
@@ -43,7 +43,6 @@ def __init__(self, dataset, num_epochs, batch_size=16, queue_capacity=512, shuff
self.enqueue_op = self.input_queue.enqueue(self.queue_placeholders)
self.dequeue_op = self.input_queue.dequeue()


def batch_queue(self, dynamic_pad=True, queue_capacity=512, queue_threads=4, name="batch_generator"):
"""
A plain feeder is used and range of sequence lengths in a batch will be arbitrary.
@@ -95,7 +94,6 @@ def batch_queue_bucket(self, buckets, dynamic_pad=True, queue_capacity=128, queu
name=name)
return self.batch


def __enqueue(self, tf_session, tf_coord):
"""
while (self.epoch < self.num_epochs) and (not self.terminated):
@@ -116,11 +114,12 @@ def __enqueue(self, tf_session, tf_coord):
except tf.errors.CancelledError:
pass


def init(self, tf_session, tf_coord):
# TODO: it is not multi-threaded.
self.enqueue_threads = threading.Thread(target=self.__enqueue, args=[tf_session, tf_coord])
self.enqueue_threads.start()


class TFStagingArea(object):

def __init__(self, tensors, device_name=None):
@@ -141,4 +140,4 @@ def preload_op(self):

@property
def tensors(self):
return self._tensors
return self._tensors
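
Taken together, the intended usage of this file is roughly: start the feeder's background enqueue thread via `init()`, wrap the dequeued batch in a `TFStagingArea`, and run `preload_op` alongside each training step so the next batch is already staged on the device. A rough sketch only; `DataFeederTF`, `dataset`, `train_op` and `num_steps` are placeholder names assumed from context, not code shown in this commit:

```
import tensorflow as tf
from source.tf_data_feeder import DataFeederTF, TFStagingArea  # class names assumed

feeder = DataFeederTF(dataset, num_epochs=200, batch_size=64)
batch = feeder.batch_queue(dynamic_pad=True)
staging_area = TFStagingArea(batch)        # holds the next batch on the device
coord = tf.train.Coordinator()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feeder.init(sess, coord)               # starts the background enqueue thread
    for step in range(num_steps):
        # one optimization step plus a prefetch of the following batch
        sess.run([train_op, staging_area.preload_op])
    coord.request_stop()
```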
12 changes: 6 additions & 6 deletions source/tf_loss.py
@@ -1,6 +1,7 @@
import tensorflow as tf
import numpy as np


def logli_normal_bivariate(x, mu, sigma, rho, reduce_sum=False):
"""
Bivariate Gaussian log-likelihood. Rank of arguments is expected to be 3.
@@ -36,9 +37,10 @@ def logli_normal_bivariate(x, mu, sigma, rho, reduce_sum=False):
else:
return tf.reduce_sum(result, reduce_sum)

def logli_normal_isotropic(x, mu, sigma, reduce_sum=False):

def logli_normal_diag_cov(x, mu, sigma, reduce_sum=False):
"""
Isotropic Gaussian log-likelihood.
Log-likelihood of Gaussian with diagonal covariance matrix.
Args:
x:
@@ -49,7 +51,7 @@ def logli_normal_isotropic(x, mu, sigma, reduce_sum=False):
Returns:
"""
with tf.name_scope('logli_normal_isotropic'):
with tf.name_scope('logli_normal_diag_cov'):
ssigma2 = tf.maximum(1e-6, tf.square(sigma)*2)
denom_log = tf.log(tf.sqrt(np.pi * ssigma2))
norm = tf.square(tf.subtract(x, mu))
@@ -75,9 +77,7 @@ def logli_bernoulli(x, theta, reduce_sum=False):
"""
with tf.name_scope('logli_bernoulli'):
# return tf.reduce_sum(tf.reduce_mean(tf.multiply(y, tf.log(tf.maximum(1e-9, theta))) + tf.multiply((1-y), tf.log(tf.maximum(1e-9, 1-theta))), 2), 1)
result = (
tf.multiply(x, tf.log(tf.maximum(1e-9, theta))) + tf.multiply((1 - x), tf.log(tf.maximum(1e-9, 1 - theta))))
result = (tf.multiply(x, tf.log(tf.maximum(1e-9, theta))) + tf.multiply((1 - x), tf.log(tf.maximum(1e-9, 1 - theta))))

if reduce_sum is False:
return result
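
The rename from `logli_normal_isotropic` to `logli_normal_diag_cov` better matches what the function computes: the log-density of a Gaussian with a diagonal covariance matrix, evaluated per dimension. A standalone NumPy illustration of the same quantity (a sketch, not the repository code):

```
import numpy as np

def log_normal_diag_cov(x, mu, sigma, eps=1e-6):
    """Sum over the last axis of log N(x_d; mu_d, sigma_d^2)."""
    ssigma2 = np.maximum(eps, 2.0 * np.square(sigma))  # 2 * sigma^2
    log_density = -np.log(np.sqrt(np.pi * ssigma2)) - np.square(x - mu) / ssigma2
    return np.sum(log_density, axis=-1)

x = np.array([[0.1, -0.2]])
print(log_normal_diag_cov(x, mu=np.zeros((1, 2)), sigma=np.ones((1, 2))))
```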
32 changes: 14 additions & 18 deletions source/tf_models.py
@@ -14,6 +14,7 @@
be implemented by inheriting from the vanilla architecture.
"""


class VRNN():
def __init__(self, config, input_op, input_seq_length_op, target_op, input_dims, target_dims, reuse, batch_size=-1, mode="training"):

@@ -139,21 +140,21 @@ def build_loss(self):
# TODO: Use dataset object to parse the concatenated targets.
targets_mu = self.target_pieces[0]

if not self.reconstruction_loss_key in self.ops_loss:
if self.reconstruction_loss_key not in self.ops_loss:
with tf.name_scope('reconstruction_loss'):
if self.reconstruction_loss == 'nll_normal':
# Gaussian log likelihood loss.
self.ops_loss[self.reconstruction_loss_key] = -self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.logli_normal_isotropic(targets_mu, self.out_mu, self.out_sigma, reduce_sum=False))
# Gaussian log likelihood loss.
if self.reconstruction_loss == 'nll_normal_iso':
self.ops_loss[self.reconstruction_loss_key] = -self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.logli_normal_diag_cov(targets_mu, self.out_mu, self.out_sigma, reduce_sum=False))
# L1 norm.
elif self.reconstruction_loss == "l1":
# L1 norm.
self.ops_loss[self.reconstruction_loss_key] = self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf.losses.absolute_difference(targets_mu, self.out_mu, reduction='none'))
# Mean-squared error.
elif self.reconstruction_loss == "mse":
# Mean-squared error.
self.ops_loss[self.reconstruction_loss_key] = self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf.losses.mean_squared_error(targets_mu, self.out_mu, reduction='none'))
else:
raise Exception("Undefined loss.")

if not "loss_kld" in self.ops_loss:
if "loss_kld" not in self.ops_loss:
with tf.name_scope('kld_loss'):
self.ops_loss['loss_kld'] = self.kld_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.kld_normal_isotropic(self.q_mu, self.q_sigma, self.p_mu, self.p_sigma, reduce_sum=False))

@@ -165,7 +166,6 @@ def accumulate_loss(self):
self.loss += loss_op
self.ops_loss['total_loss'] = self.loss


def log_loss(self, eval_loss, step=0, epoch=0, time_elapsed=None, prefix=""):
loss_format = prefix + "{}/{} \t Total: {:.4f} \t"
loss_entries = [step, epoch, eval_loss['total_loss']]
@@ -181,7 +181,6 @@ def log_loss(self, eval_loss, step=0, epoch=0, time_elapsed=None, prefix=""):
else:
print(loss_format.format(*loss_entries))


def log_num_parameters(self):
num_param = 0
for v in tf.global_variables():
@@ -190,7 +189,6 @@ def log_num_parameters(self):
self.num_parameters = num_param
print("# of parameters: " + str(num_param))


def create_summary_plots(self):
"""
Creates scalar summaries for loss plots. Iterates through `ops_loss` member and create a summary entry.
@@ -319,7 +317,6 @@ def sample_unbiased(self, session, seq_len=500, ops_eval=None, **kwargs):
eval_results = session.run(eval_op_list, feed)
return eval_results


def sample_biased(self, session, seq_len, prev_state, prev_sample=None, ops_eval=None, **kwargs):
"""
Initializes the model by using state of a real sample.
@@ -441,22 +438,21 @@ def build_predictions_layer(self):
# Mask for precise loss calculation.
self.seq_loss_mask = tf.expand_dims(tf.sequence_mask(lengths=self.input_seq_length, maxlen=tf.reduce_max(self.input_seq_length), dtype=tf.float32), -1)


def build_loss(self):
if self.is_training or self.is_validation:
# TODO: Use dataset object to parse the concatenated targets.
targets_mu = self.target_pieces[0]

if not self.reconstruction_loss_key in self.ops_loss:
if self.reconstruction_loss_key not in self.ops_loss:
with tf.name_scope('reconstruction_loss'):
if self.reconstruction_loss == 'nll_normal':
# Gaussian log likelihood loss.
self.ops_loss[self.reconstruction_loss_key] = -self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.logli_normal_isotropic(targets_mu, self.out_mu, self.out_sigma, reduce_sum=False))
# Gaussian log likelihood loss.
if self.reconstruction_loss == 'nll_normal_iso':
self.ops_loss[self.reconstruction_loss_key] = -self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf_loss.logli_normal_diag_cov(targets_mu, self.out_mu, self.out_sigma, reduce_sum=False))
# L1 norm.
elif self.reconstruction_loss == "l1":
# L1 norm.
self.ops_loss[self.reconstruction_loss_key] = self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf.losses.absolute_difference(targets_mu, self.out_mu, reduction='none'))
# Mean-squared error.
elif self.reconstruction_loss == "mse":
# Mean-squared error.
self.ops_loss[self.reconstruction_loss_key] = self.reconstruction_loss_weight*self.reduce_loss_func(self.seq_loss_mask*tf.losses.mean_squared_error(targets_mu, self.out_mu, reduction='none'))
else:
raise Exception("Undefined loss.")
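
One detail worth noting in `build_predictions_layer()` above is `seq_loss_mask`, which zeroes out loss terms belonging to padded time steps of variable-length sequences. A small standalone illustration of that mask (not repository code):

```
import tensorflow as tf

lengths = tf.constant([3, 1, 2])
seq_loss_mask = tf.expand_dims(
    tf.sequence_mask(lengths=lengths, maxlen=tf.reduce_max(lengths),
                     dtype=tf.float32), -1)

with tf.Session() as sess:
    print(sess.run(seq_loss_mask)[:, :, 0])
    # [[1. 1. 1.]
    #  [1. 0. 0.]
    #  [1. 1. 0.]]
```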
1 change: 1 addition & 0 deletions tf_dataset_hw.py
@@ -11,6 +11,7 @@ def __init__(self, data_path, var_len_seq=None):
# Add tensorflow data types.
self.sample_tf_type = [tf.int32, tf.float32, tf.float32]


class HandWritingDatasetConditionalTF(HandWritingDatasetConditional):
"""
Tensorflow extension to HandWritingDataset class.
