add a potential TF bug note in MultiheadAttentionEncoder
ZhitingHu committed Apr 13, 2019
1 parent 522a72c commit 90b3075
Showing 2 changed files with 8 additions and 0 deletions.
3 changes: 3 additions & 0 deletions examples/transformer/run.sh
@@ -0,0 +1,3 @@
#!/bin/bash

python3 transformer_main.py --run_mode=train_and_evaluate --config_model=config_model --config_data=config_iwslt15
5 changes: 5 additions & 0 deletions texar/modules/encoders/multihead_attention.py
@@ -159,6 +159,11 @@ def _update_and_return(layer, key):
            res = cache[key]
            if isinstance(res, tf.TensorArray):
                # inference-like decoding
                # TODO(zhiting): This write op may cause a bug
                # on CPU--it looks like the two TensorArrays
                # cache['self_keys'] and cache['self_values']
                # will mix up starting from a certain step,
                # causing a shape mismatch. This op looks
                # fine on GPU.
                res = res.write(
                    res.size(), tf.squeeze(out, axis=[1]))
                out = transpose_batch_time(res.stack())
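The caching pattern the TODO refers to can be sketched without TensorFlow: each decoding step appends the current step's key/value slice to a growing per-layer array, then stacks the history. The mock below (`TensorArrayMock` and `update_and_return` are illustrative names, not Texar's API) mimics `tf.TensorArray`'s sequential write/size/stack behavior; a bug of the kind described would amount to the two independent caches sharing or swapping underlying storage.

```python
class TensorArrayMock:
    """Minimal stand-in for tf.TensorArray's write/size/stack API."""

    def __init__(self):
        self._items = []

    def write(self, index, value):
        # tf.TensorArray.write returns a (new) handle; we mimic that
        # by returning self after an append-only, sequential write.
        assert index == len(self._items), "writes must be sequential"
        self._items.append(value)
        return self

    def size(self):
        return len(self._items)

    def stack(self):
        # Return the full history accumulated so far.
        return list(self._items)


# Separate caches for keys and values, as in the attention module.
cache = {"self_keys": TensorArrayMock(), "self_values": TensorArrayMock()}


def update_and_return(key, out):
    res = cache[key]
    res = res.write(res.size(), out)  # append this step's slice
    cache[key] = res
    return res.stack()                # history up to the current step


# Two decoding steps: each cache must keep only its own entries.
update_and_return("self_keys", "k0")
update_and_return("self_values", "v0")
keys = update_and_return("self_keys", "k1")
values = update_and_return("self_values", "v1")
```

If the two `TensorArray` handles were to mix up as the TODO suspects, `keys` and `values` would interleave entries and their lengths would diverge from the step count, producing exactly the kind of shape mismatch described.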
