
Commit

minor fixes to the LRA setup
blefaudeux committed Jan 12, 2022
1 parent b48293e commit 79cdac9
Showing 3 changed files with 4 additions and 3 deletions.
1 change: 1 addition & 0 deletions requirements-lra.txt
@@ -4,5 +4,6 @@
 tensorboard>=2.3.0
 tensorflow>=2.3.1
 tensorflow-datasets>=4.0.1
+tensorflow-text>=2.7.3
 submitit
 fvcore
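
The tensorflow-text pin is presumably added because some tensorflow-datasets text loaders import it at runtime, so the LRA text tasks would otherwise fail at dataset-loading time rather than during training. A quick sanity check of that dependency chain; the dataset name imdb_reviews is only illustrative, not necessarily the exact LRA task used here:

```python
# Minimal sketch: verify that the text-dataset dependencies resolve.
# "imdb_reviews" is an illustrative tfds dataset, not necessarily the LRA task.
import tensorflow_datasets as tfds
import tensorflow_text  # noqa: F401 -- registers TF text ops that some tfds loaders need

ds = tfds.load("imdb_reviews", split="train")
print(next(iter(tfds.as_numpy(ds))))  # one raw example, proving the pipeline loads
```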
4 changes: 2 additions & 2 deletions xformers/benchmarks/LRA/code/config.json
@@ -80,7 +80,7 @@
 "eval_frequency": 50,
 "num_train_steps": 10000,
 "num_eval_steps": 62,
-"gradient_accumulation": 1
+"gradient_accumulation": 2
 },
 "model": {
 "pooling_mode": "mean",
@@ -94,7 +94,7 @@
 },
 "xformer": [
 {
-"reversible": true,
+"reversible": false,
 "block_type": "encoder",
 "num_layers": 2,
 "layer_norm_style": "pre",
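
In the config above, raising gradient_accumulation from 1 to 2 typically means the training loop sums gradients over two micro-batches before each optimizer step, trading a little wall-clock time for lower per-step memory; switching reversible to false likewise swaps activation recompute for plainer, more memory-hungry encoder blocks. A minimal sketch of the usual accumulation pattern, assuming standard PyTorch training code rather than the LRA runner's actual loop:

```python
import torch

model = torch.nn.Linear(16, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = torch.nn.CrossEntropyLoss()
accumulation_steps = 2  # mirrors "gradient_accumulation": 2

batches = [(torch.randn(8, 16), torch.randint(0, 2, (8,))) for _ in range(4)]

for step, (x, y) in enumerate(batches):
    loss = loss_fn(model(x), y) / accumulation_steps  # scale so the summed grads average out
    loss.backward()                                    # grads accumulate in param.grad
    if (step + 1) % accumulation_steps == 0:
        optimizer.step()        # one optimizer update per two micro-batches
        optimizer.zero_grad()
```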
2 changes: 1 addition & 1 deletion xformers/components/attention/attention_mask.py
@@ -24,7 +24,7 @@ class AttentionMask:
     """

     def __init__(self, additive_mask: torch.Tensor, is_causal: bool = False):
-        assert additive_mask.is_floating_point()
+        assert additive_mask.is_floating_point(), additive_mask.dtype
         assert not additive_mask.requires_grad

         if additive_mask.ndim == 2:
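
The attention_mask.py change only adds the mask's dtype as the assertion message, so a non-floating-point additive mask now fails with the offending dtype spelled out instead of a bare AssertionError. A small illustration of that assert-with-message pattern outside the class (the bool mask is just an example of a bad input):

```python
import torch

mask = torch.zeros(4, 4, dtype=torch.bool)  # not a valid floating-point additive mask

try:
    # The expression after the comma becomes the AssertionError's message,
    # so the failure now names the dtype rather than giving no detail at all.
    assert mask.is_floating_point(), mask.dtype
except AssertionError as err:
    print(f"AssertionError: {err}")  # -> AssertionError: torch.bool
```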
