expose the bias options for both MLP and FusedMLP, use the same defaults
blefaudeux committed Mar 1, 2022
1 parent a65c243 commit 19af415
Showing 4 changed files with 12 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [0.0.x] - TBD
+### Fixed
+- Expose bias flag for feedforwards, same default as Timm [#220]
+
## [0.0.9] - 2022-02-09
### Added
- Compositional Attention [#41]
2 changes: 1 addition & 1 deletion examples/microViT.py
@@ -208,7 +208,7 @@ def test_step(self, batch, _):
# Adjust batch depending on the available memory on your machine.
# You can also use reversible layers to save memory
REF_BATCH = 4096
-BATCH = 512
+BATCH = 256

MAX_EPOCHS = 20
NUM_WORKERS = 4
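For context (not part of the diff): REF_BATCH is the target effective batch size, while BATCH is what fits on the device. A minimal sketch of the usual way to bridge the two with gradient accumulation in PyTorch Lightning follows; the actual Trainer arguments used by microViT.py are an assumption here, not shown in this hunk.

```python
import pytorch_lightning as pl

REF_BATCH = 4096
BATCH = 256  # halved here so the example still fits in GPU memory

# Accumulate gradients so the effective batch size stays at REF_BATCH.
trainer = pl.Trainer(
    max_epochs=20,
    accumulate_grad_batches=REF_BATCH // BATCH,  # 16 accumulation steps
)
```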
5 changes: 3 additions & 2 deletions xformers/components/feedforward/fused_mlp.py
@@ -37,6 +37,7 @@ def __init__(
dropout: float,
activation: Activation,
hidden_layer_multiplier: int,
+bias: bool = True,
*args,
**kwargs,
):
@@ -45,13 +46,13 @@ def __init__(
dim_mlp = hidden_layer_multiplier * dim_model

self.mlp = nn.Sequential(
-nn.Linear(in_features=dim_model, out_features=dim_mlp, bias=False),
+nn.Linear(in_features=dim_model, out_features=dim_mlp, bias=bias),
# pyre-ignore[16]: TODO(T101400990): Pyre did not recognize
# the `FusedLinear` import.
FusedDropoutBias(
p=dropout, bias_shape=dim_mlp, activation=activation
),
-nn.Linear(in_features=dim_mlp, out_features=dim_model, bias=False),
+nn.Linear(in_features=dim_mlp, out_features=dim_model, bias=bias),
# pyre-ignore[16]: TODO(T101400990): Pyre did not recognize
# the `FusedLinear` import.
FusedDropoutBias(p=dropout, bias_shape=dim_model, activation=None),
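Not part of the commit, but a minimal usage sketch of the newly exposed flag; the parameter names follow the constructor shown above, and it assumes Triton plus a CUDA device are available, since FusedMLP builds on the fused dropout/bias kernels.

```python
import torch

from xformers.components import Activation
from xformers.components.feedforward.fused_mlp import FusedMLP

# bias=False now reaches both nn.Linear layers instead of being hard-coded.
ff = FusedMLP(
    dim_model=256,
    dropout=0.1,
    activation=Activation.GeLU,
    hidden_layer_multiplier=4,
    bias=False,
).cuda()

x = torch.randn(8, 197, 256, device="cuda")
print(ff(x).shape)  # expected: torch.Size([8, 197, 256])
```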
7 changes: 4 additions & 3 deletions xformers/components/feedforward/mlp.py
@@ -28,16 +28,17 @@ def __init__(
dropout: float,
activation: Activation,
hidden_layer_multiplier: int,
+bias: bool = True,
*args,
-**kwargs
+**kwargs,
):
super().__init__()

self.mlp = nn.Sequential(
-nn.Linear(dim_model, hidden_layer_multiplier * dim_model),
+nn.Linear(dim_model, hidden_layer_multiplier * dim_model, bias=bias),
build_activation(activation),
nn.Dropout(dropout),
-nn.Linear(hidden_layer_multiplier * dim_model, dim_model),
+nn.Linear(hidden_layer_multiplier * dim_model, dim_model, bias=bias),
nn.Dropout(dropout),
)

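A corresponding sketch for the plain MLP (again, not part of the commit), illustrating both the Timm-matching default (bias=True) and opting out of biases entirely.

```python
from xformers.components import Activation
from xformers.components.feedforward.mlp import MLP

# Default: bias=True, matching the Timm feedforward behaviour.
ff = MLP(
    dim_model=256,
    dropout=0.1,
    activation=Activation.GeLU,
    hidden_layer_multiplier=4,
)
print(any("bias" in n for n, _ in ff.named_parameters()))  # True

# Opting out: neither Linear layer allocates a bias parameter.
ff_no_bias = MLP(
    dim_model=256,
    dropout=0.1,
    activation=Activation.GeLU,
    hidden_layer_multiplier=4,
    bias=False,
)
print(any("bias" in n for n, _ in ff_no_bias.named_parameters()))  # False
```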
