[fix] OSS: Eager gradient release - free memory (#120)
* minor, but gives some memory back
* adjust CI and regression checks to 4 GPUs
blefaudeux committed Oct 1, 2020
1 parent 79ded82 commit 1c2a6f6
Showing 2 changed files with 14 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -100,7 +100,7 @@ run_oss_benchmark: &run_oss_benchmark
   - run:
       name: Run OSS Benchmark
       command: |
-        python benchmarks/oss.py --check_regression
+        python benchmarks/oss.py --check_regression --world_size 4 --reference_speed 21.2 --reference_memory 4220 --reference_loss 0.63
         python benchmarks/oss.py --gloo --optim_type oss
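The new flags pin the CI benchmark to a 4-GPU world size and gate the job on reference numbers for speed, memory, and loss. As a rough illustration of what a --check_regression gate of this kind does, here is a hypothetical sketch; the actual logic lives in benchmarks/oss.py and is not reproduced here, and the 5% tolerance is an assumption:

def check_regression(speed: float, memory: float, loss: float,
                     reference_speed: float = 21.2,
                     reference_memory: float = 4220.0,
                     reference_loss: float = 0.63,
                     tolerance: float = 0.05) -> None:
    # Throughput may not drop, and memory/loss may not grow, beyond the tolerance.
    assert speed >= reference_speed * (1.0 - tolerance), "speed regression"
    assert memory <= reference_memory * (1.0 + tolerance), "memory regression"
    assert loss <= reference_loss * (1.0 + tolerance), "loss regression"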
13 changes: 13 additions & 0 deletions fairscale/optim/oss.py
@@ -157,6 +157,8 @@ def step(self, closure: Optional[Callable[[], float]] = None, **kwargs: Any) ->
         self._sync_param_groups()

         # Run the optimizer step on this shard only:
+        self._free_other_grads()
+
         if closure is not None:
             loss = self.optim.step(closure=closure, **kwargs)  # type: ignore
         else:
@@ -367,3 +369,14 @@ def _broadcast_state_dict(self) -> None:
             # Discard this tensor/rank, broadcast necessary for syncing
             logging.debug("Discarding broadcast from rank %s", rank)
             broadcast_object(empty_buffer, src_rank=rank, group=self.group, dist_device=self._device)
+
+    def _free_other_grads(self) -> None:
+        """Free all the gradients that are only useful to the other ranks
+        """
+        for i, partition in enumerate(self.partition_parameters()):
+            if i == self.rank:
+                continue
+
+            for p in partition:
+                for t in p["params"]:
+                    t.grad = None
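
For readers outside the fairscale codebase, here is a minimal, self-contained sketch of the same eager-release pattern. ToyShardedOptim and its round-robin partitioning are hypothetical stand-ins; only the shape of partition_parameters(), the rank check, and the t.grad = None release mirror the diff above.

import torch


class ToyShardedOptim:
    """Hypothetical stand-in for a sharded optimizer wrapper (not fairscale's API)."""

    def __init__(self, params, rank, world_size):
        self.rank = rank
        # Simplified round-robin partition of the parameters across ranks.
        self._shards = [params[i::world_size] for i in range(world_size)]

    def partition_parameters(self):
        # One list of param-group dicts per rank, mirroring the diff above.
        return [[{"params": shard}] for shard in self._shards]

    def free_other_grads(self):
        # Eager release: gradients owned by other ranks are dropped so the
        # allocator can reclaim them before the local optimizer step runs.
        for i, partition in enumerate(self.partition_parameters()):
            if i == self.rank:
                continue
            for group in partition:
                for t in group["params"]:
                    t.grad = None


params = [torch.randn(4, requires_grad=True) for _ in range(6)]
loss = sum((p ** 2).sum() for p in params)
loss.backward()

optim = ToyShardedOptim(params, rank=0, world_size=2)
optim.free_other_grads()
print([p.grad is not None for p in params])  # [True, False, True, False, True, False]

Setting .grad to None (rather than zeroing it) drops the reference entirely, so the allocator can reclaim each freed gradient tensor immediately instead of every rank holding a full copy of all gradients through the step.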
