updated changelog

facebookresearch · Feb 20, 2021 · 9d3ce35 · 9d3ce35
1 parent fb7f7e5
commit 9d3ce35
Show file tree

Hide file tree

Showing 5 changed files with 16 additions and 14 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## NEXT - TBD
+### Fixed
+- Catch corner case when the model is too small with respect to the world size, and shards are empty ([#406](https://github.com/facebookresearch/fairscale/pull/406))
 
 ## [0.1.7] - 2021-02-19
 ### Fixed

diff --git a/fairscale/optim/oss.py b/fairscale/optim/oss.py
@@ -141,8 +141,10 @@ def partition_parameters(self) -> List[List[dict]]:
                     self._partition_parameters[rank].append(param_group_rank)
 
             assert min(sum(len(pg["params"]) for pg in partition) for partition in self._partition_parameters) > 0, (
-                "One or more empty shards detected, the world size is too big or the model too small."
-                + "Please reduce your world size if this is the model you would like to train"
+                "One or more empty shards detected, the world size is too big or the model too small.\n"
+                + "Please reduce your world size if this is the model you would like to train\n"
+                + f"Current world size: {self.world_size}\n"
+                + "Current number of parameters: {}".format(sum(len(pg["params"]) for pg in self.param_groups))
             )
 
         return self._partition_parameters

diff --git a/fairscale/utils/golden_testing_data.py b/fairscale/utils/golden_testing_data.py
@@ -8,12 +8,12 @@
 
 adascale_test_data = [
     # "input" value is a list of input tensors for micro-batch/rank 0 and micro-batch/rank 1.
-    {"input": [[1.0, 0], [0, 1.0]], "expected_gain": 2.0},
+    {"input": [[1.0, 0], [0, 1.0]], "expected_gain": 4.0 / 3},
     {"input": [[1.0, 1.0], [1.0, 1.0]], "expected_gain": 1.0000001249999846},
     {"input": [[-1.0, 1.0], [1.0, -1.0]], "expected_gain": 2.0},
-    {"input": [[1.0, 4.0], [5.0, 0.5]], "expected_gain": 1.5022222222222221},
-    {"input": [[-0.2, 3.0], [5.0, 0.5]], "expected_gain": 1.9433267229211089},
+    {"input": [[1.0, 4.0], [5.0, 0.5]], "expected_gain": 1.4688796680497926},
+    {"input": [[-0.2, 3.0], [5.0, 0.5]], "expected_gain": 1.8472893901708},
     # "inputs" to trigger multiple iteration tests, which make sure the
     # smoothing factor calculation is also covered.
-    {"inputs": [[[-0.2, 3.3], [5.2, 0.7]], [[1.0, 4.0], [3.1, 0.1]]], "expected_gain": 1.744159431359284},
+    {"inputs": [[[-0.2, 3.3], [5.2, 0.7]], [[1.0, 4.0], [3.1, 0.1]]], "expected_gain": 1.6720968158031417},
 ]
diff --git a/tests/optim/test_oss.py b/tests/optim/test_oss.py
@@ -264,13 +264,9 @@ def test_zero_grad():
 
 def run_test_catch_empty_shardd(rank, world_size, tempfile_name):
     dist_init(rank, world_size, tempfile_name, backend="gloo")
-    x = torch.rand(1)
     m = torch.nn.Linear(1, 1)
-    try:
-        o = optim.OSS(m.parameters(), lr=0.1)
-        assert False, "One shard is empty, this should have been caught"
-    except AssertionError:
-        pass
+    with pytest.raises(AssertionError):
+        _ = optim.OSS(m.parameters(), lr=0.1)
 
     dist.destroy_process_group()
 

diff --git a/tests/optim/test_oss_adascale.py b/tests/optim/test_oss_adascale.py
@@ -37,7 +37,7 @@ def _test_basic_func(rank, world_size, tempfile_name, test_case, oss, model=None
     _dist_init(rank, world_size, tempfile_name, backend="nccl")
 
     if model is None:
-        model = Linear(2, 2, bias=False)
+        model = Linear(2, 2)
     model.to("cuda")
     model = DDP(model, device_ids=[rank])
 
@@ -65,7 +65,9 @@ def _test_basic_func(rank, world_size, tempfile_name, test_case, oss, model=None
         optim.zero_grad()
 
     if "expected_gain" in test_case:
-        assert np.allclose(optim.gain(), test_case["expected_gain"]), optim.gain()
+        assert np.allclose(optim.gain(), test_case["expected_gain"]), "{} vs {}".format(
+            optim.gain(), test_case["expected_gain"]
+        )
 
     if "expected_mean_weight" in test_case:
         mean_weight = mean([model.module[i].weight.data.mean().item() for i in range(4)])