From 0ccee07a3b7d3c6ddfbf547d110aac72d02eb13f Mon Sep 17 00:00:00 2001
From: Mannat Singh
Date: Mon, 22 Mar 2021 11:58:26 -0700
Subject: [PATCH 1/3] Fix broken Sharded SGD tests

Differential Revision: D27235293

fbshipit-source-id: f1ade64a1ecc6887b5fafd7d5a5e9b8bd4acc78d
---
 .circleci/config.yml           | 12 ++++++------
 test/optim_sharded_sgd_test.py | 20 ++++++++++++--------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 71fa7eda7a..c554387368 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -126,10 +126,10 @@ jobs:
       # fallback to using the latest cache if no exact match is found
       - v7-cpu-dependencies-
 
-      - <<: *install_dep
-
       - <<: *install_dev_dep
 
+      - <<: *install_dep
+
       - <<: *pip_list
 
       - save_cache:
@@ -177,10 +177,10 @@
       # fallback to using the latest cache if no exact match is found
       - v4-gpu-dependencies-
 
-      - <<: *install_dep
-
       - <<: *install_dev_dep
 
+      - <<: *install_dep
+
       - <<: *pip_list
 
       - <<: *check_cuda_available
@@ -214,10 +214,10 @@
       # fallback to using the latest cache if no exact match is found
       - v2-gpu-bc-dependencies-
 
-      - <<: *install_dep_bc
-
       - <<: *install_dev_dep
 
+      - <<: *install_dep_bc
+
       - <<: *pip_list
 
       - <<: *check_cuda_available
diff --git a/test/optim_sharded_sgd_test.py b/test/optim_sharded_sgd_test.py
index f647b6be57..5c04a999e6 100644
--- a/test/optim_sharded_sgd_test.py
+++ b/test/optim_sharded_sgd_test.py
@@ -25,25 +25,29 @@
 from classy_vision.optim.zero import ZeRO
 
 
-def dist_init(rank, world_size):
-    os.environ["MASTER_ADDR"] = "localhost"
-    os.environ["MASTER_PORT"] = "29500"
-    dist.init_process_group(backend=dist.Backend.GLOO, rank=rank, world_size=world_size)
+def dist_init(rank, world_size, filename):
+    dist.init_process_group(
+        init_method="file://" + filename,
+        backend=dist.Backend.GLOO,
+        rank=rank,
+        world_size=world_size,
+    )
 
 
 class TestOptimizerStateShardingIntegration(unittest.TestCase, TestOptimizer):
     @staticmethod
-    def _maybe_destro_dist():
+    def _maybe_destroy_dist():
         if dist.is_initialized():
             logging.debug("Destroy previous torch dist process group")
             dist.destroy_process_group()
 
     def setUp(self):
-        self._maybe_destro_dist()
-        dist_init(0, 1)
+        self._maybe_destroy_dist()
+        self.filename = tempfile.NamedTemporaryFile(delete=True).name
+        dist_init(0, 1, self.filename)
 
     def tearDown(self):
-        self._maybe_destro_dist()
+        self._maybe_destroy_dist()
 
     def _get_config(self):
         return {"name": "zero", "base_optimizer": {"name": "sgd"}, "num_epochs": 3}

From cfb6e0a43b5ad585eed30304e563f166a87cea69 Mon Sep 17 00:00:00 2001
From: Mannat Singh
Date: Mon, 22 Mar 2021 11:58:26 -0700
Subject: [PATCH 2/3] Move the adamw test to manual

Differential Revision: D27237736

fbshipit-source-id: 8e4d896081ba8c85b5bbd1b82956a5729c672a69
---
 test/{ => manual}/optim_adamw_test.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/{ => manual}/optim_adamw_test.py (100%)

diff --git a/test/optim_adamw_test.py b/test/manual/optim_adamw_test.py
similarity index 100%
rename from test/optim_adamw_test.py
rename to test/manual/optim_adamw_test.py

From 6ab22d32915a67f3d8a0d273bb9b71527df52ced Mon Sep 17 00:00:00 2001
From: Mannat Singh
Date: Mon, 22 Mar 2021 11:58:38 -0700
Subject: [PATCH 3/3] Fix SiLU breakage in RegNets for PT < 1.7 (#725)

Summary:
Pull Request resolved: https://github.com/facebookresearch/ClassyVision/pull/725

SiLU is only available in PT >= 1.7. Now that our tests work fine, we can
finally detect and fix issues like these.

Differential Revision: D27236649

fbshipit-source-id: 086842f027970e99cadf50ac5e8fc02ec7e508e8
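
For context: nn.SiLU computes x * sigmoid(x) and was only added in PyTorch
1.7. The diff below keeps raising a RuntimeError on older versions rather
than polyfilling the activation. Purely as a hypothetical sketch (not part
of this change), a manual fallback could look like:

    import torch
    import torch.nn as nn

    class SiLUFallback(nn.Module):
        """SiLU(x) = x * sigmoid(x); matches nn.SiLU on PyTorch >= 1.7."""

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return x * torch.sigmoid(x)

    # Use the native module when this PyTorch build provides it.
    silu = nn.SiLU() if hasattr(nn, "SiLU") else SiLUFallback()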
---
 classy_vision/models/regnet.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/classy_vision/models/regnet.py b/classy_vision/models/regnet.py
index a939d5998b..ad4309a060 100644
--- a/classy_vision/models/regnet.py
+++ b/classy_vision/models/regnet.py
@@ -447,17 +447,15 @@ class RegNet(ClassyModel):
     def __init__(self, params: RegNetParams):
         super().__init__()
 
-        if params.activation_type == ActivationType.SILU and get_torch_version() < [
-            1,
-            7,
-        ]:
-            raise RuntimeError("SiLU activation is only supported since PyTorch 1.7")
-
+        silu = None if get_torch_version() < [1, 7] else nn.SiLU()
         activation = {
             ActivationType.RELU: nn.ReLU(params.relu_in_place),
-            ActivationType.SILU: nn.SiLU(),
+            ActivationType.SILU: silu,
         }[params.activation_type]
 
+        if activation is None:
+            raise RuntimeError("SiLU activation is only supported since PyTorch 1.7")
+
         # Ad hoc stem
         self.stem = {
             StemType.RES_STEM_CIFAR: ResStemCifar,
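
A note on the dist_init change in PATCH 1/3: moving from the env-var TCP
rendezvous (hard-coded MASTER_PORT=29500) to a file:// init method means
concurrent test runs on one host no longer race for the same port, which is
presumably what broke the sharded SGD tests. A minimal standalone sketch of
the same pattern (assuming a single-process GLOO group, as in the test):

    import tempfile

    import torch.distributed as dist

    # Rendezvous through a shared temp file instead of a fixed TCP port.
    filename = tempfile.NamedTemporaryFile(delete=True).name
    dist.init_process_group(
        init_method="file://" + filename,
        backend=dist.Backend.GLOO,
        rank=0,
        world_size=1,
    )
    assert dist.is_initialized()
    dist.destroy_process_group()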