From cf653460334705301bc8de769a1de86fdf089f34 Mon Sep 17 00:00:00 2001 From: Priya Goyal Date: Tue, 9 Feb 2021 12:36:10 -0800 Subject: [PATCH] Organize hooks better in a dedicated HOOKS key in the config (#174) Summary: Pull Request resolved: https://github.com/facebookresearch/vissl/pull/174 VISSL has many hooks that can be configured, but they were scattered all over defaults.yaml, making it unclear which parameters are hooks. This reorganizes them under `HOOKS` for better readability. Reviewed By: min-xu-ai Differential Revision: D26284129 fbshipit-source-id: c71a69bb838cdb76d3e0594efddf584a5b3f8b68 --- GETTING_STARTED.md | 6 +- ...al_resnet_8gpu_transfer_in1k_fulltune.yaml | 4 +- ...eval_resnet_8gpu_transfer_in1k_linear.yaml | 4 +- ...inal_output_8gpu_transfer_in1k_linear.yaml | 4 +- ...resnet_moco_8gpu_transfer_in1k_linear.yaml | 4 +- ...pool_output_8gpu_transfer_in1k_linear.yaml | 4 +- ...et_8gpu_transfer_inaturalist18_linear.yaml | 4 +- ...esnet_8gpu_transfer_openimages_linear.yaml | 4 +- ...snet_8gpu_transfer_places205_fulltune.yaml | 4 +- ..._8gpu_transfer_in1k_semi_sup_fulltune.yaml | 4 +- ..._transfer_places205_semi_sup_fulltune.yaml | 4 +- ...l_resnet_8gpu_transfer_imagenette_160.yaml | 4 +- .../simclr_1node_resnet_imagenette_160.yaml | 6 +- .../clusterfit_resnet_8gpu_imagenet.yaml | 4 +- .../deepclusterv2_2crops_resnet.yaml | 6 +- .../pretrain/jigsaw/jigsaw_8gpu_resnet.yaml | 4 +- .../pretrain/moco/moco_1node_resnet.yaml | 16 +-- .../pretrain/npid/npid++_4nodes_resnet.yaml | 4 +- .../pretrain/npid/npid_8gpu_resnet.yaml | 4 +- .../pretrain/rotnet/rotnet_8gpu_resnet.yaml | 4 +- .../simclr/models/efficientnet_B0.yaml | 7 +- .../simclr/models/efficientnet_B1.yaml | 3 +- .../simclr/models/efficientnet_B2.yaml | 3 +- .../pretrain/simclr/simclr_8node_resnet.yaml | 6 +- .../pretrain/swav/models/efficientnet_B0.yaml | 3 +- .../pretrain/swav/models/efficientnet_B1.yaml | 3 +- .../pretrain/swav/models/efficientnet_B2.yaml | 3 +- .../pretrain/swav/swav_8node_resnet.yaml | 6 +- .../test_cpu_efficientnet_simclr.yaml | 10 +- .../test/cpu_test/test_cpu_regnet_moco.yaml | 8 +- .../test/cpu_test/test_cpu_regnet_simclr.yaml | 4 +- .../test/cpu_test/test_cpu_resnet_simclr.yaml | 8 +- .../quick_deepcluster_v2.yaml | 8 +- .../quick_eval_in1k_linear.yaml | 4 +- .../quick_eval_in1k_linear_imagefolder.yaml | 4 +- ...ick_eval_in1k_linear_imagefolder_head.yaml | 4 +- .../test/integration_test/quick_moco.yaml | 8 +- .../test/integration_test/quick_pirl.yaml | 12 +- .../test/integration_test/quick_simclr.yaml | 8 +- .../integration_test/quick_simclr_2node.yaml | 8 +- .../quick_simclr_checkpointing.yaml | 8 +- .../quick_simclr_efficientnet.yaml | 10 +- .../quick_simclr_imagefolder.yaml | 8 +- .../quick_simclr_multicrop.yaml | 8 +- .../quick_simclr_pytorch_amp.yaml | 8 +- .../integration_test/quick_simclr_regnet.yaml | 10 +- .../quick_simclr_sync_loader.yaml | 8 +- .../quick_simclr_synthetic.yaml | 8 +- .../test/integration_test/quick_swav.yaml | 8 +- .../integration_test/quick_swav_momentum.yaml | 8 +- dev/run_quick_tests.sh | 2 +- docker/Dockerfile | 2 +- docker/conda/Dockerfile | 2 +- docs/source/getting_started.rst | 7 +- docs/source/vissl_modules/hooks.rst | 2 +- docs/source/visualization.rst | 8 +- vissl/config/defaults.yaml | 103 +++++++++++------- vissl/hooks/__init__.py | 20 ++-- vissl/hooks/state_update_hooks.py | 4 +- vissl/utils/tensorboard.py | 8 +- 60 files changed, 288 insertions(+), 172 deletions(-) diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md index 19c5f716f..7b5a99d35 100644 ---
a/GETTING_STARTED.md +++ b/GETTING_STARTED.md @@ -41,7 +41,7 @@ imagenet_full_size ### If VISSL is built from source We provide a config to train model using the pretext SimCLR task on the ResNet50 model. -Change the `DATA.TRAIN.DATA_PATHS` path to the ImageNet train dataset folder path. +Change the `DATA.TRAIN.DATA_PATHS` path to the ImageNet train dataset folder path. ```bash cd $HOME/vissl @@ -52,7 +52,7 @@ python3 tools/run_distributed_engines.py \ config.DATA.TRAIN.DATA_PATHS=["/path/to/my/imagenet/folder/train"] \ config=test/integration_test/quick_simclr_imagefolder \ config.CHECKPOINT.DIR="./checkpoints" \ - config.TENSORBOARD_SETUP.USE_TENSORBOARD=true + config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true ``` ### If using pre-built conda/pip VISSL packages @@ -100,7 +100,7 @@ python3 run_distributed_engines.py \ config.DATA.TRAIN.DATA_PATHS=["/path/to/my/imagenet/folder/train"] \ config=quick_1gpu_resnet50_simclr \ config.CHECKPOINT.DIR="./checkpoints" \ - config.TENSORBOARD_SETUP.USE_TENSORBOARD=true + config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true ``` Explore **all the parameters and settings VISSL supports** in [VISSL defaults.yaml file](https://github.com/facebookresearch/vissl/blob/master/vissl/config/defaults.yaml) diff --git a/configs/config/benchmark/imagenet1k_fulltune/eval_resnet_8gpu_transfer_in1k_fulltune.yaml b/configs/config/benchmark/imagenet1k_fulltune/eval_resnet_8gpu_transfer_in1k_fulltune.yaml index f61495712..4772feda7 100644 --- a/configs/config/benchmark/imagenet1k_fulltune/eval_resnet_8gpu_transfer_in1k_fulltune.yaml +++ b/configs/config/benchmark/imagenet1k_fulltune/eval_resnet_8gpu_transfer_in1k_fulltune.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/benchmark/linear_image_classification/imagenet1k/eval_resnet_8gpu_transfer_in1k_linear.yaml b/configs/config/benchmark/linear_image_classification/imagenet1k/eval_resnet_8gpu_transfer_in1k_linear.yaml index 50f92aafc..014c58fe6 100644 --- a/configs/config/benchmark/linear_image_classification/imagenet1k/eval_resnet_8gpu_transfer_in1k_linear.yaml +++ b/configs/config/benchmark/linear_image_classification/imagenet1k/eval_resnet_8gpu_transfer_in1k_linear.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_final_output_8gpu_transfer_in1k_linear.yaml b/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_final_output_8gpu_transfer_in1k_linear.yaml index 1e37cbd8e..af06f7f90 100644 --- a/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_final_output_8gpu_transfer_in1k_linear.yaml +++ b/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_final_output_8gpu_transfer_in1k_linear.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_moco_8gpu_transfer_in1k_linear.yaml 
b/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_moco_8gpu_transfer_in1k_linear.yaml index 71bcab72f..7d7923470 100644 --- a/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_moco_8gpu_transfer_in1k_linear.yaml +++ b/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_moco_8gpu_transfer_in1k_linear.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 28 TRAIN: diff --git a/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_res5_avgpool_output_8gpu_transfer_in1k_linear.yaml b/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_res5_avgpool_output_8gpu_transfer_in1k_linear.yaml index 8940fb098..9295a6329 100644 --- a/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_res5_avgpool_output_8gpu_transfer_in1k_linear.yaml +++ b/configs/config/benchmark/linear_image_classification/imagenet1k/other_styles/eval_resnet_res5_avgpool_output_8gpu_transfer_in1k_linear.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/benchmark/linear_image_classification/inaturalist18/eval_resnet_8gpu_transfer_inaturalist18_linear.yaml b/configs/config/benchmark/linear_image_classification/inaturalist18/eval_resnet_8gpu_transfer_inaturalist18_linear.yaml index c40063ed1..1b8ade49d 100644 --- a/configs/config/benchmark/linear_image_classification/inaturalist18/eval_resnet_8gpu_transfer_inaturalist18_linear.yaml +++ b/configs/config/benchmark/linear_image_classification/inaturalist18/eval_resnet_8gpu_transfer_inaturalist18_linear.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 8 TRAIN: diff --git a/configs/config/benchmark/linear_image_classification/openimages/eval_resnet_8gpu_transfer_openimages_linear.yaml b/configs/config/benchmark/linear_image_classification/openimages/eval_resnet_8gpu_transfer_openimages_linear.yaml index a3a9838a7..57feec497 100644 --- a/configs/config/benchmark/linear_image_classification/openimages/eval_resnet_8gpu_transfer_openimages_linear.yaml +++ b/configs/config/benchmark/linear_image_classification/openimages/eval_resnet_8gpu_transfer_openimages_linear.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 2 TRAIN: diff --git a/configs/config/benchmark/places205_fulltune/eval_resnet_8gpu_transfer_places205_fulltune.yaml b/configs/config/benchmark/places205_fulltune/eval_resnet_8gpu_transfer_places205_fulltune.yaml index 45a256d21..0fe762956 100644 --- a/configs/config/benchmark/places205_fulltune/eval_resnet_8gpu_transfer_places205_fulltune.yaml +++ b/configs/config/benchmark/places205_fulltune/eval_resnet_8gpu_transfer_places205_fulltune.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: 
NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/benchmark/semi_supervised/imagenet1k/eval_resnet_8gpu_transfer_in1k_semi_sup_fulltune.yaml b/configs/config/benchmark/semi_supervised/imagenet1k/eval_resnet_8gpu_transfer_in1k_semi_sup_fulltune.yaml index 3ef854658..cd7424c4e 100644 --- a/configs/config/benchmark/semi_supervised/imagenet1k/eval_resnet_8gpu_transfer_in1k_semi_sup_fulltune.yaml +++ b/configs/config/benchmark/semi_supervised/imagenet1k/eval_resnet_8gpu_transfer_in1k_semi_sup_fulltune.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: False + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: False DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/benchmark/semi_supervised/places205/eval_resnet_8gpu_transfer_places205_semi_sup_fulltune.yaml b/configs/config/benchmark/semi_supervised/places205/eval_resnet_8gpu_transfer_places205_semi_sup_fulltune.yaml index 5102ed579..bc737fde6 100644 --- a/configs/config/benchmark/semi_supervised/places205/eval_resnet_8gpu_transfer_places205_semi_sup_fulltune.yaml +++ b/configs/config/benchmark/semi_supervised/places205/eval_resnet_8gpu_transfer_places205_semi_sup_fulltune.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: False + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: False DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/debugging/benchmark/linear_image_classification/eval_resnet_8gpu_transfer_imagenette_160.yaml b/configs/config/debugging/benchmark/linear_image_classification/eval_resnet_8gpu_transfer_imagenette_160.yaml index d7e804478..8865ce575 100644 --- a/configs/config/debugging/benchmark/linear_image_classification/eval_resnet_8gpu_transfer_imagenette_160.yaml +++ b/configs/config/debugging/benchmark/linear_image_classification/eval_resnet_8gpu_transfer_imagenette_160.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/debugging/pretrain/simclr/simclr_1node_resnet_imagenette_160.yaml b/configs/config/debugging/pretrain/simclr/simclr_1node_resnet_imagenette_160.yaml index b5b439946..b36208e9e 100644 --- a/configs/config/debugging/pretrain/simclr/simclr_1node_resnet_imagenette_160.yaml +++ b/configs/config/debugging/pretrain/simclr/simclr_1node_resnet_imagenette_160.yaml @@ -6,8 +6,10 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - ROLLING_BTIME_FREQ: 313 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + ROLLING_BTIME_FREQ: 313 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/pretrain/clusterfit/clusterfit_resnet_8gpu_imagenet.yaml b/configs/config/pretrain/clusterfit/clusterfit_resnet_8gpu_imagenet.yaml index 774621606..cece16ada 100644 --- a/configs/config/pretrain/clusterfit/clusterfit_resnet_8gpu_imagenet.yaml +++ b/configs/config/pretrain/clusterfit/clusterfit_resnet_8gpu_imagenet.yaml @@ -6,7 +6,9 @@ config: TEST_MODEL: False SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: False + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: False DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/pretrain/deepcluster_v2/deepclusterv2_2crops_resnet.yaml b/configs/config/pretrain/deepcluster_v2/deepclusterv2_2crops_resnet.yaml index 4066abd1d..377c3aed2 100644 --- 
a/configs/config/pretrain/deepcluster_v2/deepclusterv2_2crops_resnet.yaml +++ b/configs/config/pretrain/deepcluster_v2/deepclusterv2_2crops_resnet.yaml @@ -6,8 +6,10 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - ROLLING_BTIME_FREQ: 313 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + ROLLING_BTIME_FREQ: 313 DATA: NUM_DATALOADER_WORKERS: 8 TRAIN: diff --git a/configs/config/pretrain/jigsaw/jigsaw_8gpu_resnet.yaml b/configs/config/pretrain/jigsaw/jigsaw_8gpu_resnet.yaml index 1bcebe874..3c9fb2491 100644 --- a/configs/config/pretrain/jigsaw/jigsaw_8gpu_resnet.yaml +++ b/configs/config/pretrain/jigsaw/jigsaw_8gpu_resnet.yaml @@ -6,7 +6,9 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: False + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: False DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/pretrain/moco/moco_1node_resnet.yaml b/configs/config/pretrain/moco/moco_1node_resnet.yaml index ce6346041..ac60cba2d 100644 --- a/configs/config/pretrain/moco/moco_1node_resnet.yaml +++ b/configs/config/pretrain/moco/moco_1node_resnet.yaml @@ -6,13 +6,15 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - ROLLING_BTIME_FREQ: 313 - TENSORBOARD_SETUP: - USE_TENSORBOARD: True - EXPERIMENT_LOG_DIR: "moco_v2_reference" - LOG_PARAMS: False - FLUSH_EVERY_N_MIN: 20 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + ROLLING_BTIME_FREQ: 313 + TENSORBOARD_SETUP: + USE_TENSORBOARD: True + EXPERIMENT_LOG_DIR: "moco_v2_reference" + LOG_PARAMS: False + FLUSH_EVERY_N_MIN: 20 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/pretrain/npid/npid++_4nodes_resnet.yaml b/configs/config/pretrain/npid/npid++_4nodes_resnet.yaml index 4403e3de0..3ea579340 100644 --- a/configs/config/pretrain/npid/npid++_4nodes_resnet.yaml +++ b/configs/config/pretrain/npid/npid++_4nodes_resnet.yaml @@ -6,7 +6,9 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: False + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: False DATA: NUM_DATALOADER_WORKERS: 8 TRAIN: diff --git a/configs/config/pretrain/npid/npid_8gpu_resnet.yaml b/configs/config/pretrain/npid/npid_8gpu_resnet.yaml index 0e77cee33..0bcf57cba 100644 --- a/configs/config/pretrain/npid/npid_8gpu_resnet.yaml +++ b/configs/config/pretrain/npid/npid_8gpu_resnet.yaml @@ -6,7 +6,9 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: False + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: False DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/pretrain/rotnet/rotnet_8gpu_resnet.yaml b/configs/config/pretrain/rotnet/rotnet_8gpu_resnet.yaml index f184e4c0e..b00ef707a 100644 --- a/configs/config/pretrain/rotnet/rotnet_8gpu_resnet.yaml +++ b/configs/config/pretrain/rotnet/rotnet_8gpu_resnet.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: False + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: False DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/pretrain/simclr/models/efficientnet_B0.yaml b/configs/config/pretrain/simclr/models/efficientnet_B0.yaml index 769a56ade..a168d9191 100644 --- a/configs/config/pretrain/simclr/models/efficientnet_B0.yaml +++ b/configs/config/pretrain/simclr/models/efficientnet_B0.yaml @@ -1,5 +1,9 @@ # @package _global_ config: + HOOKS: + MODEL_COMPLEXITY: + COMPUTE_COMPLEXITY: True + 
INPUT_SHAPE: [3, 224, 224] DATA: TRAIN: TRANSFORMS: @@ -20,9 +24,6 @@ config: mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] MODEL: - MODEL_COMPLEXITY: - COMPUTE_COMPLEXITY: True - INPUT_SHAPE: [3, 224, 224] TRUNK: NAME: efficientnet TRUNK_PARAMS: diff --git a/configs/config/pretrain/simclr/models/efficientnet_B1.yaml b/configs/config/pretrain/simclr/models/efficientnet_B1.yaml index be1ef2aaf..00ca0e1fe 100644 --- a/configs/config/pretrain/simclr/models/efficientnet_B1.yaml +++ b/configs/config/pretrain/simclr/models/efficientnet_B1.yaml @@ -19,10 +19,11 @@ config: - name: Normalize mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] - MODEL: + HOOKS: MODEL_COMPLEXITY: COMPUTE_COMPLEXITY: True INPUT_SHAPE: [3, 240, 240] + MODEL: TRUNK: NAME: efficientnet TRUNK_PARAMS: diff --git a/configs/config/pretrain/simclr/models/efficientnet_B2.yaml b/configs/config/pretrain/simclr/models/efficientnet_B2.yaml index 327156b3e..95aa2a6cb 100644 --- a/configs/config/pretrain/simclr/models/efficientnet_B2.yaml +++ b/configs/config/pretrain/simclr/models/efficientnet_B2.yaml @@ -19,10 +19,11 @@ config: - name: Normalize mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] - MODEL: + HOOKS: MODEL_COMPLEXITY: COMPUTE_COMPLEXITY: True INPUT_SHAPE: [3, 260, 260] + MODEL: TRUNK: NAME: efficientnet TRUNK_PARAMS: diff --git a/configs/config/pretrain/simclr/simclr_8node_resnet.yaml b/configs/config/pretrain/simclr/simclr_8node_resnet.yaml index 080fe6465..2d38dde27 100644 --- a/configs/config/pretrain/simclr/simclr_8node_resnet.yaml +++ b/configs/config/pretrain/simclr/simclr_8node_resnet.yaml @@ -6,8 +6,10 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - ROLLING_BTIME_FREQ: 313 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + ROLLING_BTIME_FREQ: 313 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/pretrain/swav/models/efficientnet_B0.yaml b/configs/config/pretrain/swav/models/efficientnet_B0.yaml index d8fd9374d..bad7d59c8 100644 --- a/configs/config/pretrain/swav/models/efficientnet_B0.yaml +++ b/configs/config/pretrain/swav/models/efficientnet_B0.yaml @@ -20,10 +20,11 @@ config: - name: Normalize mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] - MODEL: + HOOKS: MODEL_COMPLEXITY: COMPUTE_COMPLEXITY: True INPUT_SHAPE: [3, 224, 224] + MODEL: TRUNK: NAME: efficientnet TRUNK_PARAMS: diff --git a/configs/config/pretrain/swav/models/efficientnet_B1.yaml b/configs/config/pretrain/swav/models/efficientnet_B1.yaml index e59b080dd..2bb48104d 100644 --- a/configs/config/pretrain/swav/models/efficientnet_B1.yaml +++ b/configs/config/pretrain/swav/models/efficientnet_B1.yaml @@ -20,10 +20,11 @@ config: - name: Normalize mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] - MODEL: + HOOKS: MODEL_COMPLEXITY: COMPUTE_COMPLEXITY: True INPUT_SHAPE: [3, 240, 240] + MODEL: TRUNK: NAME: efficientnet TRUNK_PARAMS: diff --git a/configs/config/pretrain/swav/models/efficientnet_B2.yaml b/configs/config/pretrain/swav/models/efficientnet_B2.yaml index 314e03d5c..1589882b4 100644 --- a/configs/config/pretrain/swav/models/efficientnet_B2.yaml +++ b/configs/config/pretrain/swav/models/efficientnet_B2.yaml @@ -20,10 +20,11 @@ config: - name: Normalize mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] - MODEL: + HOOKS: MODEL_COMPLEXITY: COMPUTE_COMPLEXITY: True INPUT_SHAPE: [3, 260, 260] + MODEL: TRUNK: NAME: efficientnet TRUNK_PARAMS: diff --git a/configs/config/pretrain/swav/swav_8node_resnet.yaml 
b/configs/config/pretrain/swav/swav_8node_resnet.yaml index a6a67bfe1..4356f702b 100644 --- a/configs/config/pretrain/swav/swav_8node_resnet.yaml +++ b/configs/config/pretrain/swav/swav_8node_resnet.yaml @@ -6,8 +6,10 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - ROLLING_BTIME_FREQ: 313 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + ROLLING_BTIME_FREQ: 313 DATA: NUM_DATALOADER_WORKERS: 8 TRAIN: diff --git a/configs/config/test/cpu_test/test_cpu_efficientnet_simclr.yaml b/configs/config/test/cpu_test/test_cpu_efficientnet_simclr.yaml index 51eff0064..bcde2f3e4 100644 --- a/configs/config/test/cpu_test/test_cpu_efficientnet_simclr.yaml +++ b/configs/config/test/cpu_test/test_cpu_efficientnet_simclr.yaml @@ -6,7 +6,12 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + MODEL_COMPLEXITY: + COMPUTE_COMPLEXITY: False + INPUT_SHAPE: [3, 224, 224] DATA: NUM_DATALOADER_WORKERS: 0 TRAIN: @@ -40,9 +45,6 @@ config: METERS: name: "" MODEL: - MODEL_COMPLEXITY: - COMPUTE_COMPLEXITY: False - INPUT_SHAPE: [3, 224, 224] TRUNK: NAME: efficientnet TRUNK_PARAMS: diff --git a/configs/config/test/cpu_test/test_cpu_regnet_moco.yaml b/configs/config/test/cpu_test/test_cpu_regnet_moco.yaml index 9ae4973b0..1c305b6fb 100644 --- a/configs/config/test/cpu_test/test_cpu_regnet_moco.yaml +++ b/configs/config/test/cpu_test/test_cpu_regnet_moco.yaml @@ -6,7 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + MODEL_COMPLEXITY: + COMPUTE_COMPLEXITY: False DATA: NUM_DATALOADER_WORKERS: 0 TRAIN: @@ -40,8 +44,6 @@ config: METERS: name: "" MODEL: - MODEL_COMPLEXITY: - COMPUTE_COMPLEXITY: False TRUNK: NAME: regnet TRUNK_PARAMS: diff --git a/configs/config/test/cpu_test/test_cpu_regnet_simclr.yaml b/configs/config/test/cpu_test/test_cpu_regnet_simclr.yaml index c8ca57429..905ececf0 100644 --- a/configs/config/test/cpu_test/test_cpu_regnet_simclr.yaml +++ b/configs/config/test/cpu_test/test_cpu_regnet_simclr.yaml @@ -6,7 +6,9 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 0 TRAIN: diff --git a/configs/config/test/cpu_test/test_cpu_resnet_simclr.yaml b/configs/config/test/cpu_test/test_cpu_resnet_simclr.yaml index c1361eb29..483640e61 100644 --- a/configs/config/test/cpu_test/test_cpu_resnet_simclr.yaml +++ b/configs/config/test/cpu_test/test_cpu_resnet_simclr.yaml @@ -6,7 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + MODEL_COMPLEXITY: + COMPUTE_COMPLEXITY: False DATA: NUM_DATALOADER_WORKERS: 0 TRAIN: @@ -40,8 +44,6 @@ config: METERS: name: "" MODEL: - MODEL_COMPLEXITY: - COMPUTE_COMPLEXITY: False TRUNK: NAME: resnet TRUNK_PARAMS: diff --git a/configs/config/test/integration_test/quick_deepcluster_v2.yaml b/configs/config/test/integration_test/quick_deepcluster_v2.yaml index b6118f83d..08c79c02f 100644 --- a/configs/config/test/integration_test/quick_deepcluster_v2.yaml +++ b/configs/config/test/integration_test/quick_deepcluster_v2.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 40 - 
ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 40 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_eval_in1k_linear.yaml b/configs/config/test/integration_test/quick_eval_in1k_linear.yaml index 882476dca..c1edb0bb7 100644 --- a/configs/config/test/integration_test/quick_eval_in1k_linear.yaml +++ b/configs/config/test/integration_test/quick_eval_in1k_linear.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 2 TRAIN: diff --git a/configs/config/test/integration_test/quick_eval_in1k_linear_imagefolder.yaml b/configs/config/test/integration_test/quick_eval_in1k_linear_imagefolder.yaml index c1dacceb5..2b9135e54 100644 --- a/configs/config/test/integration_test/quick_eval_in1k_linear_imagefolder.yaml +++ b/configs/config/test/integration_test/quick_eval_in1k_linear_imagefolder.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 2 TRAIN: diff --git a/configs/config/test/integration_test/quick_eval_in1k_linear_imagefolder_head.yaml b/configs/config/test/integration_test/quick_eval_in1k_linear_imagefolder_head.yaml index a445d4a59..691ee8706 100644 --- a/configs/config/test/integration_test/quick_eval_in1k_linear_imagefolder_head.yaml +++ b/configs/config/test/integration_test/quick_eval_in1k_linear_imagefolder_head.yaml @@ -7,7 +7,9 @@ config: TEST_MODEL: True SEED_VALUE: 1 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True DATA: NUM_DATALOADER_WORKERS: 2 TRAIN: diff --git a/configs/config/test/integration_test/quick_moco.yaml b/configs/config/test/integration_test/quick_moco.yaml index 2c8d21c3b..1609f0d4b 100644 --- a/configs/config/test/integration_test/quick_moco.yaml +++ b/configs/config/test/integration_test/quick_moco.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_pirl.yaml b/configs/config/test/integration_test/quick_pirl.yaml index 89b5207a0..3db54b961 100644 --- a/configs/config/test/integration_test/quick_pirl.yaml +++ b/configs/config/test/integration_test/quick_pirl.yaml @@ -6,9 +6,13 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 + MODEL_COMPLEXITY: + COMPUTE_COMPLEXITY: False DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: @@ -47,8 +51,6 @@ config: METERS: name: "" MODEL: - MODEL_COMPLEXITY: - COMPUTE_COMPLEXITY: False TRUNK: NAME: resnet TRUNK_PARAMS: diff --git a/configs/config/test/integration_test/quick_simclr.yaml b/configs/config/test/integration_test/quick_simclr.yaml index 9a4f115fe..12bbe30fd 100644 --- a/configs/config/test/integration_test/quick_simclr.yaml +++ b/configs/config/test/integration_test/quick_simclr.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 
MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_simclr_2node.yaml b/configs/config/test/integration_test/quick_simclr_2node.yaml index ce60fc32d..928a828aa 100644 --- a/configs/config/test/integration_test/quick_simclr_2node.yaml +++ b/configs/config/test/integration_test/quick_simclr_2node.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_simclr_checkpointing.yaml b/configs/config/test/integration_test/quick_simclr_checkpointing.yaml index f063bd6a7..01f51ee3c 100644 --- a/configs/config/test/integration_test/quick_simclr_checkpointing.yaml +++ b/configs/config/test/integration_test/quick_simclr_checkpointing.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_simclr_efficientnet.yaml b/configs/config/test/integration_test/quick_simclr_efficientnet.yaml index 596e18736..af7702324 100644 --- a/configs/config/test/integration_test/quick_simclr_efficientnet.yaml +++ b/configs/config/test/integration_test/quick_simclr_efficientnet.yaml @@ -6,7 +6,12 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + MODEL_COMPLEXITY: + COMPUTE_COMPLEXITY: False + INPUT_SHAPE: [3, 224, 224] DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: @@ -42,9 +47,6 @@ config: METERS: name: "" MODEL: - MODEL_COMPLEXITY: - COMPUTE_COMPLEXITY: False - INPUT_SHAPE: [3, 224, 224] TRUNK: NAME: efficientnet TRUNK_PARAMS: diff --git a/configs/config/test/integration_test/quick_simclr_imagefolder.yaml b/configs/config/test/integration_test/quick_simclr_imagefolder.yaml index 569d8fc47..14eacaee7 100644 --- a/configs/config/test/integration_test/quick_simclr_imagefolder.yaml +++ b/configs/config/test/integration_test/quick_simclr_imagefolder.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_simclr_multicrop.yaml b/configs/config/test/integration_test/quick_simclr_multicrop.yaml index bcf6fa103..dc55a3cfc 100644 --- a/configs/config/test/integration_test/quick_simclr_multicrop.yaml +++ b/configs/config/test/integration_test/quick_simclr_multicrop.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: 
NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_simclr_pytorch_amp.yaml b/configs/config/test/integration_test/quick_simclr_pytorch_amp.yaml index f2cd35fc6..0e0fe5214 100644 --- a/configs/config/test/integration_test/quick_simclr_pytorch_amp.yaml +++ b/configs/config/test/integration_test/quick_simclr_pytorch_amp.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_simclr_regnet.yaml b/configs/config/test/integration_test/quick_simclr_regnet.yaml index 525c9dd79..503a423ab 100644 --- a/configs/config/test/integration_test/quick_simclr_regnet.yaml +++ b/configs/config/test/integration_test/quick_simclr_regnet.yaml @@ -6,7 +6,12 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + MODEL_COMPLEXITY: + COMPUTE_COMPLEXITY: False + INPUT_SHAPE: [3, 224, 224] DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: @@ -42,9 +47,6 @@ config: METERS: name: "" MODEL: - MODEL_COMPLEXITY: - COMPUTE_COMPLEXITY: False - INPUT_SHAPE: [3, 224, 224] TRUNK: NAME: regnet TRUNK_PARAMS: diff --git a/configs/config/test/integration_test/quick_simclr_sync_loader.yaml b/configs/config/test/integration_test/quick_simclr_sync_loader.yaml index 281b1431b..9430ef201 100644 --- a/configs/config/test/integration_test/quick_simclr_sync_loader.yaml +++ b/configs/config/test/integration_test/quick_simclr_sync_loader.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 ENABLE_ASYNC_GPU_COPY: false diff --git a/configs/config/test/integration_test/quick_simclr_synthetic.yaml b/configs/config/test/integration_test/quick_simclr_synthetic.yaml index 69020da24..dd4a6ede5 100644 --- a/configs/config/test/integration_test/quick_simclr_synthetic.yaml +++ b/configs/config/test/integration_test/quick_simclr_synthetic.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 10 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 10 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_swav.yaml b/configs/config/test/integration_test/quick_swav.yaml index bf9618d13..27f02bc3a 100644 --- a/configs/config/test/integration_test/quick_swav.yaml +++ b/configs/config/test/integration_test/quick_swav.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 40 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 40 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/configs/config/test/integration_test/quick_swav_momentum.yaml b/configs/config/test/integration_test/quick_swav_momentum.yaml index e42971cda..87da2b26d 100644 --- a/configs/config/test/integration_test/quick_swav_momentum.yaml +++ 
b/configs/config/test/integration_test/quick_swav_momentum.yaml @@ -6,9 +6,11 @@ config: TEST_MODEL: False SEED_VALUE: 0 MULTI_PROCESSING_METHOD: forkserver - MONITOR_PERF_STATS: True - PERF_STAT_FREQUENCY: 40 - ROLLING_BTIME_FREQ: 5 + HOOKS: + PERF_STATS: + MONITOR_PERF_STATS: True + PERF_STAT_FREQUENCY: 40 + ROLLING_BTIME_FREQ: 5 DATA: NUM_DATALOADER_WORKERS: 5 TRAIN: diff --git a/dev/run_quick_tests.sh b/dev/run_quick_tests.sh index 15b302b65..29a9fe8e1 100755 --- a/dev/run_quick_tests.sh +++ b/dev/run_quick_tests.sh @@ -30,7 +30,7 @@ for cfg in "${CFG_LIST[@]}"; do CUDA_LAUNCH_BLOCKING=1 $BINARY config=$cfg \ config.DATA.TRAIN.DATA_SOURCES=[synthetic] \ hydra.verbose=true \ - config.TENSORBOARD_SETUP.USE_TENSORBOARD=true \ + config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true \ config.CHECKPOINT.DIR="$CHECKPOINT_DIR" && echo "TEST OK" || exit rm -rf $CHECKPOINT_DIR diff --git a/docker/Dockerfile b/docker/Dockerfile index 5dc8e5a4a..aa8289616 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -51,4 +51,4 @@ CMD ["bash"] ######## run gpu test # python tools/run_distributed_engines.py \ # config=test/circleci/quick_simclr_synthetic \ -# hydra.verbose=true config.TENSORBOARD_SETUP.USE_TENSORBOARD=true +# hydra.verbose=true config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true diff --git a/docker/conda/Dockerfile b/docker/conda/Dockerfile index 381e6646a..893c0965b 100644 --- a/docker/conda/Dockerfile +++ b/docker/conda/Dockerfile @@ -46,4 +46,4 @@ CMD ["bash"] ######## run gpu test # python tools/run_distributed_engines.py \ # config=test/circleci/quick_simclr_synthetic \ -# hydra.verbose=true config.TENSORBOARD_SETUP.USE_TENSORBOARD=true +# hydra.verbose=true config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index a38b4050f..278de6274 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -64,7 +64,7 @@ Change the :code:`DATA.TRAIN.DATA_PATHS` path to the ImageNet train dataset fold config.DATA.TRAIN.DATA_PATHS=["/path/to/my/imagenet/folder/train"] \ config=test/integration_test/quick_simclr \ config.CHECKPOINT.DIR="./checkpoints" \ - config.TENSORBOARD_SETUP.USE_TENSORBOARD=true + config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true If using pre-built conda/pip VISSL packages @@ -118,7 +118,4 @@ We will use the pre-built VISSL tool for training `run_distributed_engines.py `_. - + config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true diff --git a/docs/source/vissl_modules/hooks.rst b/docs/source/vissl_modules/hooks.rst index 5ca418a86..7903adb71 100644 --- a/docs/source/vissl_modules/hooks.rst +++ b/docs/source/vissl_modules/hooks.rst @@ -30,7 +30,7 @@ VISSL supports many hooks. Users can configure which hooks to use from simple co - :code:`Tensorboard hook`: to enable this hook, set :code:`TENSORBOARD_SETUP.USE_TENSORBOARD=true` and configure the tensorboard settings -- :code:`Model Complexity hook`: this hook performs one single forward pass of the model on the synthetic input and computes the #FLOPs, #params and #activations in the model. To enable this hook, set :code:`MODEL.MODEL_COMPLEXITY.COMPUTE_COMPLEXITY=true` and configure it. +- :code:`Model Complexity hook`: this hook performs one single forward pass of the model on the synthetic input and computes the #FLOPs, #params and #activations in the model. To enable this hook, set :code:`HOOKS.MODEL_COMPLEXITY.COMPUTE_COMPLEXITY=true` and configure it. 
- :code:`Self-supervised Loss hooks`: VISSL has hooks specific to self-supervised approaches like MoCo, SwAV etc. These hooks are handy in performing some intermediate operations required in self-supervision. For example: :code:`MoCoHook` is called after every forward pass of the model and updates the momentum encoder network. Users don't need to do anything special for using these hooks. If the user configuration file has the loss function for an approach, VISSL will automatically enable the hooks for the approach. diff --git a/docs/source/visualization.rst b/docs/source/visualization.rst index 1fced8d4c..3e409f889 100644 --- a/docs/source/visualization.rst +++ b/docs/source/visualization.rst @@ -52,7 +52,7 @@ For example, to use Tensorboard during SwAV training, the command would look lik .. code-block:: bash python tools/run_distributed_engines.py config=pretrain/swav/swav_8node_resnet \ - config.TENSORBOARD_SETUP.USE_TENSORBOARD=true \ - config.TENSORBOARD_SETUP.LOG_PARAMS=true \ - config.TENSORBOARD_SETUP.LOG_PARAMS_GRADIENTS=true \ - config.TENSORBOARD_SETUP.LOG_DIR=/tmp/swav_tensorboard_events/ + config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true \ + config.HOOKS.TENSORBOARD_SETUP.LOG_PARAMS=true \ + config.HOOKS.TENSORBOARD_SETUP.LOG_PARAMS_GRADIENTS=true \ + config.HOOKS.TENSORBOARD_SETUP.LOG_DIR=/tmp/swav_tensorboard_events/ diff --git a/vissl/config/defaults.yaml b/vissl/config/defaults.yaml index a2b082872..1be7aeaaf 100644 --- a/vissl/config/defaults.yaml +++ b/vissl/config/defaults.yaml @@ -61,43 +61,81 @@ config: LOG_FREQUENCY: 10 # if the workflow is only test and not training TEST_ONLY: False + # if the model should be test as well. If set to False, only training will be done. + TEST_MODEL: True # how frequently should the validation be done. # 1 = after every epoch and N = after every N epochs TEST_EVERY_NUM_EPOCH: 1 SEED_VALUE: 0 - # if the model should be test as well. If set to False, only training will be done. - TEST_MODEL: True # Use the forkserver or spawn # https://github.com/pytorch/pytorch/blob/master/torch/nn/parallel/distributed.py#L142 MULTI_PROCESSING_METHOD: "forkserver" - # monitoring training statistics like: forward time, backward time, loss time, etc - MONITOR_PERF_STATS: False - # we print perf stats (if enabled) after every phase. If we want to print every few - # batches, set the frequency here. - PERF_STAT_FREQUENCY: -1 - # if we want to print the rolling average batch time, set the value below to number of - # training iterations over which we want to print average. The average is printed for - # master gpu. - ROLLING_BTIME_FREQ: -1 - # whether to log nvidia-smi or not. we make it optional in case nvidia-smi is not - # valid for some systems. - LOG_GPU_STATS: True # ----------------------------------------------------------------------------------- # # HOOKS # ----------------------------------------------------------------------------------- # HOOKS: + # ----------------------------------------------------------------------------------- # + # Perf hooks for several steps of model training + # ----------------------------------------------------------------------------------- # + PERF_STATS: + # monitoring training statistics like: forward time, backward time, loss time, etc + MONITOR_PERF_STATS: False + # we print perf stats (if enabled) after every phase. If we want to print every few + # batches, set the frequency here. 
+ PERF_STAT_FREQUENCY: -1 + # if we want to print the rolling average batch time, set the value below to number of + # training iterations over which we want to print average. The average is printed for + # master gpu. + ROLLING_BTIME_FREQ: -1 + # ----------------------------------------------------------------------------------- # # torch.cuda.memory_summary() # ----------------------------------------------------------------------------------- # MEMORY_SUMMARY: # set this to true if you want to print memory summary. useful for profiling # memory consumption of model - PRINT_MEMORY_SUMMARY: False + PRINT_MEMORY_SUMMARY: True # at what iteration number should the memory summary be printed. usually # set to 1 for very large models LOG_ITERATION_NUM: 0 + # ----------------------------------------------------------------------------------- # + # nvidia-smi print + # ----------------------------------------------------------------------------------- # + # whether to log nvidia-smi or not. we make it optional in case nvidia-smi is not + # valid for some systems. + LOG_GPU_STATS: True + + # ----------------------------------------------------------------------------------- # + # MODEL_COMPLEXITY (#flops, #params, #activations in your model) + # ----------------------------------------------------------------------------------- # + MODEL_COMPLEXITY: + # set this to True if you want to compute #flops, #params, #activations in your model. + COMPUTE_COMPLEXITY: False + # the dummy input shape passed to the model to compute the complexity. Only forward pass + # is done for complexity calculation. + INPUT_SHAPE: [3, 224, 224] + + # ----------------------------------------------------------------------------------- # + # TENSORBOARD (visualization) + # ----------------------------------------------------------------------------------- # + TENSORBOARD_SETUP: + # whether to use tensorboard for the visualization + USE_TENSORBOARD: False + # log directory for tensorboard events + LOG_DIR: "." + EXPERIMENT_LOG_DIR: "tensorboard" + # flush logs every n minutes + FLUSH_EVERY_N_MIN: 5 + # whether to log the model parameters to tensorboard + LOG_PARAMS: True + # whether to log the model parameters gradients to tensorboard + LOG_PARAMS_GRADIENTS: True + # if we want to log the model parameters every few iterations, set the iteration + # frequency. -1 means the params will be logged only at the end of epochs. + LOG_PARAMS_EVERY_N_ITERS: 310 + # ----------------------------------------------------------------------------------- # # DATA # ----------------------------------------------------------------------------------- # @@ -266,6 +304,9 @@ config: # we can optimize memory a bit by running forward pass through each crop # separately. SINGLE_PASS_EVERY_CROP: False + # ----------------------------------------------------------------------------------- # + # Activation checkpointing from PyTorch + # ----------------------------------------------------------------------------------- # # Use activation checkpointing in the training phase. This is very for training # large models that require a lot of memory. ACTIVATION_CHECKPOINTING: @@ -273,7 +314,9 @@ config: # how many times the model should be checkpointed. User should tune this parameter # and find the number that offers best memory saving and compute tradeoff. 
NUM_ACTIVATION_CHECKPOINTING_SPLITS: 2 - # setup for Fairscale sharded DDP + # ----------------------------------------------------------------------------------- # + # ZeRO2 sharded DDP from Fairscale https://github.com/facebookresearch/fairscale + # ----------------------------------------------------------------------------------- # SHARDED_DDP_SETUP: reduce_buffer_size: -1 # ----------------------------------------------------------------------------------- # @@ -415,12 +458,9 @@ config: AMP_ARGS: {"opt_level": "O1"} # we support pytorch amp as well which is availale in pytorch>=1.6. AMP_TYPE: "apex" # apex | pytorch - MODEL_COMPLEXITY: - # set this to True if you want to compute #flops, #params, #activations in your model. - COMPUTE_COMPLEXITY: False - # the dummy input shape passed to the model to compute the complexity. Only forward pass - # is done for complexity calculation. - INPUT_SHAPE: [3, 224, 224] + # ----------------------------------------------------------------------------------- # + # MODEL WEIGHTS INIT from a weights file + # ----------------------------------------------------------------------------------- # # parameters for initializing a model from a pre-trained model file WEIGHTS_INIT: # path to the .torch weights files @@ -877,22 +917,3 @@ config: TOPK: 200 # if the features should be l2 normalized, set this to True L2_NORM_FEATS: False - - # ----------------------------------------------------------------------------------- # - # TENSORBOARD (visualization) - # ----------------------------------------------------------------------------------- # - TENSORBOARD_SETUP: - # whether to use tensorboard for the visualization - USE_TENSORBOARD: False - # log directory for tensorboard events - LOG_DIR: "." - EXPERIMENT_LOG_DIR: "tensorboard" - # flush logs every n minutes - FLUSH_EVERY_N_MIN: 5 - # whether to log the model parameters to tensorboard - LOG_PARAMS: True - # whether to log the model parameters gradients to tensorboard - LOG_PARAMS_GRADIENTS: True - # if we want to log the model parameters every few iterations, set the iteration - # frequency. -1 means the params will be logged only at the end of epochs. 
- LOG_PARAMS_EVERY_N_ITERS: 310 diff --git a/vissl/hooks/__init__.py b/vissl/hooks/__init__.py index 5bdd278a9..be02d7c28 100644 --- a/vissl/hooks/__init__.py +++ b/vissl/hooks/__init__.py @@ -60,7 +60,7 @@ def default_hook_generator(cfg: AttrDict) -> List[ClassyHook]: - loss specific hooks (swav loss, deepcluster loss, moco loss) used only when the loss is being used - model complexity hook (if user wants to compute model flops, activations, params) - enable the hook via MODEL.MODEL_COMPLEXITY.COMPUTE_COMPLEXITY = True + enable the hook via HOOKS.MODEL_COMPLEXITY.COMPUTE_COMPLEXITY = True Returns: hooks (List(functions)): list containing the hook functions that will be used @@ -68,9 +68,11 @@ def default_hook_generator(cfg: AttrDict) -> List[ClassyHook]: hooks = [] # conditionally add hooks based on use-case - if cfg.MONITOR_PERF_STATS: + if cfg.HOOKS.PERF_STATS.MONITOR_PERF_STATS: perf_stat_freq = ( - cfg.PERF_STAT_FREQUENCY if cfg.PERF_STAT_FREQUENCY > 0 else None + cfg.HOOKS.PERF_STATS.PERF_STAT_FREQUENCY + if cfg.HOOKS.PERF_STATS.PERF_STAT_FREQUENCY > 0 + else None ) hooks.append(LogPerfTimeMetricsHook(perf_stat_freq)) if cfg.LOSS.name == "swav_loss": @@ -97,19 +99,23 @@ def default_hook_generator(cfg: AttrDict) -> List[ClassyHook]: ) ] ) - if cfg.MODEL.MODEL_COMPLEXITY.COMPUTE_COMPLEXITY: + if cfg.HOOKS.MODEL_COMPLEXITY.COMPUTE_COMPLEXITY: hooks.extend([SSLModelComplexityHook()]) - if cfg.LOG_GPU_STATS: + if cfg.HOOKS.LOG_GPU_STATS: hooks.extend([LogGpuStatsHook()]) if cfg.HOOKS.MEMORY_SUMMARY.PRINT_MEMORY_SUMMARY: hooks.extend([LogGpuMemoryHook(cfg.HOOKS.MEMORY_SUMMARY.LOG_ITERATION_NUM)]) - if cfg.TENSORBOARD_SETUP.USE_TENSORBOARD: + if cfg.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD: assert is_tensorboard_available(), "Tensorboard must be installed to use it." tb_hook = get_tensorboard_hook(cfg) hooks.extend([tb_hook]) # hooks that are used irrespective of workflow type - rolling_btime_freq = cfg.ROLLING_BTIME_FREQ if cfg.ROLLING_BTIME_FREQ > 0 else None + rolling_btime_freq = ( + cfg.HOOKS.PERF_STATS.ROLLING_BTIME_FREQ + if cfg.HOOKS.PERF_STATS.ROLLING_BTIME_FREQ > 0 + else None + ) hooks.extend( [ CheckNanLossHook(), diff --git a/vissl/hooks/state_update_hooks.py b/vissl/hooks/state_update_hooks.py index 5257a6b06..f4b32da57 100644 --- a/vissl/hooks/state_update_hooks.py +++ b/vissl/hooks/state_update_hooks.py @@ -31,11 +31,11 @@ class SSLModelComplexityHook(ClassyHook): def on_start(self, task) -> None: """ Before the training starts, run one forward only pass of the model on the - dummy input of shape specified by user in MODEL.MODEL_COMPLEXITY.INPUT_SHAPE + dummy input of shape specified by user in HOOKS.MODEL_COMPLEXITY.INPUT_SHAPE We calculate the flops, activations and number of params in the model. 
""" self.num_flops, self.num_activations, self.num_parameters = 0, 0, 0 - input_shape = task.config["MODEL"]["MODEL_COMPLEXITY"]["INPUT_SHAPE"] + input_shape = task.config["HOOKS"]["MODEL_COMPLEXITY"]["INPUT_SHAPE"] try: self.num_parameters = count_params(task.base_model) self.num_parameters = round(float(self.num_parameters) / 1000000, 4) diff --git a/vissl/utils/tensorboard.py b/vissl/utils/tensorboard.py index d447f41d0..cf7827dd2 100644 --- a/vissl/utils/tensorboard.py +++ b/vissl/utils/tensorboard.py @@ -64,10 +64,10 @@ def get_tensorboard_hook(cfg): # get the tensorboard directory and check tensorboard is installed tensorboard_dir = get_tensorboard_dir(cfg) - flush_secs = cfg.TENSORBOARD_SETUP.FLUSH_EVERY_N_MIN * 60 + flush_secs = cfg.HOOKS.TENSORBOARD_SETUP.FLUSH_EVERY_N_MIN * 60 return SSLTensorboardHook( tb_writer=SummaryWriter(log_dir=tensorboard_dir, flush_secs=flush_secs), - log_params=cfg.TENSORBOARD_SETUP.LOG_PARAMS, - log_params_every_n_iterations=cfg.TENSORBOARD_SETUP.LOG_PARAMS_EVERY_N_ITERS, - log_params_gradients=cfg.TENSORBOARD_SETUP.LOG_PARAMS_GRADIENTS, + log_params=cfg.HOOKS.TENSORBOARD_SETUP.LOG_PARAMS, + log_params_every_n_iterations=cfg.HOOKS.TENSORBOARD_SETUP.LOG_PARAMS_EVERY_N_ITERS, + log_params_gradients=cfg.HOOKS.TENSORBOARD_SETUP.LOG_PARAMS_GRADIENTS, )