From fa4e31eff00eae90e00a2f702df24bef950826f8 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 2 May 2023 15:36:00 +0200 Subject: [PATCH 01/44] Add support for python 3.11 --- .github/workflow_scripts/env_setup.sh | 14 ++++++++------ .github/workflows/platform_tests-command.yml | 12 ++++++------ core/src/autogluon/core/_setup_utils.py | 2 +- docs/install-cpu-pip.md | 2 +- docs/install-cpu-source.md | 2 +- docs/install-gpu-pip.md | 2 +- docs/install-gpu-source.md | 2 +- docs/install-windows-gpu.md | 2 +- multimodal/setup.py | 12 +++++++----- tabular/setup.py | 2 +- 10 files changed, 28 insertions(+), 24 deletions(-) diff --git a/.github/workflow_scripts/env_setup.sh b/.github/workflow_scripts/env_setup.sh index 809f967a654..33eed43a7b8 100644 --- a/.github/workflow_scripts/env_setup.sh +++ b/.github/workflow_scripts/env_setup.sh @@ -16,21 +16,23 @@ function setup_build_contrib_env { } function setup_torch_gpu { - # Security-patched torch. - python3 -m pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 + PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu118 reinstall_torch } function setup_torch_cpu { - # Security-patched torch - python3 -m pip install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cpu + PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu reinstall_torch } function setup_torch_gpu_non_linux { - pip3 install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 + setup_torch_gpu } function setup_torch_cpu_non_linux { - pip3 install torch==1.13.1 torchvision==0.14.1 + setup_torch_cpu +} + +function reinstall_torch { + pip3 install --force-reinstall torchvision~=0.15.1 } function setup_hf_model_mirror { diff --git a/.github/workflows/platform_tests-command.yml b/.github/workflows/platform_tests-command.yml 
index 49627a18f9b..ccd250dccaa 100644 --- a/.github/workflows/platform_tests-command.yml +++ b/.github/workflows/platform_tests-command.yml @@ -45,7 +45,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python: ["3.8", "3.9", "3.10"] + python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository for PR if: (github.event_name == 'workflow_dispatch') @@ -77,7 +77,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python: ["3.8", "3.9", "3.10"] + python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository for PR if: (github.event_name == 'workflow_dispatch') @@ -109,7 +109,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python: ["3.8", "3.9", "3.10"] + python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository for PR if: (github.event_name == 'workflow_dispatch') @@ -141,7 +141,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python: ["3.8", "3.9", "3.10"] + python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository for PR if: (github.event_name == 'workflow_dispatch') @@ -174,7 +174,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python: ["3.8", "3.9", "3.10"] + python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository for PR if: (github.event_name == 'workflow_dispatch') @@ -214,7 +214,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python: ["3.8", "3.9", "3.10"] + python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository for PR if: (github.event_name == 'workflow_dispatch') diff --git a/core/src/autogluon/core/_setup_utils.py b/core/src/autogluon/core/_setup_utils.py index 52dfc5e9b57..4d21fdea889 100644 --- a/core/src/autogluon/core/_setup_utils.py +++ b/core/src/autogluon/core/_setup_utils.py @@ -13,7 +13,7 @@ 
os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..', '..') ) -PYTHON_REQUIRES = '>=3.8, <3.11' +PYTHON_REQUIRES = '>=3.8, <3.12' # Only put packages here that would otherwise appear multiple times across different module's setup.py files. diff --git a/docs/install-cpu-pip.md b/docs/install-cpu-pip.md index f04890f2541..04d2600c3e7 100644 --- a/docs/install-cpu-pip.md +++ b/docs/install-cpu-pip.md @@ -4,7 +4,7 @@ pip install -U setuptools wheel # CPU version of pytorch has smaller footprint - see installation instructions in # pytorch documentation - https://pytorch.org/get-started/locally/ -pip install torch==1.13.1+cpu torchvision==0.14.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html +pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cpu pip install autogluon ``` diff --git a/docs/install-cpu-source.md b/docs/install-cpu-source.md index fbb7d4dcd42..68859b8682b 100644 --- a/docs/install-cpu-source.md +++ b/docs/install-cpu-source.md @@ -4,7 +4,7 @@ pip install -U setuptools wheel # CPU version of pytorch has smaller footprint - see installation instructions in # pytorch documentation - https://pytorch.org/get-started/locally/ -pip install torch==1.13.1+cpu torchvision==0.14.1+cpu --extra-index-url https://download.pytorch.org/whl/cpu +pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cpu git clone https://github.com/autogluon/autogluon cd autogluon && ./full_install.sh diff --git a/docs/install-gpu-pip.md b/docs/install-gpu-pip.md index 8a9234f1b92..5b5e141b2f0 100644 --- a/docs/install-gpu-pip.md +++ b/docs/install-gpu-pip.md @@ -3,7 +3,7 @@ pip install -U pip pip install -U setuptools wheel # Install the proper version of PyTorch following https://pytorch.org/get-started/locally/ -pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 +pip install 
torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu118 pip install autogluon ``` diff --git a/docs/install-gpu-source.md b/docs/install-gpu-source.md index d7c578fb698..7db68dc6dad 100644 --- a/docs/install-gpu-source.md +++ b/docs/install-gpu-source.md @@ -3,7 +3,7 @@ pip install -U pip pip install -U setuptools wheel # Install the proper version of PyTorch following https://pytorch.org/get-started/locally/ -pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 +pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu118 git clone https://github.com/autogluon/autogluon cd autogluon && ./full_install.sh diff --git a/docs/install-windows-gpu.md b/docs/install-windows-gpu.md index ed051cfdaf3..2de07bb2333 100644 --- a/docs/install-windows-gpu.md +++ b/docs/install-windows-gpu.md @@ -11,7 +11,7 @@ conda activate myenv 4. Install the proper GPU PyTorch version by following the [PyTorch Install Documentation](https://pytorch.org/get-started/locally/) (Recommended). Alternatively, use the following command: ```bash -pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 +pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu118 ``` 5. 
Sanity check that your installation is valid and can detect your GPU via testing in Python: diff --git a/multimodal/setup.py b/multimodal/setup.py index a204c233c34..fcd5c7aef5e 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -33,13 +33,15 @@ "evaluate>=0.2.2,<0.4.0", "accelerate>=0.9,<0.17", "timm>=0.6.12,<0.7.0", - "torch>=1.9,<1.14", - "torchvision<0.15.0", + "torch>=1.9,<2.1", + "torchvision>=0.10.0,<0.16", "fairscale>=0.4.5,<0.4.14", - "scikit-image>=0.19.1,<0.20.0", - "pytorch-lightning>=1.9.0,<1.10.0", + "scikit-image>=0.19.1,<0.21.0", + "pytorch-lightning>=1.9.0,<2.1", "text-unidecode>=1.3,<1.4", - "torchmetrics>=0.11.0,<0.12.0", + # temporary for testing ref https://github.com/autogluon/autogluon/issues/2687#issuecomment-1531122312 + # "torchmetrics>=0.11.0,<0.12.0", + "torchmetrics @ https://github.com/Lightning-AI/torchmetrics/archive/95599c9a7bd5e6c8f0a8039562dbbc832b434031.zip" "transformers>=4.23.0,<4.27.0", "nptyping>=1.4.4,<2.5.0", "omegaconf>=2.1.1,<2.3.0", diff --git a/tabular/setup.py b/tabular/setup.py index 929710248b6..508d5355980 100644 --- a/tabular/setup.py +++ b/tabular/setup.py @@ -34,7 +34,7 @@ 'lightgbm>=3.3,<3.4', ], 'catboost': [ - 'catboost>=1.1,<1.2', + 'catboost>=1.1,<1.3', ], # FIXME: Debug why xgboost 1.6 has 4x+ slower inference on multiclass datasets compared to 1.4 # It is possibly only present on MacOS, haven't tested linux. 
From e7e6bbce96ad7b1a59164161b57b26f90e64e2a7 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 2 May 2023 15:49:21 +0200 Subject: [PATCH 02/44] Add missing comma --- multimodal/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index fcd5c7aef5e..560605f2a5e 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -41,7 +41,7 @@ "text-unidecode>=1.3,<1.4", # temporary for testing ref https://github.com/autogluon/autogluon/issues/2687#issuecomment-1531122312 # "torchmetrics>=0.11.0,<0.12.0", - "torchmetrics @ https://github.com/Lightning-AI/torchmetrics/archive/95599c9a7bd5e6c8f0a8039562dbbc832b434031.zip" + "torchmetrics @ https://github.com/Lightning-AI/torchmetrics/archive/95599c9a7bd5e6c8f0a8039562dbbc832b434031.zip", "transformers>=4.23.0,<4.27.0", "nptyping>=1.4.4,<2.5.0", "omegaconf>=2.1.1,<2.3.0", From 14e6fee69956d833ad41675ce7cf44bb1b198939 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 2 May 2023 16:25:20 +0200 Subject: [PATCH 03/44] Deprecate track_grad_norm ref https://github.com/Lightning-AI/lightning/pull/16745 --- .../src/autogluon/multimodal/configs/optimization/adamw.yaml | 1 - multimodal/src/autogluon/multimodal/matcher.py | 1 - multimodal/src/autogluon/multimodal/predictor.py | 1 - 3 files changed, 3 deletions(-) diff --git a/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml b/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml index d7ace45ed70..a0a180c903d 100644 --- a/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml +++ b/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml @@ -16,7 +16,6 @@ optimization: skip_final_val: False # Flag to skip the last validation gradient_clip_val: 1 gradient_clip_algorithm: "norm" - track_grad_norm: -1 # Whether to check gradient norm. 
We can set it to 2 to check for gradient norm. log_every_n_steps: 10 val_metric: null top_k: 3 diff --git a/multimodal/src/autogluon/multimodal/matcher.py b/multimodal/src/autogluon/multimodal/matcher.py index bdf22996378..f3132d9a962 100644 --- a/multimodal/src/autogluon/multimodal/matcher.py +++ b/multimodal/src/autogluon/multimodal/matcher.py @@ -910,7 +910,6 @@ def _fit( log_every_n_steps=OmegaConf.select(config, "optimization.log_every_n_steps", default=10), enable_progress_bar=enable_progress_bar, fast_dev_run=config.env.fast_dev_run, - track_grad_norm=OmegaConf.select(config, "optimization.track_grad_norm", default=-1), val_check_interval=config.optimization.val_check_interval, check_val_every_n_epoch=config.optimization.check_val_every_n_epoch if hasattr(config.optimization, "check_val_every_n_epoch") diff --git a/multimodal/src/autogluon/multimodal/predictor.py b/multimodal/src/autogluon/multimodal/predictor.py index fd03a6ec76e..6b172a1fa81 100644 --- a/multimodal/src/autogluon/multimodal/predictor.py +++ b/multimodal/src/autogluon/multimodal/predictor.py @@ -1475,7 +1475,6 @@ def _fit( log_every_n_steps=OmegaConf.select(config, "optimization.log_every_n_steps", default=10), enable_progress_bar=enable_progress_bar, fast_dev_run=config.env.fast_dev_run, - track_grad_norm=OmegaConf.select(config, "optimization.track_grad_norm", default=-1), val_check_interval=config.optimization.val_check_interval, check_val_every_n_epoch=config.optimization.check_val_every_n_epoch if hasattr(config.optimization, "check_val_every_n_epoch") From dd96a190bb5d0f5320bd51764ed91a7dbbe10e89 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 2 May 2023 17:24:31 +0200 Subject: [PATCH 04/44] Use 'auto' instead of None defaults --- multimodal/src/autogluon/multimodal/matcher.py | 8 ++++---- multimodal/src/autogluon/multimodal/predictor.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/multimodal/src/autogluon/multimodal/matcher.py b/multimodal/src/autogluon/multimodal/matcher.py index f3132d9a962..73b78ac12f1 100644 --- a/multimodal/src/autogluon/multimodal/matcher.py +++ b/multimodal/src/autogluon/multimodal/matcher.py @@ -864,7 +864,7 @@ def _fit( if not hpo_mode: if num_gpus <= 1: - strategy = None + strategy = "auto" else: strategy = config.env.strategy else: @@ -872,7 +872,7 @@ def _fit( if use_ray_lightning: strategy = hpo_kwargs.get("_ray_lightning_plugin") else: - strategy = None + strategy = "auto" num_gpus = min(num_gpus, 1) config.env.num_gpus = num_gpus @@ -886,12 +886,12 @@ def _fit( log_filter = LogFilter(blacklist_msgs) with apply_log_filter(log_filter): trainer = pl.Trainer( - accelerator="gpu" if num_gpus > 0 else None, + accelerator="gpu" if num_gpus > 0 else "auto", devices=get_available_devices( num_gpus=num_gpus, auto_select_gpus=config.env.auto_select_gpus, use_ray_lightning=use_ray_lightning, - ), + ) or "auto", num_nodes=config.env.num_nodes, precision=precision, strategy=strategy, diff --git a/multimodal/src/autogluon/multimodal/predictor.py b/multimodal/src/autogluon/multimodal/predictor.py index 6b172a1fa81..88db84c693b 100644 --- a/multimodal/src/autogluon/multimodal/predictor.py +++ b/multimodal/src/autogluon/multimodal/predictor.py @@ -1429,7 +1429,7 @@ def _fit( reduce_bucket_size=config.env.deepspeed_allreduce_size, ) else: - strategy = None + strategy = "auto" else: strategy = config.env.strategy else: @@ -1437,7 +1437,7 @@ def _fit( if use_ray_lightning: strategy = hpo_kwargs.get("_ray_lightning_plugin") else: - strategy = None + strategy = "auto" num_gpus = min(num_gpus, 1) config.env.num_gpus = num_gpus @@ -1451,12 +1451,12 @@ def _fit( log_filter = LogFilter(blacklist_msgs) with apply_log_filter(log_filter): trainer = pl.Trainer( - accelerator="gpu" if num_gpus > 0 else None, + accelerator="gpu" if num_gpus > 0 else "auto", devices=get_available_devices( num_gpus=num_gpus, 
auto_select_gpus=config.env.auto_select_gpus, use_ray_lightning=use_ray_lightning, - ), + ) or "auto", num_nodes=config.env.num_nodes, precision=precision, strategy=strategy, From 9c922b9f22ce3c5be8e2171d29be8e02d66b5174 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 2 May 2023 17:28:08 +0200 Subject: [PATCH 05/44] Revert devices='auto' --- multimodal/src/autogluon/multimodal/matcher.py | 2 +- multimodal/src/autogluon/multimodal/predictor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/multimodal/src/autogluon/multimodal/matcher.py b/multimodal/src/autogluon/multimodal/matcher.py index 73b78ac12f1..aa298064f75 100644 --- a/multimodal/src/autogluon/multimodal/matcher.py +++ b/multimodal/src/autogluon/multimodal/matcher.py @@ -891,7 +891,7 @@ def _fit( num_gpus=num_gpus, auto_select_gpus=config.env.auto_select_gpus, use_ray_lightning=use_ray_lightning, - ) or "auto", + ), num_nodes=config.env.num_nodes, precision=precision, strategy=strategy, diff --git a/multimodal/src/autogluon/multimodal/predictor.py b/multimodal/src/autogluon/multimodal/predictor.py index 88db84c693b..80cd9027900 100644 --- a/multimodal/src/autogluon/multimodal/predictor.py +++ b/multimodal/src/autogluon/multimodal/predictor.py @@ -1456,7 +1456,7 @@ def _fit( num_gpus=num_gpus, auto_select_gpus=config.env.auto_select_gpus, use_ray_lightning=use_ray_lightning, - ) or "auto", + ), num_nodes=config.env.num_nodes, precision=precision, strategy=strategy, From cf8d1537a578e1716f1a08a25728a9b27a50b502 Mon Sep 17 00:00:00 2001 From: Alexander Shirkov Date: Tue, 2 May 2023 18:09:23 -0700 Subject: [PATCH 06/44] fixes to inference; updated timeseries to align with the torch version --- multimodal/src/autogluon/multimodal/utils/inference.py | 2 +- tabular/setup.py | 2 +- timeseries/setup.py | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/multimodal/src/autogluon/multimodal/utils/inference.py 
b/multimodal/src/autogluon/multimodal/utils/inference.py index e8ae5a2b7ba..b47483e5c31 100644 --- a/multimodal/src/autogluon/multimodal/utils/inference.py +++ b/multimodal/src/autogluon/multimodal/utils/inference.py @@ -487,7 +487,7 @@ def predict( if num_gpus <= 1: # Force set strategy to be None if it's cpu-only or we have only one GPU. - strategy = None + strategy = "auto" precision = infer_precision(num_gpus=num_gpus, precision=predictor._config.env.precision, cpu_only_warning=False) diff --git a/tabular/setup.py b/tabular/setup.py index 508d5355980..17dcc42b0e6 100644 --- a/tabular/setup.py +++ b/tabular/setup.py @@ -43,7 +43,7 @@ 'xgboost>=1.6,<1.8', ], 'fastai': [ - 'torch>=1.9,<1.14', + 'torch>=1.9,<2.1', 'fastai>=2.3.1,<2.8', ], 'ray': [ diff --git a/timeseries/setup.py b/timeseries/setup.py index cc4f6c759ec..52b1d608ecf 100644 --- a/timeseries/setup.py +++ b/timeseries/setup.py @@ -29,8 +29,8 @@ "pandas", # version range defined in `core/_setup_utils.py` "statsmodels>=0.13.0,<0.14", "gluonts>=0.12.4,<0.13", - "torch>=1.9,<1.14", - "pytorch-lightning>=1.7.4,<1.10.0", + "torch>=1.9,<2.1", + "pytorch-lightning>=1.9.0,<2.1", "networkx", # version range defined in `core/_setup_utils.py` "statsforecast>=1.4.0,<1.5", "mlforecast>=0.7.0,<0.8.0", @@ -50,10 +50,9 @@ "isort>=5.10", "black>=22.3,<23.0", ], + "all": [], } -extras_require["all"] = [] - install_requires = ag.get_dependency_version_ranges(install_requires) if __name__ == "__main__": From 28ca9de6f3a9c3923c9db98e7865c28ab7bb3c79 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Wed, 3 May 2023 21:14:07 +0200 Subject: [PATCH 07/44] Deprecate compute_on_step https://github.com/Lightning-AI/torchmetrics/issues/789 --- multimodal/src/autogluon/multimodal/utils/map.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/multimodal/src/autogluon/multimodal/utils/map.py b/multimodal/src/autogluon/multimodal/utils/map.py index f40d739c740..507aac6eddf 100644 --- 
a/multimodal/src/autogluon/multimodal/utils/map.py +++ b/multimodal/src/autogluon/multimodal/utils/map.py @@ -166,8 +166,6 @@ class MeanAveragePrecision(Metric): Args: class_metrics: Option to enable per-class metrics for mAP and mAR_100. Has a performance impact. default: False - compute_on_step: - Forward only calls ``update()`` and return ``None`` if this is set to ``False``. dist_sync_on_step: Synchronize metric state across processes at each ``forward()`` before returning the value at the step @@ -190,7 +188,6 @@ class MeanAveragePrecision(Metric): def __init__( self, class_metrics: bool = False, - compute_on_step: bool = True, dist_sync_on_step: bool = False, process_group: Optional[Any] = None, dist_sync_fn: Callable = None, @@ -198,7 +195,6 @@ def __init__( iou_type: str = None, ) -> None: # type: ignore super().__init__( - compute_on_step=compute_on_step, dist_sync_on_step=dist_sync_on_step, process_group=process_group, dist_sync_fn=dist_sync_fn, From 0d4f8a49aeae2ae7084a2e7ffd92fd351d1eccda Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 4 May 2023 10:05:48 +0200 Subject: [PATCH 08/44] Rename validation_epoch_end -> on_validation_epoch_end ref https://github.com/Lightning-AI/lightning/pull/16520 --- multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py b/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py index 4fc499cdd38..9ac18212dbc 100644 --- a/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py +++ b/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py @@ -230,7 +230,7 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0): else: self.evaluate(batch, "val") - def validation_epoch_end(self, validation_step_outputs): + def on_validation_epoch_end(self): val_result = self.validation_metric.compute() if self.use_loss: 
self.log_dict({"val_direct_loss": val_result}, sync_dist=True) From 18fbb5f095fb39df1d1cdae07d16dccf4615cca8 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 4 May 2023 10:46:33 +0200 Subject: [PATCH 09/44] Remove property overwrite attempt Can't overwrite property ref https://github.com/Lightning-AI/lightning/blob/2.0.2/src/lightning/pytorch/trainer/trainer.py#L1384 So replaced with just an early return None ref https://github.com/Lightning-AI/lightning/blob/2.0.2/src/lightning/pytorch/core/hooks.py#L48-L49 --- multimodal/src/autogluon/multimodal/optimization/lit_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/src/autogluon/multimodal/optimization/lit_module.py b/multimodal/src/autogluon/multimodal/optimization/lit_module.py index 42a909b576d..c2f7c1a1f8e 100644 --- a/multimodal/src/autogluon/multimodal/optimization/lit_module.py +++ b/multimodal/src/autogluon/multimodal/optimization/lit_module.py @@ -239,13 +239,13 @@ def training_step(self, batch, batch_idx): def on_validation_start(self) -> None: if self.skip_final_val and self.trainer.should_stop: - self.trainer.val_dataloaders = [] # skip the final validation by setting val_dataloaders empty self.log( self.validation_metric_name, self.validation_metric, on_step=False, on_epoch=True, ) + return None return super().on_validation_start() def validation_step(self, batch, batch_idx): From 6e626af6a2ce70b3255ea495b07ae12325a7e92d Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 4 May 2023 12:12:49 +0200 Subject: [PATCH 10/44] Update k -> top_k ref https://github.com/Lightning-AI/torchmetrics/pull/1504 --- multimodal/tests/unittests/others/test_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/tests/unittests/others/test_metrics.py b/multimodal/tests/unittests/others/test_metrics.py index 5f6e296c571..e8dd9e1ac9d 100644 --- 
a/multimodal/tests/unittests/others/test_metrics.py +++ b/multimodal/tests/unittests/others/test_metrics.py @@ -90,7 +90,7 @@ def ref_symmetric_hit_rate(features_a, features_b, logit_scale, top_ks=[1, 5, 10 ) target = torch.eye(num_elements, dtype=bool).reshape(-1) for k in top_ks: - hr_k = RetrievalHitRate(k=k) + hr_k = RetrievalHitRate(top_k=k) hit_rate += hr_k(preds, target, indexes=indexes) return hit_rate / (2 * len(top_ks)) From 06b41f235237f893c1f2ee1d1c11bd2bbe02b652 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 4 May 2023 12:14:02 +0200 Subject: [PATCH 11/44] Bump torchmetrics --- multimodal/setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index 560605f2a5e..1d8e84039b3 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -39,9 +39,7 @@ "scikit-image>=0.19.1,<0.21.0", "pytorch-lightning>=1.9.0,<2.1", "text-unidecode>=1.3,<1.4", - # temporary for testing ref https://github.com/autogluon/autogluon/issues/2687#issuecomment-1531122312 - # "torchmetrics>=0.11.0,<0.12.0", - "torchmetrics @ https://github.com/Lightning-AI/torchmetrics/archive/95599c9a7bd5e6c8f0a8039562dbbc832b434031.zip", + "torchmetrics~=1.0.0rc0", "transformers>=4.23.0,<4.27.0", "nptyping>=1.4.4,<2.5.0", "omegaconf>=2.1.1,<2.3.0", From 6a0387e6ef8cfe838e27093831061909dd7b3985 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 4 May 2023 12:24:19 +0200 Subject: [PATCH 12/44] Bump pytorch-lightning commit https://github.com/autogluon/autogluon/pull/3190/commits/dd96a190bb5d0f5320bd51764ed91a7dbbe10e89 forces a minimum version bump of pytorch-lightning to 2.0.0 ref https://github.com/Lightning-AI/lightning/pull/16847 --- multimodal/setup.py | 2 +- timeseries/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index 1d8e84039b3..17d887dc64a 100644 --- 
a/multimodal/setup.py +++ b/multimodal/setup.py @@ -37,7 +37,7 @@ "torchvision>=0.10.0,<0.16", "fairscale>=0.4.5,<0.4.14", "scikit-image>=0.19.1,<0.21.0", - "pytorch-lightning>=1.9.0,<2.1", + "pytorch-lightning>=2.0.0,<2.1", "text-unidecode>=1.3,<1.4", "torchmetrics~=1.0.0rc0", "transformers>=4.23.0,<4.27.0", diff --git a/timeseries/setup.py b/timeseries/setup.py index 52b1d608ecf..57092cfc755 100644 --- a/timeseries/setup.py +++ b/timeseries/setup.py @@ -30,7 +30,7 @@ "statsmodels>=0.13.0,<0.14", "gluonts>=0.12.4,<0.13", "torch>=1.9,<2.1", - "pytorch-lightning>=1.9.0,<2.1", + "pytorch-lightning>=2.0.0,<2.1", "networkx", # version range defined in `core/_setup_utils.py` "statsforecast>=1.4.0,<1.5", "mlforecast>=0.7.0,<0.8.0", From 2e65ea8873609aab2fda92a837491344aa2697b4 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 4 May 2023 13:38:12 +0200 Subject: [PATCH 13/44] Remove map.py --- .../automm/object_detection/detection_eval.py | 3 - .../object_detection/detection_train.py | 3 - .../multimodal/optimization/utils.py | 2 +- .../autogluon/multimodal/utils/__init__.py | 1 - .../src/autogluon/multimodal/utils/map.py | 411 ------------------ 5 files changed, 1 insertion(+), 419 deletions(-) delete mode 100644 multimodal/src/autogluon/multimodal/utils/map.py diff --git a/examples/automm/object_detection/detection_eval.py b/examples/automm/object_detection/detection_eval.py index f7b355be618..2b13a0a885d 100644 --- a/examples/automm/object_detection/detection_eval.py +++ b/examples/automm/object_detection/detection_eval.py @@ -10,9 +10,6 @@ python detection_eval.py \ --test_path ./VOCdevkit/VOC2007/Annotations/test_cocoformat.json \ --checkpoint_name faster_rcnn_r50_fpn_1x_voc0712 - -Note that for now it's required to install nightly build torchmetrics. -This will be solved in next pr. (MeanAveragePrecision will be moved to AG temporarily.) 
""" import argparse diff --git a/examples/automm/object_detection/detection_train.py b/examples/automm/object_detection/detection_train.py index ba70a75906b..16c5f505338 100644 --- a/examples/automm/object_detection/detection_train.py +++ b/examples/automm/object_detection/detection_train.py @@ -23,9 +23,6 @@ --lr \ --wd \ --epochs - -Note that for now it's required to install nightly build torchmetrics. -This will be solved in next pr. (MeanAveragePrecision will be moved to AG temporarily.) """ import argparse diff --git a/multimodal/src/autogluon/multimodal/optimization/utils.py b/multimodal/src/autogluon/multimodal/optimization/utils.py index e1d023c2262..916dde96463 100644 --- a/multimodal/src/autogluon/multimodal/optimization/utils.py +++ b/multimodal/src/autogluon/multimodal/optimization/utils.py @@ -11,6 +11,7 @@ from pytorch_metric_learning import distances, losses, miners from torch import nn, optim from torch.nn import functional as F +from torchmetrics.detection.mean_ap import MeanAveragePrecision from transformers import Adafactor from transformers.trainer_pt_utils import get_parameter_names @@ -62,7 +63,6 @@ ROOT_MEAN_SQUARED_ERROR, SPEARMANR, ) -from ..utils.map import MeanAveragePrecision from .losses import FocalLoss, MultiNegativesSoftmaxLoss, SoftTargetCrossEntropy from .lr_scheduler import ( get_cosine_schedule_with_warmup, diff --git a/multimodal/src/autogluon/multimodal/utils/__init__.py b/multimodal/src/autogluon/multimodal/utils/__init__.py index a705a37c367..0dc0ce2ff48 100644 --- a/multimodal/src/autogluon/multimodal/utils/__init__.py +++ b/multimodal/src/autogluon/multimodal/utils/__init__.py @@ -46,7 +46,6 @@ from .inference import extract_from_output, infer_batch, predict, process_batch, use_realtime from .load import CustomUnpickler, load_text_tokenizers from .log import LogFilter, apply_log_filter, get_fit_complete_message, get_fit_start_message, make_exp_dir -from .map import MeanAveragePrecision from .matcher import 
compute_semantic_similarity, convert_data_for_ranking, create_siamese_model, semantic_search from .metric import compute_ranking_score, compute_score, get_minmax_mode, get_stopping_threshold, infer_metrics from .misc import logits_to_prob, merge_bio_format, shopee_dataset, tensor_to_ndarray, visualize_ner diff --git a/multimodal/src/autogluon/multimodal/utils/map.py b/multimodal/src/autogluon/multimodal/utils/map.py deleted file mode 100644 index 507aac6eddf..00000000000 --- a/multimodal/src/autogluon/multimodal/utils/map.py +++ /dev/null @@ -1,411 +0,0 @@ -import torchmetrics -from packaging import version - -# There is a bug >=0.9, <=0.11.0 -# And the slow speed problem is still not fixed in 0.11.1 -if version.parse(torchmetrics.__version__) > version.parse("0.12.0"): - from torchmetrics.detection.mean_ap import MeanAveragePrecision -else: - import logging - import sys - from dataclasses import dataclass - from typing import Any, Callable, Dict, List, Optional, Sequence, Union - - import torch - from torch import Tensor - from torchmetrics.metric import Metric - from torchmetrics.utilities.imports import ( - _PYCOCOTOOLS_AVAILABLE, - _TORCHVISION_AVAILABLE, - _TORCHVISION_GREATER_EQUAL_0_8, - ) - - if _TORCHVISION_AVAILABLE and _TORCHVISION_GREATER_EQUAL_0_8: - from torchvision.ops import box_convert - else: - box_convert = None - - if _PYCOCOTOOLS_AVAILABLE: - from pycocotools.coco import COCO - from pycocotools.cocoeval import COCOeval - else: - COCO, COCOeval = None, None - - log = logging.getLogger(__name__) - - @dataclass - class MAPMetricResults: - """Dataclass to wrap the final mAP results.""" - - map: Tensor - map_50: Tensor - map_75: Tensor - map_small: Tensor - map_medium: Tensor - map_large: Tensor - mar_1: Tensor - mar_10: Tensor - mar_100: Tensor - mar_small: Tensor - mar_medium: Tensor - mar_large: Tensor - map_per_class: Tensor - mar_100_per_class: Tensor - - def __getitem__(self, key: str) -> Union[Tensor, List[Tensor]]: - return getattr(self, 
key) - - # noinspection PyMethodMayBeStatic - class WriteToLog: - """Logging class to move logs to log.debug().""" - - def write(self, buf: str) -> None: # skipcq: PY-D0003, PYL-R0201 - for line in buf.rstrip().splitlines(): - log.debug(line.rstrip()) - - def flush(self) -> None: # skipcq: PY-D0003, PYL-R0201 - for handler in log.handlers: - handler.flush() - - def close(self) -> None: # skipcq: PY-D0003, PYL-R0201 - for handler in log.handlers: - handler.close() - - class _hide_prints: - """Internal helper context to suppress the default output of the pycocotools package.""" - - def __init__(self) -> None: - self._original_stdout = None - - def __enter__(self) -> None: - self._original_stdout = sys.stdout # type: ignore - sys.stdout = WriteToLog() # type: ignore - - def __exit__(self, exc_type, exc_val, exc_tb) -> None: # type: ignore - sys.stdout.close() - sys.stdout = self._original_stdout # type: ignore - - def _input_validator(preds: List[Dict[str, torch.Tensor]], targets: List[Dict[str, torch.Tensor]]) -> None: - """Ensure the correct input format of `preds` and `targets`""" - if not isinstance(preds, Sequence): - raise ValueError("Expected argument `preds` to be of type List") - if not isinstance(targets, Sequence): - raise ValueError("Expected argument `target` to be of type List") - if len(preds) != len(targets): - raise ValueError("Expected argument `preds` and `target` to have the same length") - - for k in ["boxes", "scores", "labels"]: - if any(k not in p for p in preds): - raise ValueError(f"Expected all dicts in `preds` to contain the `{k}` key") - - for k in ["boxes", "labels"]: - if any(k not in p for p in targets): - raise ValueError(f"Expected all dicts in `target` to contain the `{k}` key") - - if any(type(pred["boxes"]) is not torch.Tensor for pred in preds): - raise ValueError("Expected all boxes in `preds` to be of type torch.Tensor") - if any(type(pred["scores"]) is not torch.Tensor for pred in preds): - raise ValueError("Expected all scores 
in `preds` to be of type torch.Tensor") - if any(type(pred["labels"]) is not torch.Tensor for pred in preds): - raise ValueError("Expected all labels in `preds` to be of type torch.Tensor") - if any(type(target["boxes"]) is not torch.Tensor for target in targets): - raise ValueError("Expected all boxes in `target` to be of type torch.Tensor") - if any(type(target["labels"]) is not torch.Tensor for target in targets): - raise ValueError("Expected all labels in `target` to be of type torch.Tensor") - - for i, item in enumerate(targets): - if item["boxes"].size(0) != item["labels"].size(0): - raise ValueError( - f"Input boxes and labels of sample {i} in targets have a" - f" different length (expected {item['boxes'].size(0)} labels, got {item['labels'].size(0)})" - ) - for i, item in enumerate(preds): - if item["boxes"].size(0) != item["labels"].size(0) != item["scores"].size(0): - raise ValueError( - f"Input boxes, labels and scores of sample {i} in preds have a" - f" different length (expected {item['boxes'].size(0)} labels and scores," - f" got {item['labels'].size(0)} labels and {item['scores'].size(0)})" - ) - - def _fix_empty_tensors(boxes: torch.Tensor) -> torch.Tensor: - """Empty tensors can cause problems in DDP mode, this methods corrects them.""" - if boxes.numel() == 0 and boxes.ndim == 1: - return boxes.unsqueeze(0) - return boxes - - class MeanAveragePrecision(Metric): - r""" - Computes the `Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR)\ - `_\ - for object detection predictions. - Optionally, the mAP and mAR values can be calculated per class. - - Predicted boxes and targets have to be in Pascal VOC format - (xmin-top left, ymin-top left, xmax-bottom right, ymax-bottom right). - See the :meth:`update` method for more information about the input format to this metric. - - For an example on how to use this metric check the `torchmetrics examples\ - `_ - - .. 
note:: - This metric is a wrapper for the - `pycocotools `_, - which is a standard implementation for the mAP metric for object detection. Using this metric - therefore requires you to have `pycocotools` installed. Please install with ``pip install pycocotools`` or - ``pip install torchmetrics[detection]``. - - .. note:: - This metric requires you to have `torchvision` version 0.8.0 or newer installed (with corresponding - version 1.7.0 of torch or newer). Please install with ``pip install torchvision`` or - ``pip install torchmetrics[detection]``. - - .. note:: - As the pycocotools library cannot deal with tensors directly, all results have to be transferred - to the CPU, this might have an performance impact on your training. - - Args: - class_metrics: - Option to enable per-class metrics for mAP and mAR_100. Has a performance impact. default: False - dist_sync_on_step: - Synchronize metric state across processes at each ``forward()`` - before returning the value at the step - process_group: - Specify the process group on which synchronization is called. - default: ``None`` (which selects the entire world) - dist_sync_fn: - Callback that performs the allgather operation on the metric state. When ``None``, DDP - will be used to perform the allgather - - Raises: - ImportError: - If ``pycocotools`` is not installed - ImportError: - If ``torchvision`` is not installed or version installed is lower than 0.8.0 - ValueError: - If ``class_metrics`` is not a boolean - """ - - def __init__( - self, - class_metrics: bool = False, - dist_sync_on_step: bool = False, - process_group: Optional[Any] = None, - dist_sync_fn: Callable = None, - box_format: str = None, - iou_type: str = None, - ) -> None: # type: ignore - super().__init__( - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - dist_sync_fn=dist_sync_fn, - ) - - if not _PYCOCOTOOLS_AVAILABLE: - raise ImportError( - "`MAP` metric requires that `pycocotools` installed." 
- " Please install with `pip install pycocotools` or `pip install torchmetrics[detection]`" - ) - if not (_TORCHVISION_AVAILABLE and _TORCHVISION_GREATER_EQUAL_0_8): - raise ImportError( - "`MAP` metric requires that `torchvision` version 0.8.0 or newer is installed." - " Please install with `pip install torchvision` or `pip install torchmetrics[detection]`" - ) - - if not isinstance(class_metrics, bool): - raise ValueError("Expected argument `class_metrics` to be a boolean") - self.class_metrics = class_metrics - - self.add_state("detection_boxes", default=[], dist_reduce_fx=None) - self.add_state("detection_scores", default=[], dist_reduce_fx=None) - self.add_state("detection_labels", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_boxes", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_labels", default=[], dist_reduce_fx=None) - - def update(self, preds: List[Dict[str, Tensor]], target: List[Dict[str, Tensor]]) -> None: # type: ignore - """Add detections and groundtruth to the metric. - - Args: - preds: A list consisting of dictionaries each containing the key-values\ - (each dictionary corresponds to a single image): - - ``boxes``: torch.FloatTensor of shape - [num_boxes, 4] containing `num_boxes` detection boxes of the format - [xmin, ymin, xmax, ymax] in absolute image coordinates. - - ``scores``: torch.FloatTensor of shape - [num_boxes] containing detection scores for the boxes. - - ``labels``: torch.IntTensor of shape - [num_boxes] containing 0-indexed detection classes for the boxes. - - target: A list consisting of dictionaries each containing the key-values\ - (each dictionary corresponds to a single image): - - ``boxes``: torch.FloatTensor of shape - [num_boxes, 4] containing `num_boxes` groundtruth boxes of the format - [xmin, ymin, xmax, ymax] in absolute image coordinates. - - ``labels``: torch.IntTensor of shape - [num_boxes] containing 1-indexed groundtruth classes for the boxes. 
- - Raises: - ValueError: - If ``preds`` is not of type List[Dict[str, torch.Tensor]] - ValueError: - If ``target`` is not of type List[Dict[str, torch.Tensor]] - ValueError: - If ``preds`` and ``target`` are not of the same length - ValueError: - If any of ``preds.boxes``, ``preds.scores`` - and ``preds.labels`` are not of the same length - ValueError: - If any of ``target.boxes`` and ``target.labels`` are not of the same length - ValueError: - If any box is not type float and of length 4 - ValueError: - If any class is not type int and of length 1 - ValueError: - If any score is not type float and of length 1 - """ - _input_validator(preds, target) - - for item in preds: - self.detection_boxes.append(_fix_empty_tensors(item["boxes"])) - self.detection_scores.append(item["scores"]) - self.detection_labels.append(item["labels"]) - - for item in target: - self.groundtruth_boxes.append(_fix_empty_tensors(item["boxes"])) - self.groundtruth_labels.append(item["labels"]) - - def compute(self) -> dict: - """Compute the `Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR)` scores. All detections added in - the `update()` method are included. 
- - Note: - Main `map` score is calculated with @[ IoU=0.50:0.95 | area=all | maxDets=100 ] - - Returns: - dict containing - - - map: ``torch.Tensor`` - - map_50: ``torch.Tensor`` - - map_75: ``torch.Tensor`` - - map_small: ``torch.Tensor`` - - map_medium: ``torch.Tensor`` - - map_large: ``torch.Tensor`` - - mar_1: ``torch.Tensor`` - - mar_10: ``torch.Tensor`` - - mar_100: ``torch.Tensor`` - - mar_small: ``torch.Tensor`` - - mar_medium: ``torch.Tensor`` - - mar_large: ``torch.Tensor`` - - map_per_class: ``torch.Tensor`` (-1 if class metrics are disabled) - - mar_100_per_class: ``torch.Tensor`` (-1 if class metrics are disabled) - """ - coco_target, coco_preds = COCO(), COCO() - coco_target.dataset = self._get_coco_format(self.groundtruth_boxes, self.groundtruth_labels) - coco_preds.dataset = self._get_coco_format( - self.detection_boxes, self.detection_labels, self.detection_scores - ) - - with _hide_prints(): - coco_target.createIndex() - coco_preds.createIndex() - coco_eval = COCOeval(coco_target, coco_preds, "bbox") - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - stats = coco_eval.stats - - map_per_class_values: Tensor = torch.Tensor([-1]) - mar_100_per_class_values: Tensor = torch.Tensor([-1]) - # if class mode is enabled, evaluate metrics per class - if self.class_metrics: - map_per_class_list = [] - mar_100_per_class_list = [] - for class_id in self._get_classes(): - coco_eval.params.catIds = [class_id] - with _hide_prints(): - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - class_stats = coco_eval.stats - - map_per_class_list.append(torch.Tensor([class_stats[0]])) - mar_100_per_class_list.append(torch.Tensor([class_stats[8]])) - map_per_class_values = torch.Tensor(map_per_class_list) - mar_100_per_class_values = torch.Tensor(mar_100_per_class_list) - - metrics = MAPMetricResults( - map=torch.Tensor([stats[0]]), - map_50=torch.Tensor([stats[1]]), - map_75=torch.Tensor([stats[2]]), - 
map_small=torch.Tensor([stats[3]]), - map_medium=torch.Tensor([stats[4]]), - map_large=torch.Tensor([stats[5]]), - mar_1=torch.Tensor([stats[6]]), - mar_10=torch.Tensor([stats[7]]), - mar_100=torch.Tensor([stats[8]]), - mar_small=torch.Tensor([stats[9]]), - mar_medium=torch.Tensor([stats[10]]), - mar_large=torch.Tensor([stats[11]]), - map_per_class=map_per_class_values, - mar_100_per_class=mar_100_per_class_values, - ) - return metrics.__dict__ - - def _get_coco_format( - self, boxes: List[torch.Tensor], labels: List[torch.Tensor], scores: Optional[List[torch.Tensor]] = None - ) -> Dict: - """Transforms and returns all cached targets or predictions in COCO format. - - Format is defined at https://cocodataset.org/#format-data - """ - images = [] - annotations = [] - annotation_id = 1 # has to start with 1, otherwise COCOEval results are wrong - - boxes = [ - box_convert(box, in_fmt="xyxy", out_fmt="xywh") if box.ndim > 1 and box.size(1) == 4 else box - for box in boxes - ] - for image_id, (image_boxes, image_labels) in enumerate(zip(boxes, labels)): - image_boxes = image_boxes.cpu().tolist() - image_labels = image_labels.cpu().tolist() - - images.append({"id": image_id}) - for k, (image_box, image_label) in enumerate(zip(image_boxes, image_labels)): - if len(image_box) != 4: - raise ValueError( - f"Invalid input box of sample {image_id}, element {k} (expected 4 values, got {len(image_box)})" - ) - - if type(image_label) != int: - raise ValueError( - f"Invalid input class of sample {image_id}, element {k}" - f" (expected value of type integer, got type {type(image_label)})" - ) - - annotation = { - "id": annotation_id, - "image_id": image_id, - "bbox": image_box, - "category_id": image_label, - "area": image_box[2] * image_box[3], - "iscrowd": 0, - } - if scores is not None: - score = scores[image_id][k].cpu().tolist() - if type(score) != float: - raise ValueError( - f"Invalid input score of sample {image_id}, element {k}" - f" (expected value of type float, got type 
{type(score)})" - ) - annotation["score"] = score - annotations.append(annotation) - annotation_id += 1 - - classes = [{"id": i, "name": str(i)} for i in self._get_classes()] - return {"images": images, "annotations": annotations, "categories": classes} - - def _get_classes(self) -> list: - """Get list of unique classes depending on groundtruth_labels and detection_labels.""" - if len(self.detection_labels) > 0 or len(self.groundtruth_labels) > 0: - return torch.cat(self.detection_labels + self.groundtruth_labels).unique().cpu().tolist() - return [] From ef9fb68da31718248cbd4ef416286e397b1f4359 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 4 May 2023 14:17:31 +0200 Subject: [PATCH 14/44] Remove map.py leftover --- multimodal/src/autogluon/multimodal/utils/object_detection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/multimodal/src/autogluon/multimodal/utils/object_detection.py b/multimodal/src/autogluon/multimodal/utils/object_detection.py index d2ddf738477..edfb5ad6b44 100644 --- a/multimodal/src/autogluon/multimodal/utils/object_detection.py +++ b/multimodal/src/autogluon/multimodal/utils/object_detection.py @@ -807,8 +807,7 @@ def cocoeval_torchmetrics(outputs: List): """ import torch - - from . 
import MeanAveragePrecision + from torchmetrics.detection.mean_ap import MeanAveragePrecision map_metric = MeanAveragePrecision(box_format="xyxy", iou_type="bbox", class_metrics=False) From cdbd78e837083b98bc1e866a929924237880e383 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Fri, 5 May 2023 07:31:32 +0200 Subject: [PATCH 15/44] Omit classes key from log_dict call ref https://github.com/Lightning-AI/torchmetrics/commit/ac64e630148671b2ce03855713562cbd1f32fd3e --- multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py | 1 + 1 file changed, 1 insertion(+) diff --git a/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py b/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py index 9ac18212dbc..62ef4aff54d 100644 --- a/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py +++ b/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py @@ -236,6 +236,7 @@ def on_validation_epoch_end(self): self.log_dict({"val_direct_loss": val_result}, sync_dist=True) else: # TODO: add mAP/mAR_per_class + val_result.pop("classes", None) # introduced in torchmetrics v1.0.0 mAPs = {"val_" + k: v for k, v in val_result.items()} mAPs["val_mAP"] = mAPs["val_map"] self.log_dict(mAPs, sync_dist=True) From 4c94b714c5b78b13c7f44624c80c98b524cde003 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Fri, 5 May 2023 08:38:47 +0200 Subject: [PATCH 16/44] Sync torch and torchvision versions with pytorch-lightning ref https://github.com/Lightning-AI/lightning/blob/2.0.2/requirements/pytorch/base.txt#L5 ref https://github.com/autogluon/autogluon/pull/3190#discussion_r1185718069 --- multimodal/setup.py | 4 ++-- tabular/setup.py | 2 +- timeseries/setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index 17d887dc64a..596cfcbc81c 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -33,8 +33,8 @@ 
"evaluate>=0.2.2,<0.4.0", "accelerate>=0.9,<0.17", "timm>=0.6.12,<0.7.0", - "torch>=1.9,<2.1", - "torchvision>=0.10.0,<0.16", + "torch>=1.11,<2.1", + "torchvision>=0.12.0,<0.16", "fairscale>=0.4.5,<0.4.14", "scikit-image>=0.19.1,<0.21.0", "pytorch-lightning>=2.0.0,<2.1", diff --git a/tabular/setup.py b/tabular/setup.py index 17dcc42b0e6..9f5e55edf5f 100644 --- a/tabular/setup.py +++ b/tabular/setup.py @@ -43,7 +43,7 @@ 'xgboost>=1.6,<1.8', ], 'fastai': [ - 'torch>=1.9,<2.1', + 'torch>=1.11,<2.1', 'fastai>=2.3.1,<2.8', ], 'ray': [ diff --git a/timeseries/setup.py b/timeseries/setup.py index 57092cfc755..0c0c4c81014 100644 --- a/timeseries/setup.py +++ b/timeseries/setup.py @@ -29,7 +29,7 @@ "pandas", # version range defined in `core/_setup_utils.py` "statsmodels>=0.13.0,<0.14", "gluonts>=0.12.4,<0.13", - "torch>=1.9,<2.1", + "torch>=1.11,<2.1", "pytorch-lightning>=2.0.0,<2.1", "networkx", # version range defined in `core/_setup_utils.py` "statsforecast>=1.4.0,<1.5", From 88d06643d520f7daec300b002429cad4485a860b Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Sat, 6 May 2023 09:43:24 +0200 Subject: [PATCH 17/44] Remove track_grad_norm references https://github.com/autogluon/autogluon/pull/3190#discussion_r1186631732 --- .../advanced_topics/customization.ipynb | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/docs/tutorials/multimodal/advanced_topics/customization.ipynb b/docs/tutorials/multimodal/advanced_topics/customization.ipynb index c0bd10d9886..073bffd82cb 100644 --- a/docs/tutorials/multimodal/advanced_topics/customization.ipynb +++ b/docs/tutorials/multimodal/advanced_topics/customization.ipynb @@ -360,29 +360,6 @@ "```\n" ] }, - { - "cell_type": "markdown", - "id": "02d07866", - "metadata": {}, - "source": [ - "### optimization.track_grad_norm\n", - "\n", - "Track the p-norm of gradients during training. May be set to ‘inf’ infinity-norm. 
If using Automatic Mixed Precision (AMP), the gradients will be unscaled before logging them." - ] - }, - { - "cell_type": "markdown", - "id": "1b60c371", - "metadata": {}, - "source": [ - "```\n", - "# default used by AutoMM (no tracking)\n", - "predictor.fit(hyperparameters={\"optimization.track_grad_norm\": -1})\n", - "# track the 2-norm\n", - "predictor.fit(hyperparameters={\"optimization.track_grad_norm\": 2})\n", - "```\n" - ] - }, { "cell_type": "markdown", "id": "abe87d32", @@ -1574,4 +1551,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} From 43e4e1674fac889b316015b9f83442ce27931e9b Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Fri, 26 May 2023 07:21:19 +0200 Subject: [PATCH 18/44] Fix catboost installation error for Github macos runners - https://github.com/autogluon/autogluon/pull/3190#issuecomment-1540599280 - https://github.com/catboost/catboost/issues/2371 - https://github.com/actions/setup-python/issues/654#issuecomment-1533455583 --- tabular/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tabular/setup.py b/tabular/setup.py index 9f5e55edf5f..5c33f252ada 100644 --- a/tabular/setup.py +++ b/tabular/setup.py @@ -34,6 +34,8 @@ 'lightgbm>=3.3,<3.4', ], 'catboost': [ + # https://github.com/autogluon/autogluon/pull/3190#issuecomment-1540599280 + 'catboost>=1.1,<1.2 ; python_version <= "3.8" and sys_platform == "darwin"', 'catboost>=1.1,<1.3', ], # FIXME: Debug why xgboost 1.6 has 4x+ slower inference on multiclass datasets compared to 1.4 From 6f1935dbb33ea44bda69bbd236d78c1a62b555d7 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Fri, 16 Jun 2023 10:25:01 +0200 Subject: [PATCH 19/44] Remove catboost hotfix 3.8.17 github runner was released ref https://github.com/actions/python-versions/blob/225ba42747d0f5e3dbd90ba15b9c7409a4b8c735/versions-manifest.json#L3370-L3396 this is built on macos 11, so it is properly detected --- 
tabular/setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tabular/setup.py b/tabular/setup.py index 42ed04df9d5..6dab8797a25 100644 --- a/tabular/setup.py +++ b/tabular/setup.py @@ -34,9 +34,6 @@ 'lightgbm>=3.3,<3.4', ], 'catboost': [ - # catboost>=1.2 no longer has macosx 10.x wheels available, which are needed for Github CI on older Python versions - # https://github.com/autogluon/autogluon/pull/3190#issuecomment-1540599280 - 'catboost>=1.1,<1.2 ; python_version <= "3.8" and sys_platform == "darwin"', 'catboost>=1.1,<1.3', ], # FIXME: Debug why xgboost 1.6 has 4x+ slower inference on multiclass datasets compared to 1.4 From ac8f1288dfd981f9668e57e00c10bc49bd949a6f Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Mon, 19 Jun 2023 10:27:15 +0200 Subject: [PATCH 20/44] Bump onnx to 0.15.x ERROR: Could not find a version that satisfies the requirement onnxruntime-gpu<1.14.0,>=1.13.0; platform_system != "Darwin" and extra == "tests" (from autogluon-multimodal[tests]) (from versions: 1.15.0, 1.15.1) ERROR: No matching distribution found for onnxruntime-gpu<1.14.0,>=1.13.0; platform_system != "Darwin" and extra == "tests" --- multimodal/setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index a417d6a4a88..e6944d1ca40 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -65,9 +65,9 @@ "black>=22.3,<23.0", "isort>=5.10", "datasets>=2.3.2,<=2.3.2", - "onnx>=1.13.0,<1.14.0", - "onnxruntime>=1.13.0,<1.14.0;platform_system=='Darwin'", - "onnxruntime-gpu>=1.13.0,<1.14.0;platform_system!='Darwin'", + "onnx>=1.15.0,<1.16.0", + "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", + "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", "tensorrt>=8.5.3.1,<8.5.4;platform_system=='Linux'", ] } From 8e7d838e2eea3682d3e21b7e1e215f51f294feba Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Mon, 19 Jun 
2023 10:31:08 +0200 Subject: [PATCH 21/44] Revert onnx version bump Only onnxruntime 0.15 was released, onnx is still on 0.14 --- multimodal/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index e6944d1ca40..f011ef02ef2 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -65,7 +65,7 @@ "black>=22.3,<23.0", "isort>=5.10", "datasets>=2.3.2,<=2.3.2", - "onnx>=1.15.0,<1.16.0", + "onnx>=1.14.0,<1.15.0", "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", "tensorrt>=8.5.3.1,<8.5.4;platform_system=='Linux'", From 8024a1b29887963612a09e42ac2d6d08d83cd498 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Mon, 19 Jun 2023 10:34:10 +0200 Subject: [PATCH 22/44] Bump tensorrt for cp311 compatibility --- multimodal/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index f011ef02ef2..bbe31f9ad16 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -68,7 +68,7 @@ "onnx>=1.14.0,<1.15.0", "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", - "tensorrt>=8.5.3.1,<8.5.4;platform_system=='Linux'", + "tensorrt>=8.6.1,<8.7.0;platform_system=='Linux'", ] } From cb98de7834ceae9ad0361e979a521406f98d61df Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Mon, 19 Jun 2023 11:18:15 +0200 Subject: [PATCH 23/44] Update datasets and evaluate ref https://github.com/huggingface/datasets/commit/bde7504fbafa9a0cc9ae847ed55aafd4c0dbc9de --- multimodal/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index bbe31f9ad16..07810317681 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -30,7 +30,7 @@ "requests>=2.21,<3", "jsonschema>=4.14,<4.18", 
"seqeval>=1.2.2,<1.3.0", - "evaluate>=0.2.2,<0.4.0", + "evaluate>=0.4.0,<0.5.0", "accelerate>=0.9,<0.17", "timm>=0.9.2,<0.10.0", "torch>=1.11,<2.1", @@ -64,7 +64,7 @@ "tests": [ "black>=22.3,<23.0", "isort>=5.10", - "datasets>=2.3.2,<=2.3.2", + "datasets>=2.13.0,<=2.14.0", "onnx>=1.14.0,<1.15.0", "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", From 75c13d023a3259065dfd71294d19e9f16253f84c Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Mon, 19 Jun 2023 11:21:00 +0200 Subject: [PATCH 24/44] Fix typo on version range --- multimodal/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index 07810317681..006f0818150 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -64,7 +64,7 @@ "tests": [ "black>=22.3,<23.0", "isort>=5.10", - "datasets>=2.13.0,<=2.14.0", + "datasets>=2.13.0,<2.14.0", "onnx>=1.14.0,<1.15.0", "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", From a4fabd7169bf4d858eed3e4655ed3b75868b844b Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Mon, 3 Jul 2023 09:25:36 +0200 Subject: [PATCH 25/44] Bump ray version pytorch_lightning 2.0 support added in 2.5 https://github.com/ray-project/ray/pull/34967 ray_lightning deprecated https://github.com/ray-project/ray/pull/36400 --- .../src/autogluon/common/utils/try_import.py | 57 +------------------ core/setup.py | 5 +- 2 files changed, 5 insertions(+), 57 deletions(-) diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py index e0b619bb611..970c00bcaa3 100644 --- a/common/src/autogluon/common/utils/try_import.py +++ b/common/src/autogluon/common/utils/try_import.py @@ -30,13 +30,13 @@ def try_import_mxboard(): def try_import_ray() -> ModuleType: - RAY_MAX_VERSION = 
"2.4.0" + RAY_MAX_VERSION = "2.7.0" # sync with core/setup.py ray_max_version_os_map = dict( Darwin=RAY_MAX_VERSION, Windows=RAY_MAX_VERSION, Linux=RAY_MAX_VERSION, ) - ray_min_version = "2.2.0" + ray_min_version = "2.5.1" current_os = platform.system() ray_max_version = ray_max_version_os_map.get(current_os, RAY_MAX_VERSION) try: @@ -60,57 +60,6 @@ def try_import_ray() -> ModuleType: ) -def try_import_ray_lightning(): - """This function tries to import ray lightning and check if the compatible pytorch lightning version is installed""" - supported_ray_lightning_min_version = "0.2.0" - supported_ray_lightning_max_version = "0.3.0" - ray_lightning_torch_lightning_compatibility_map = { - "0.2.x": "1.5.x", - } - ray_lightining_torch_lightning_compatibility_range_map = { - ("0.2.0", "0.3.0"): ("1.5.0", "1.6.0"), - } - try: - import pkg_resources - import pytorch_lightning - import ray_lightning - from packaging import version - - ray_lightning_version = pkg_resources.get_distribution("ray_lightning").version # ray_lightning doesn't have __version__... - - if not ( - version.parse(supported_ray_lightning_min_version) <= version.parse(ray_lightning_version) < version.parse(supported_ray_lightning_max_version) - ): - logger.log( - f"ray_lightning=={ray_lightning_version} detected. " - f"{supported_ray_lightning_min_version} <= ray_lighting < {supported_ray_lightning_max_version} is required." - "You can use pip to install certain version of ray_lightning." - f"Supported ray_lightning versions and the compatible torch lightning versions are {ray_lightning_torch_lightning_compatibility_map}." 
- ) - return False - - for ray_lightning_versions, torch_lightning_versions in ray_lightining_torch_lightning_compatibility_range_map.items(): - ray_lightning_min_version, ray_lightning_max_version = ray_lightning_versions - torch_lightning_min_version, torch_lightning_max_version = torch_lightning_versions - if version.parse(ray_lightning_min_version) <= version.parse(ray_lightning_version) < version.parse(ray_lightning_max_version): - if not ( - version.parse(torch_lightning_min_version) <= version.parse(pytorch_lightning.__version__) < version.parse(torch_lightning_max_version) - ): - logger.log( - f"Found ray_lightning {ray_lightning_version} that's not compatible with pytorch_lightning." - f"The compatible version of pytorch_lightning is >= {torch_lightning_min_version} and < {torch_lightning_max_version}." - ) - return False - return True - - except ImportError: - logger.info( - "You can enable each individual trial using multiple gpus by installing ray_lightning." - f"Supported ray_lightning versions and the compatible torch lightning versions are {ray_lightning_torch_lightning_compatibility_map}." - ) - return False - - def try_import_catboost(): try: import catboost @@ -175,7 +124,7 @@ def try_import_torch(): import torch except ImportError as e: raise ImportError( - "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.6." + "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.11." 
) diff --git a/core/setup.py b/core/setup.py index a6535b43ada..779743315e8 100644 --- a/core/setup.py +++ b/core/setup.py @@ -47,7 +47,7 @@ extras_require = { "ray": [ - "ray[default]>=2.3,<2.4", + "ray[default]>=2.5.1,<2.7", # https://github.com/grpc/grpc/issues/31885 # version range set to align with ray's updated version range: # https://github.com/ray-project/ray/blob/master/python/setup.py#L259-L261 @@ -56,7 +56,7 @@ "pydantic>=1.10.4,<2.0", # https://github.com/ray-project/ray/issues/36990 ], "raytune": [ - "ray[tune]>=2.3,<2.4", + "ray[tune]>=2.5.1,<2.7", # TODO: consider alternatives as hyperopt is not actively maintained. "hyperopt>=0.2.7,<0.2.8", # This is needed for the bayes search to work. # 'GPy>=1.10.0,<1.11.0' # TODO: Enable this once PBT/PB2 are supported by ray lightning @@ -68,7 +68,6 @@ "types-requests", "types-setuptools", "pytest-mypy", - # TODO(Re-enable ray_lightning once it released 0.3.0) 'ray_lightning>=0.2.0,<0.3.0' ] all_requires = [] From 060a6cdf451fee0c184c62a651f44572708b31bd Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Sun, 23 Jul 2023 17:16:42 +0200 Subject: [PATCH 26/44] Undo deletion from a merge commit --- .github/workflow_scripts/env_setup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflow_scripts/env_setup.sh b/.github/workflow_scripts/env_setup.sh index 2342574df0e..72817f423e4 100644 --- a/.github/workflow_scripts/env_setup.sh +++ b/.github/workflow_scripts/env_setup.sh @@ -21,6 +21,7 @@ function setup_torch_gpu { } function setup_torch_cpu { + PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu reinstall_torch } function setup_torch_gpu_non_linux { From ca8d8ef22c1ea5d53e38812c4486e82425c9b907 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Sun, 23 Jul 2023 18:00:59 +0200 Subject: [PATCH 27/44] Unify torchmetrics version notation --- multimodal/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/multimodal/setup.py b/multimodal/setup.py index 8f3fbe1a07f..ad82fd91d00 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -38,7 +38,7 @@ "scikit-image>=0.19.1,<0.21.0", "pytorch-lightning>=2.0.0,<2.1", "text-unidecode>=1.3,<1.4", - "torchmetrics~=1.0.0rc0", + "torchmetrics>=1.0.0,<1.1.0", "transformers[sentencepiece]>=4.23.0,<4.27.0", "nptyping>=1.4.4,<2.5.0", "omegaconf>=2.1.1,<2.3.0", From f0cbf1ca385c4ce85569c934d4b2eb98ae63c1ac Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:20:28 +0200 Subject: [PATCH 28/44] Revert merge remnant --- timeseries/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/timeseries/setup.py b/timeseries/setup.py index 439a02606c2..ea9d933b1e8 100644 --- a/timeseries/setup.py +++ b/timeseries/setup.py @@ -51,9 +51,10 @@ "isort>=5.10", "black>=22.3,<23.0", ], - "all": [], } +extras_require["all"] = [] + install_requires = ag.get_dependency_version_ranges(install_requires) if __name__ == "__main__": From 983415b1001a8bb7805e1e8414d7e00f2fa279e5 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:26:08 +0200 Subject: [PATCH 29/44] Revert lower bound bumps datasets needs a lower bound bump due to https://github.com/huggingface/datasets/commit/bde7504fbafa9a0cc9ae847ed55aafd4c0dbc9de --- core/setup.py | 4 ++-- multimodal/setup.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/core/setup.py b/core/setup.py index 779743315e8..30655fa576e 100644 --- a/core/setup.py +++ b/core/setup.py @@ -47,7 +47,7 @@ extras_require = { "ray": [ - "ray[default]>=2.5.1,<2.7", + "ray[default]>=2.3,<2.7", # https://github.com/grpc/grpc/issues/31885 # version range set to align with ray's updated version range: # https://github.com/ray-project/ray/blob/master/python/setup.py#L259-L261 @@ -56,7 +56,7 @@ "pydantic>=1.10.4,<2.0", # 
https://github.com/ray-project/ray/issues/36990 ], "raytune": [ - "ray[tune]>=2.5.1,<2.7", + "ray[tune]>=2.3,<2.7", # TODO: consider alternatives as hyperopt is not actively maintained. "hyperopt>=0.2.7,<0.2.8", # This is needed for the bayes search to work. # 'GPy>=1.10.0,<1.11.0' # TODO: Enable this once PBT/PB2 are supported by ray lightning diff --git a/multimodal/setup.py b/multimodal/setup.py index 8007923f51d..0cd407a5e29 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -32,7 +32,7 @@ "requests>=2.21,<3", "jsonschema>=4.14,<4.18", "seqeval>=1.2.2,<1.3.0", - "evaluate>=0.4.0,<0.5.0", + "evaluate>=0.2.2,<0.5.0", "accelerate>=0.9,<0.17", "timm>=0.9.2,<0.10.0", "torchvision>=0.14.0,<0.16.0", # torch 1.13 requires torchvision 0.14. Increase it to 0.15 when dropping the support of torch 1.13. @@ -60,11 +60,11 @@ tests_require = [ "black>=22.3,<23.0", "isort>=5.10", - "datasets>=2.13.0,<2.14.0", - "onnx>=1.14.0,<1.15.0", - "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", - "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", - "tensorrt>=8.6.1,<8.7.0;platform_system=='Linux'", + "datasets>=2.7.0,<2.14.0", + "onnx>=1.13.0,<1.15.0", + "onnxruntime>=1.13.0,<1.16.0;platform_system=='Darwin'", + "onnxruntime-gpu>=1.13.0,<1.16.0;platform_system!='Darwin'", + "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'", ] extras_require = { From 6f1d2fd987b56670719a30b9c9be2e56893745bf Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 24 Aug 2023 22:21:01 +0200 Subject: [PATCH 30/44] Update torch version in error message --- common/src/autogluon/common/utils/try_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py index 17875767b2a..dea3d3628b2 100644 --- a/common/src/autogluon/common/utils/try_import.py +++ b/common/src/autogluon/common/utils/try_import.py @@ -124,7 +124,7 @@ def 
try_import_torch(): import torch except ImportError as e: raise ImportError( - "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.11." + "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.13." # sync with core/_setup_utils.py ) From ed3f969825143e047a91a1cc26f7bccde6883977 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 24 Aug 2023 22:24:04 +0200 Subject: [PATCH 31/44] Add cp311 specifier --- core/src/autogluon/core/_setup_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/autogluon/core/_setup_utils.py b/core/src/autogluon/core/_setup_utils.py index 9fec6e2b9d4..40f3818d053 100644 --- a/core/src/autogluon/core/_setup_utils.py +++ b/core/src/autogluon/core/_setup_utils.py @@ -137,6 +137,7 @@ def default_setup_args(*, version, submodule): "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Software Development", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Information Analysis", From 07246b86428e8bed679b69a394dfe22c8ce5dee7 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Thu, 24 Aug 2023 22:46:28 +0200 Subject: [PATCH 32/44] Lint --- common/src/autogluon/common/utils/try_import.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py index dea3d3628b2..692807927ff 100644 --- a/common/src/autogluon/common/utils/try_import.py +++ b/common/src/autogluon/common/utils/try_import.py @@ -124,7 +124,9 @@ def try_import_torch(): import torch except ImportError as e: raise ImportError( - "Unable to import dependency 
torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.13." # sync with core/_setup_utils.py + "Unable to import dependency torch\n" + "A quick tip is to install via `pip install torch`.\n" + "The minimum torch version is currently 1.13." # sync with core/_setup_utils.py ) From 11652e9ee08eaf7df5f887986243889bc99487da Mon Sep 17 00:00:00 2001 From: ddelange Date: Mon, 11 Sep 2023 23:05:15 +0200 Subject: [PATCH 33/44] Allow torchmetrics 1.1.* --- multimodal/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index 617ca7df3b9..b75f1a35799 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -39,7 +39,7 @@ "torchvision>=0.14.0,<0.16.0", # torch 1.13 requires torchvision 0.14. Increase it to 0.15 when dropping the support of torch 1.13. "scikit-image>=0.19.1,<0.21.0", "text-unidecode>=1.3,<1.4", - "torchmetrics>=1.0.0,<1.1.0", + "torchmetrics>=1.0.0,<1.2.0", "nptyping>=1.4.4,<2.5.0", "omegaconf>=2.1.1,<2.3.0", f"autogluon.core[raytune]=={version}", From fba061c43cd906160622e7d23eba2a7ec6d9ff2e Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Mon, 2 Oct 2023 19:17:11 +0200 Subject: [PATCH 34/44] Bump ray to 2.7.0 --- common/src/autogluon/common/utils/try_import.py | 6 +++--- core/setup.py | 6 +++--- core/src/autogluon/core/_setup_utils.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py index d083ba75faf..94a6404db57 100644 --- a/common/src/autogluon/common/utils/try_import.py +++ b/common/src/autogluon/common/utils/try_import.py @@ -30,13 +30,13 @@ def try_import_mxboard(): def try_import_ray() -> ModuleType: - RAY_MAX_VERSION = "2.7.0" # sync with core/setup.py + RAY_MAX_VERSION = "2.8.0" # sync with core/setup.py ray_max_version_os_map = dict( Darwin=RAY_MAX_VERSION, 
Windows=RAY_MAX_VERSION, Linux=RAY_MAX_VERSION, ) - ray_min_version = "2.6.3" + ray_min_version = "2.7.0" current_os = platform.system() ray_max_version = ray_max_version_os_map.get(current_os, RAY_MAX_VERSION) try: @@ -124,7 +124,7 @@ def try_import_torch(): raise ImportError( "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" - "The minimum torch version is currently 1.13." # sync with core/_setup_utils.py + "The minimum torch version is currently 2.0." # sync with core/_setup_utils.py ) diff --git a/core/setup.py b/core/setup.py index 994cf8149eb..750ac5f8116 100644 --- a/core/setup.py +++ b/core/setup.py @@ -47,11 +47,11 @@ extras_require = { "ray": [ - "ray[default]>=2.6.3,<2.7", - "pydantic>=1.10.4,<2.0", # https://github.com/ray-project/ray/issues/36990 + # sync version with common/src/autogluon/common/utils/try_import.py + "ray[default]>=2.7.0,<2.8", ], "raytune": [ - "ray[tune]>=2.6.3,<2.7", + "ray[default,tune]>=2.7.0,<2.8", # TODO: consider alternatives as hyperopt is not actively maintained. "hyperopt>=0.2.7,<0.2.8", # This is needed for the bayes search to work. 
# 'GPy>=1.10.0,<1.11.0' # TODO: Enable this once PBT/PB2 are supported by ray lightning diff --git a/core/src/autogluon/core/_setup_utils.py b/core/src/autogluon/core/_setup_utils.py index 17721d1af95..4f010a51a25 100644 --- a/core/src/autogluon/core/_setup_utils.py +++ b/core/src/autogluon/core/_setup_utils.py @@ -26,7 +26,7 @@ "networkx": ">=3.0,<4", # Major version cap "tqdm": ">=4.38,<5", # Major version cap "Pillow": ">=9.3,<9.6", # "<{N+2}" upper cap - "torch": ">=2.0,<2.1", # "<{N+1}" upper cap + "torch": ">=2.0,<2.1", # "<{N+1}" upper cap, sync with common/src/autogluon/common/utils/try_import.py "lightning": ">=2.0.0,<2.1", # "<{N+1}" upper cap } if LITE_MODE: From 6deb7828f552cc7c792805a8fac853de6aa0eb5d Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Tue, 3 Oct 2023 17:19:20 +0000 Subject: [PATCH 35/44] test --- common/src/autogluon/common/utils/try_import.py | 4 ++-- core/setup.py | 5 ++--- multimodal/setup.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py index 94a6404db57..e269cabde0e 100644 --- a/common/src/autogluon/common/utils/try_import.py +++ b/common/src/autogluon/common/utils/try_import.py @@ -30,13 +30,13 @@ def try_import_mxboard(): def try_import_ray() -> ModuleType: - RAY_MAX_VERSION = "2.8.0" # sync with core/setup.py + RAY_MAX_VERSION = "2.7.0" # sync with core/setup.py ray_max_version_os_map = dict( Darwin=RAY_MAX_VERSION, Windows=RAY_MAX_VERSION, Linux=RAY_MAX_VERSION, ) - ray_min_version = "2.7.0" + ray_min_version = "2.6.3" current_os = platform.system() ray_max_version = ray_max_version_os_map.get(current_os, RAY_MAX_VERSION) try: diff --git a/core/setup.py b/core/setup.py index 750ac5f8116..389eda08e9c 100644 --- a/core/setup.py +++ b/core/setup.py @@ -47,11 +47,10 @@ extras_require = { "ray": [ - # sync version with common/src/autogluon/common/utils/try_import.py - "ray[default]>=2.7.0,<2.8", + 
"ray[default]>=2.6.3,<2.7", ], "raytune": [ - "ray[default,tune]>=2.7.0,<2.8", + "ray[default,tune]>=2.6.3,<2.7", # TODO: consider alternatives as hyperopt is not actively maintained. "hyperopt>=0.2.7,<0.2.8", # This is needed for the bayes search to work. # 'GPy>=1.10.0,<1.11.0' # TODO: Enable this once PBT/PB2 are supported by ray lightning diff --git a/multimodal/setup.py b/multimodal/setup.py index 28ecab8dbee..178162fe436 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -61,7 +61,7 @@ "black>=22.3,<23.0", "isort>=5.10", "datasets>=2.10.0,<2.15.0", - "onnx>=1.13.0,<1.15.0", + "onnx>=1.13.0,<1.14.0", "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'", From 2e62c43631b843ba43081b6dc6130de86870f745 Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Tue, 3 Oct 2023 20:48:30 +0000 Subject: [PATCH 36/44] disable tensorrt --- multimodal/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index 178162fe436..96710bb1527 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -64,7 +64,7 @@ "onnx>=1.13.0,<1.14.0", "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", - "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'", + # "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'", ] extras_require = { From 5227c72b85afa996e4452c2ca9578de230d84d48 Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Wed, 4 Oct 2023 17:37:25 +0000 Subject: [PATCH 37/44] fix tests --- common/src/autogluon/common/utils/try_import.py | 8 +++++++- .../tests/unittests/others/test_deployment_onnx.py | 4 ++++ tabular/setup.py | 9 +++++++-- tabular/tests/unittests/models/test_vowpalwabbit.py | 6 ++++++ 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/common/src/autogluon/common/utils/try_import.py 
b/common/src/autogluon/common/utils/try_import.py index e269cabde0e..5893e1a0bc6 100644 --- a/common/src/autogluon/common/utils/try_import.py +++ b/common/src/autogluon/common/utils/try_import.py @@ -1,5 +1,6 @@ import logging import platform +import sys from types import ModuleType from ..version import __version__ @@ -62,7 +63,12 @@ def try_import_catboost(): try: import catboost except ImportError as e: - raise ImportError("`import catboost` failed. " f"A quick tip is to install via `pip install autogluon.tabular[catboost]=={__version__}`.") + error_msg = "`import catboost` failed. " + if sys.version_info >= (3, 11) and sys.platform == "darwin": + error_msg += f"Detected your env as {sys.platform}. Please either downgrade your python version to below 3.11 or move to another platform. Then install via ``pip install autogluon.tabular[catboost]=={__version__}``" + else: + error_msg += f"A quick tip is to install via `pip install autogluon.tabular[catboost]=={__version__}`." + raise ImportError(error_msg) except ValueError as e: raise ImportError( "Import catboost failed. 
Numpy version may be outdated, " diff --git a/multimodal/tests/unittests/others/test_deployment_onnx.py b/multimodal/tests/unittests/others/test_deployment_onnx.py index 23c7924c8f8..687d034f907 100644 --- a/multimodal/tests/unittests/others/test_deployment_onnx.py +++ b/multimodal/tests/unittests/others/test_deployment_onnx.py @@ -1,11 +1,13 @@ import os import shutil +import tensorrt import numpy as np import numpy.testing import pytest import torch from datasets import load_dataset +from packaging import version from scipy.stats import pearsonr, spearmanr from sklearn.metrics.pairwise import paired_cosine_distances from torch import FloatTensor @@ -192,6 +194,8 @@ def test_onnx_export_timm_image(checkpoint_name, num_gpus): ), ], ) + +@pytest.mark.skipif(version.parse(tensorrt.__version__) >= version.parse("8.5.4"), reason="tensorrt above 8.5.4 cause segfault, but is required to support py311") def test_onnx_optimize_for_inference(dataset_name, model_names, text_backbone, image_backbone): dataset = ALL_DATASETS[dataset_name] hyperparameters = { diff --git a/tabular/setup.py b/tabular/setup.py index 3b56a6302cf..ef4fcbb1fa2 100644 --- a/tabular/setup.py +++ b/tabular/setup.py @@ -36,7 +36,11 @@ "lightgbm>=3.3,<3.4", ], "catboost": [ - "catboost>=1.1,<1.3", + # CatBoost wheel build is not working correctly on darwin for CatBoost 1.2, so use old version in this case. + # https://github.com/autogluon/autogluon/pull/3190#issuecomment-1540599280 + # Catboost 1.2 doesn't have wheel for python 3.11 + "catboost>=1.1,<1.2 ; sys_platform == 'darwin' and python_version < '3.11'", + "catboost>=1.1,<1.3; sys_platform != 'darwin'", ], # FIXME: Debug why xgboost 1.6 has 4x+ slower inference on multiclass datasets compared to 1.4 # It is possibly only present on MacOS, haven't tested linux. @@ -63,7 +67,8 @@ ], "vowpalwabbit": [ # FIXME: 9.5+ causes VW to save an empty model which always predicts 0. Confirmed on MacOS (Intel CPU). Unknown how to fix. 
- "vowpalwabbit>=9,<9.9", + # No vowpalwabbit wheel for python 3.11 or above yet + "vowpalwabbit>=9,<9.9; python_version < '3.11'", ], "skl2onnx": [ "skl2onnx>=1.15.0,<1.16.0", diff --git a/tabular/tests/unittests/models/test_vowpalwabbit.py b/tabular/tests/unittests/models/test_vowpalwabbit.py index c41c39d3ee1..b2b08d88a70 100644 --- a/tabular/tests/unittests/models/test_vowpalwabbit.py +++ b/tabular/tests/unittests/models/test_vowpalwabbit.py @@ -1,6 +1,10 @@ +import pytest +import sys + from autogluon.tabular.models.vowpalwabbit.vowpalwabbit_model import VowpalWabbitModel +@pytest.mark.skipif(sys.version_info >= (3, 11), reason="vowpalwabbit doesn't support python 3.11 and above yet") def test_vowpalwabbit_binary(fit_helper): fit_args = dict( hyperparameters={VowpalWabbitModel: {}}, @@ -9,6 +13,7 @@ def test_vowpalwabbit_binary(fit_helper): fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args) +@pytest.mark.skipif(sys.version_info >= (3, 11), reason="vowpalwabbit doesn't support python 3.11 and above yet") def test_vowpalwabbit_multiclass(fit_helper): fit_args = dict( hyperparameters={VowpalWabbitModel: {}}, @@ -17,6 +22,7 @@ def test_vowpalwabbit_multiclass(fit_helper): fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args) +@pytest.mark.skipif(sys.version_info >= (3, 11), reason="vowpalwabbit doesn't support python 3.11 and above yet") def test_vowpalwabbit_regression(fit_helper): fit_args = dict( hyperparameters={VowpalWabbitModel: {}}, From 84592cc35fe24f8c7087088391d93839d74f4c40 Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Wed, 4 Oct 2023 17:38:07 +0000 Subject: [PATCH 38/44] isort --- tabular/tests/unittests/models/test_vowpalwabbit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tabular/tests/unittests/models/test_vowpalwabbit.py b/tabular/tests/unittests/models/test_vowpalwabbit.py index b2b08d88a70..072135a89ec 100644 --- 
a/tabular/tests/unittests/models/test_vowpalwabbit.py +++ b/tabular/tests/unittests/models/test_vowpalwabbit.py @@ -1,6 +1,7 @@ -import pytest import sys +import pytest + from autogluon.tabular.models.vowpalwabbit.vowpalwabbit_model import VowpalWabbitModel From 4315d6e6baad23a4c5a0da6ed9965c70dfb0eb0c Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Wed, 4 Oct 2023 18:04:20 +0000 Subject: [PATCH 39/44] lint --- multimodal/tests/unittests/others/test_deployment_onnx.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/multimodal/tests/unittests/others/test_deployment_onnx.py b/multimodal/tests/unittests/others/test_deployment_onnx.py index 687d034f907..17a5314c2c9 100644 --- a/multimodal/tests/unittests/others/test_deployment_onnx.py +++ b/multimodal/tests/unittests/others/test_deployment_onnx.py @@ -194,8 +194,10 @@ def test_onnx_export_timm_image(checkpoint_name, num_gpus): ), ], ) - -@pytest.mark.skipif(version.parse(tensorrt.__version__) >= version.parse("8.5.4"), reason="tensorrt above 8.5.4 cause segfault, but is required to support py311") +@pytest.mark.skipif( + version.parse(tensorrt.__version__) >= version.parse("8.5.4"), + reason="tensorrt above 8.5.4 cause segfault, but is required to support py311", +) def test_onnx_optimize_for_inference(dataset_name, model_names, text_backbone, image_backbone): dataset = ALL_DATASETS[dataset_name] hyperparameters = { From 5586f484e3f450a810031d58ad85e232da5a8e0f Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Wed, 4 Oct 2023 18:12:25 +0000 Subject: [PATCH 40/44] isort --- multimodal/tests/unittests/others/test_deployment_onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multimodal/tests/unittests/others/test_deployment_onnx.py b/multimodal/tests/unittests/others/test_deployment_onnx.py index 17a5314c2c9..2f2e2420f4e 100644 --- a/multimodal/tests/unittests/others/test_deployment_onnx.py +++ b/multimodal/tests/unittests/others/test_deployment_onnx.py @@ -1,10 +1,10 @@ import 
os import shutil -import tensorrt import numpy as np import numpy.testing import pytest +import tensorrt import torch from datasets import load_dataset from packaging import version From 681709d4e640d43862afd7addbc9cc2bd0333bb1 Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Wed, 4 Oct 2023 20:04:36 +0000 Subject: [PATCH 41/44] fix --- multimodal/setup.py | 2 +- multimodal/tests/unittests/others/test_deployment_onnx.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/multimodal/setup.py b/multimodal/setup.py index 96710bb1527..72da7b8eadd 100644 --- a/multimodal/setup.py +++ b/multimodal/setup.py @@ -64,7 +64,7 @@ "onnx>=1.13.0,<1.14.0", "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'", "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'", - # "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'", + "tensorrt>=8.5.3.1,<8.5.4;platform_system=='Linux' and python_version<'3.11'", # tensorrt >= 8.5.4 causes a segfault ] extras_require = { diff --git a/multimodal/tests/unittests/others/test_deployment_onnx.py b/multimodal/tests/unittests/others/test_deployment_onnx.py index 2f2e2420f4e..33f231616f2 100644 --- a/multimodal/tests/unittests/others/test_deployment_onnx.py +++ b/multimodal/tests/unittests/others/test_deployment_onnx.py @@ -4,7 +4,6 @@ import numpy as np import numpy.testing import pytest -import tensorrt import torch from datasets import load_dataset from packaging import version @@ -25,6 +24,11 @@ "ae": AEDataset(), } +try: + import tensorrt +except ImportError: + tensorrt = None + def evaluate(predictor, df, onnx_session=None): labels = df["score"].to_numpy() @@ -195,7 +199,7 @@ def test_onnx_export_timm_image(checkpoint_name, num_gpus): ], ) @pytest.mark.skipif( - version.parse(tensorrt.__version__) >= version.parse("8.5.4"), + tensorrt is None or version.parse(tensorrt.__version__) >= version.parse("8.5.4"), reason="tensorrt above 8.5.4 cause segfault, but is required to support py311", ) def 
test_onnx_optimize_for_inference(dataset_name, model_names, text_backbone, image_backbone): From c30990da00ab0776c5cadbce24ddee1c9bbdd708 Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Wed, 4 Oct 2023 20:13:08 +0000 Subject: [PATCH 42/44] fix catboost --- tabular/tests/unittests/models/test_catboost.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tabular/tests/unittests/models/test_catboost.py b/tabular/tests/unittests/models/test_catboost.py index 9e5c4ceaa45..23e35f448a9 100644 --- a/tabular/tests/unittests/models/test_catboost.py +++ b/tabular/tests/unittests/models/test_catboost.py @@ -1,6 +1,11 @@ +import sys + +import pytest + from autogluon.tabular.models.catboost.catboost_model import CatBoostModel +@pytest.mark.skipif(sys.version_info >= (3, 11) and sys.platform == "darwin", reason="catboost has no wheel for py311 darwin") def test_catboost_binary(fit_helper): fit_args = dict( hyperparameters={CatBoostModel: {}}, @@ -9,6 +14,7 @@ def test_catboost_binary(fit_helper): fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args) +@pytest.mark.skipif(sys.version_info >= (3, 11) and sys.platform == "darwin", reason="catboost has no wheel for py311 darwin") def test_catboost_multiclass(fit_helper): fit_args = dict( hyperparameters={CatBoostModel: {}}, @@ -17,6 +23,7 @@ def test_catboost_multiclass(fit_helper): fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args) +@pytest.mark.skipif(sys.version_info >= (3, 11) and sys.platform == "darwin", reason="catboost has no wheel for py311 darwin") def test_catboost_regression(fit_helper): fit_args = dict( hyperparameters={CatBoostModel: {}}, @@ -26,6 +33,7 @@ def test_catboost_regression(fit_helper): fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args) +@pytest.mark.skipif(sys.version_info >= (3, 11) and sys.platform == "darwin", reason="catboost has no wheel for py311 darwin") def test_catboost_quantile(fit_helper): fit_args = 
dict( hyperparameters={"CAT": {}}, From b82d13742aeeff5c62427f3701df9a90bdcb06c3 Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Thu, 5 Oct 2023 00:51:30 +0000 Subject: [PATCH 43/44] fix --- tabular/tests/unittests/test_tabular.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tabular/tests/unittests/test_tabular.py b/tabular/tests/unittests/test_tabular.py index 48c5ba4e556..387b1325070 100644 --- a/tabular/tests/unittests/test_tabular.py +++ b/tabular/tests/unittests/test_tabular.py @@ -21,6 +21,7 @@ """ import os import shutil +import sys import tempfile import warnings from random import seed @@ -34,6 +35,7 @@ from autogluon.core.constants import BINARY, MULTICLASS, PROBLEM_TYPES_CLASSIFICATION, QUANTILE, REGRESSION from autogluon.core.utils import download, unzip from autogluon.tabular import TabularDataset, TabularPredictor +from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config PARALLEL_LOCAL_BAGGING = "parallel_local" SEQUENTIAL_LOCAL_BAGGING = "sequential_local" @@ -47,7 +49,7 @@ def test_tabular(): subsample_size = None hyperparameter_tune_kwargs = None verbosity = 2 # how much output to print - hyperparameters = None + hyperparameters = get_hyperparameter_config("default") time_limit = None fast_benchmark = True # False # If True, run a faster benchmark (subsample training sets, less epochs, etc), @@ -59,6 +61,10 @@ def test_tabular(): subsample_size = 100 time_limit = 60 + # Catboost > 1.2 is required for python 3.11 but cannot be correctly installed on macos + if sys.version_info >= (3, 11) and sys.platform == "darwin": + hyperparameters.pop("CAT") + fit_args = {"verbosity": verbosity} if hyperparameter_tune_kwargs is not None: fit_args["hyperparameter_tune_kwargs"] = hyperparameter_tune_kwargs From bd6b173e213239795a4fdc32ea4f2aa2e9ab6548 Mon Sep 17 00:00:00 2001 From: Weisu Yin Date: Tue, 10 Oct 2023 19:56:10 +0000 Subject: [PATCH 44/44] address comments --- 
.../advanced_topics/customization.ipynb | 23 +++++++++++++++++++ .../configs/optimization/adamw.yaml | 1 + 2 files changed, 24 insertions(+) diff --git a/docs/tutorials/multimodal/advanced_topics/customization.ipynb b/docs/tutorials/multimodal/advanced_topics/customization.ipynb index 395ac552410..2565ca0a5ca 100644 --- a/docs/tutorials/multimodal/advanced_topics/customization.ipynb +++ b/docs/tutorials/multimodal/advanced_topics/customization.ipynb @@ -360,6 +360,29 @@ "```\n" ] }, + { + "cell_type": "markdown", + "id": "02d07866", + "metadata": {}, + "source": [ + "### optimization.track_grad_norm\n", + "\n", + "Track the p-norm of gradients during training. May be set to ‘inf’ for the infinity-norm. If using Automatic Mixed Precision (AMP), the gradients will be unscaled before logging them." + ] + }, + { + "cell_type": "markdown", + "id": "1b60c371", + "metadata": {}, + "source": [ + "```\n", + "# default used by AutoMM (no tracking)\n", + "predictor.fit(hyperparameters={\"optimization.track_grad_norm\": -1})\n", + "# track the 2-norm\n", + "predictor.fit(hyperparameters={\"optimization.track_grad_norm\": 2})\n", + "```\n" + ] + }, { "cell_type": "markdown", "id": "abe87d32", diff --git a/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml b/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml index 2def18adce8..4f24e51dc5a 100644 --- a/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml +++ b/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml @@ -16,6 +16,7 @@ optimization: skip_final_val: False # Flag to skip the last validation gradient_clip_val: 1 gradient_clip_algorithm: "norm" + track_grad_norm: -1 # p-norm of gradients to track during training. -1 disables tracking; set it to 2 to track the 2-norm. log_every_n_steps: 10 top_k: 3 top_k_average_method: