From fa4e31eff00eae90e00a2f702df24bef950826f8 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Tue, 2 May 2023 15:36:00 +0200
Subject: [PATCH 01/44] Add support for python 3.11

---
 .github/workflow_scripts/env_setup.sh        | 14 ++++++++------
 .github/workflows/platform_tests-command.yml | 12 ++++++------
 core/src/autogluon/core/_setup_utils.py      |  2 +-
 docs/install-cpu-pip.md                      |  2 +-
 docs/install-cpu-source.md                   |  2 +-
 docs/install-gpu-pip.md                      |  2 +-
 docs/install-gpu-source.md                   |  2 +-
 docs/install-windows-gpu.md                  |  2 +-
 multimodal/setup.py                          | 12 +++++++-----
 tabular/setup.py                             |  2 +-
 10 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/.github/workflow_scripts/env_setup.sh b/.github/workflow_scripts/env_setup.sh
index 809f967a654..33eed43a7b8 100644
--- a/.github/workflow_scripts/env_setup.sh
+++ b/.github/workflow_scripts/env_setup.sh
@@ -16,21 +16,23 @@ function setup_build_contrib_env {
 }
 
 function setup_torch_gpu {
-    # Security-patched torch.
-    python3 -m pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
+    PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu118 reinstall_torch
 }
 
 function setup_torch_cpu {
-    # Security-patched torch
-    python3 -m pip install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cpu
+    PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu reinstall_torch
 }
 
 function setup_torch_gpu_non_linux {
-    pip3 install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
+    setup_torch_gpu
 }
 
 function setup_torch_cpu_non_linux {
-    pip3 install torch==1.13.1 torchvision==0.14.1
+    setup_torch_cpu
+}
+
+function reinstall_torch {
+    pip3 install --force-reinstall torchvision~=0.15.1
 }
 
 function setup_hf_model_mirror {
diff --git a/.github/workflows/platform_tests-command.yml b/.github/workflows/platform_tests-command.yml
index 49627a18f9b..ccd250dccaa 100644
--- a/.github/workflows/platform_tests-command.yml
+++ b/.github/workflows/platform_tests-command.yml
@@ -45,7 +45,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest, windows-latest, ubuntu-latest]
-        python: ["3.8", "3.9", "3.10"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
     steps:  
       - name: Checkout repository for PR
         if: (github.event_name == 'workflow_dispatch')
@@ -77,7 +77,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest, windows-latest, ubuntu-latest]
-        python: ["3.8", "3.9", "3.10"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - name: Checkout repository for PR
         if: (github.event_name == 'workflow_dispatch')
@@ -109,7 +109,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest, windows-latest, ubuntu-latest]
-        python: ["3.8", "3.9", "3.10"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - name: Checkout repository for PR
         if: (github.event_name == 'workflow_dispatch')
@@ -141,7 +141,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest, windows-latest, ubuntu-latest]
-        python: ["3.8", "3.9", "3.10"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - name: Checkout repository for PR
         if: (github.event_name == 'workflow_dispatch')
@@ -174,7 +174,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest, windows-latest, ubuntu-latest]
-        python: ["3.8", "3.9", "3.10"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - name: Checkout repository for PR
         if: (github.event_name == 'workflow_dispatch')
@@ -214,7 +214,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest, windows-latest, ubuntu-latest]
-        python: ["3.8", "3.9", "3.10"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - name: Checkout repository for PR
         if: (github.event_name == 'workflow_dispatch')
diff --git a/core/src/autogluon/core/_setup_utils.py b/core/src/autogluon/core/_setup_utils.py
index 52dfc5e9b57..4d21fdea889 100644
--- a/core/src/autogluon/core/_setup_utils.py
+++ b/core/src/autogluon/core/_setup_utils.py
@@ -13,7 +13,7 @@
     os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..', '..')
 )
 
-PYTHON_REQUIRES = '>=3.8, <3.11'
+PYTHON_REQUIRES = '>=3.8, <3.12'
 
 
 # Only put packages here that would otherwise appear multiple times across different module's setup.py files.
diff --git a/docs/install-cpu-pip.md b/docs/install-cpu-pip.md
index f04890f2541..04d2600c3e7 100644
--- a/docs/install-cpu-pip.md
+++ b/docs/install-cpu-pip.md
@@ -4,7 +4,7 @@ pip install -U setuptools wheel
 
 # CPU version of pytorch has smaller footprint - see installation instructions in
 # pytorch documentation - https://pytorch.org/get-started/locally/
-pip install torch==1.13.1+cpu torchvision==0.14.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
+pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cpu
 
 pip install autogluon
 ```
diff --git a/docs/install-cpu-source.md b/docs/install-cpu-source.md
index fbb7d4dcd42..68859b8682b 100644
--- a/docs/install-cpu-source.md
+++ b/docs/install-cpu-source.md
@@ -4,7 +4,7 @@ pip install -U setuptools wheel
 
 # CPU version of pytorch has smaller footprint - see installation instructions in
 # pytorch documentation - https://pytorch.org/get-started/locally/
-pip install torch==1.13.1+cpu torchvision==0.14.1+cpu --extra-index-url https://download.pytorch.org/whl/cpu
+pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cpu
 
 git clone https://github.com/autogluon/autogluon
 cd autogluon && ./full_install.sh
diff --git a/docs/install-gpu-pip.md b/docs/install-gpu-pip.md
index 8a9234f1b92..5b5e141b2f0 100644
--- a/docs/install-gpu-pip.md
+++ b/docs/install-gpu-pip.md
@@ -3,7 +3,7 @@ pip install -U pip
 pip install -U setuptools wheel
 
 # Install the proper version of PyTorch following https://pytorch.org/get-started/locally/
-pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
+pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu118
 
 pip install autogluon
 ```
diff --git a/docs/install-gpu-source.md b/docs/install-gpu-source.md
index d7c578fb698..7db68dc6dad 100644
--- a/docs/install-gpu-source.md
+++ b/docs/install-gpu-source.md
@@ -3,7 +3,7 @@ pip install -U pip
 pip install -U setuptools wheel
 
 # Install the proper version of PyTorch following https://pytorch.org/get-started/locally/
-pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
+pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu118
 
 git clone https://github.com/autogluon/autogluon
 cd autogluon && ./full_install.sh
diff --git a/docs/install-windows-gpu.md b/docs/install-windows-gpu.md
index ed051cfdaf3..2de07bb2333 100644
--- a/docs/install-windows-gpu.md
+++ b/docs/install-windows-gpu.md
@@ -11,7 +11,7 @@ conda activate myenv
 4. Install the proper GPU PyTorch version by following the [PyTorch Install Documentation](https://pytorch.org/get-started/locally/) (Recommended). Alternatively, use the following command:
 
 ```bash
-pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
+pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu118
 ```
 
 5. Sanity check that your installation is valid and can detect your GPU via testing in Python:
diff --git a/multimodal/setup.py b/multimodal/setup.py
index a204c233c34..fcd5c7aef5e 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -33,13 +33,15 @@
     "evaluate>=0.2.2,<0.4.0",
     "accelerate>=0.9,<0.17",
     "timm>=0.6.12,<0.7.0",
-    "torch>=1.9,<1.14",
-    "torchvision<0.15.0",
+    "torch>=1.9,<2.1",
+    "torchvision>=0.10.0,<0.16",
     "fairscale>=0.4.5,<0.4.14",
-    "scikit-image>=0.19.1,<0.20.0",
-    "pytorch-lightning>=1.9.0,<1.10.0",
+    "scikit-image>=0.19.1,<0.21.0",
+    "pytorch-lightning>=1.9.0,<2.1",
     "text-unidecode>=1.3,<1.4",
-    "torchmetrics>=0.11.0,<0.12.0",
+    # temporary for testing ref https://github.com/autogluon/autogluon/issues/2687#issuecomment-1531122312
+    # "torchmetrics>=0.11.0,<0.12.0",
+    "torchmetrics @ https://github.com/Lightning-AI/torchmetrics/archive/95599c9a7bd5e6c8f0a8039562dbbc832b434031.zip"
     "transformers>=4.23.0,<4.27.0",
     "nptyping>=1.4.4,<2.5.0",
     "omegaconf>=2.1.1,<2.3.0",
diff --git a/tabular/setup.py b/tabular/setup.py
index 929710248b6..508d5355980 100644
--- a/tabular/setup.py
+++ b/tabular/setup.py
@@ -34,7 +34,7 @@
         'lightgbm>=3.3,<3.4',
     ],
     'catboost': [
-        'catboost>=1.1,<1.2',
+        'catboost>=1.1,<1.3',
     ],
     # FIXME: Debug why xgboost 1.6 has 4x+ slower inference on multiclass datasets compared to 1.4
     #  It is possibly only present on MacOS, haven't tested linux.

From e7e6bbce96ad7b1a59164161b57b26f90e64e2a7 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Tue, 2 May 2023 15:49:21 +0200
Subject: [PATCH 02/44] Add missing comma

---
 multimodal/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index fcd5c7aef5e..560605f2a5e 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -41,7 +41,7 @@
     "text-unidecode>=1.3,<1.4",
     # temporary for testing ref https://github.com/autogluon/autogluon/issues/2687#issuecomment-1531122312
     # "torchmetrics>=0.11.0,<0.12.0",
-    "torchmetrics @ https://github.com/Lightning-AI/torchmetrics/archive/95599c9a7bd5e6c8f0a8039562dbbc832b434031.zip"
+    "torchmetrics @ https://github.com/Lightning-AI/torchmetrics/archive/95599c9a7bd5e6c8f0a8039562dbbc832b434031.zip",
     "transformers>=4.23.0,<4.27.0",
     "nptyping>=1.4.4,<2.5.0",
     "omegaconf>=2.1.1,<2.3.0",

From 14e6fee69956d833ad41675ce7cf44bb1b198939 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Tue, 2 May 2023 16:25:20 +0200
Subject: [PATCH 03/44] Deprecate track_grad_norm

ref https://github.com/Lightning-AI/lightning/pull/16745
---
 .../src/autogluon/multimodal/configs/optimization/adamw.yaml     | 1 -
 multimodal/src/autogluon/multimodal/matcher.py                   | 1 -
 multimodal/src/autogluon/multimodal/predictor.py                 | 1 -
 3 files changed, 3 deletions(-)

diff --git a/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml b/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml
index d7ace45ed70..a0a180c903d 100644
--- a/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml
+++ b/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml
@@ -16,7 +16,6 @@ optimization:
   skip_final_val: False # Flag to skip the last validation
   gradient_clip_val: 1
   gradient_clip_algorithm: "norm"
-  track_grad_norm: -1 # Whether to check gradient norm. We can set it to 2 to check for gradient norm.
   log_every_n_steps: 10
   val_metric: null
   top_k: 3
diff --git a/multimodal/src/autogluon/multimodal/matcher.py b/multimodal/src/autogluon/multimodal/matcher.py
index bdf22996378..f3132d9a962 100644
--- a/multimodal/src/autogluon/multimodal/matcher.py
+++ b/multimodal/src/autogluon/multimodal/matcher.py
@@ -910,7 +910,6 @@ def _fit(
                 log_every_n_steps=OmegaConf.select(config, "optimization.log_every_n_steps", default=10),
                 enable_progress_bar=enable_progress_bar,
                 fast_dev_run=config.env.fast_dev_run,
-                track_grad_norm=OmegaConf.select(config, "optimization.track_grad_norm", default=-1),
                 val_check_interval=config.optimization.val_check_interval,
                 check_val_every_n_epoch=config.optimization.check_val_every_n_epoch
                 if hasattr(config.optimization, "check_val_every_n_epoch")
diff --git a/multimodal/src/autogluon/multimodal/predictor.py b/multimodal/src/autogluon/multimodal/predictor.py
index fd03a6ec76e..6b172a1fa81 100644
--- a/multimodal/src/autogluon/multimodal/predictor.py
+++ b/multimodal/src/autogluon/multimodal/predictor.py
@@ -1475,7 +1475,6 @@ def _fit(
                 log_every_n_steps=OmegaConf.select(config, "optimization.log_every_n_steps", default=10),
                 enable_progress_bar=enable_progress_bar,
                 fast_dev_run=config.env.fast_dev_run,
-                track_grad_norm=OmegaConf.select(config, "optimization.track_grad_norm", default=-1),
                 val_check_interval=config.optimization.val_check_interval,
                 check_val_every_n_epoch=config.optimization.check_val_every_n_epoch
                 if hasattr(config.optimization, "check_val_every_n_epoch")

From dd96a190bb5d0f5320bd51764ed91a7dbbe10e89 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Tue, 2 May 2023 17:24:31 +0200
Subject: [PATCH 04/44] Use 'auto' instead of None defaults

---
 multimodal/src/autogluon/multimodal/matcher.py   | 8 ++++----
 multimodal/src/autogluon/multimodal/predictor.py | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/multimodal/src/autogluon/multimodal/matcher.py b/multimodal/src/autogluon/multimodal/matcher.py
index f3132d9a962..73b78ac12f1 100644
--- a/multimodal/src/autogluon/multimodal/matcher.py
+++ b/multimodal/src/autogluon/multimodal/matcher.py
@@ -864,7 +864,7 @@ def _fit(
 
         if not hpo_mode:
             if num_gpus <= 1:
-                strategy = None
+                strategy = "auto"
             else:
                 strategy = config.env.strategy
         else:
@@ -872,7 +872,7 @@ def _fit(
             if use_ray_lightning:
                 strategy = hpo_kwargs.get("_ray_lightning_plugin")
             else:
-                strategy = None
+                strategy = "auto"
                 num_gpus = min(num_gpus, 1)
 
         config.env.num_gpus = num_gpus
@@ -886,12 +886,12 @@ def _fit(
         log_filter = LogFilter(blacklist_msgs)
         with apply_log_filter(log_filter):
             trainer = pl.Trainer(
-                accelerator="gpu" if num_gpus > 0 else None,
+                accelerator="gpu" if num_gpus > 0 else "auto",
                 devices=get_available_devices(
                     num_gpus=num_gpus,
                     auto_select_gpus=config.env.auto_select_gpus,
                     use_ray_lightning=use_ray_lightning,
-                ),
+                ) or "auto",
                 num_nodes=config.env.num_nodes,
                 precision=precision,
                 strategy=strategy,
diff --git a/multimodal/src/autogluon/multimodal/predictor.py b/multimodal/src/autogluon/multimodal/predictor.py
index 6b172a1fa81..88db84c693b 100644
--- a/multimodal/src/autogluon/multimodal/predictor.py
+++ b/multimodal/src/autogluon/multimodal/predictor.py
@@ -1429,7 +1429,7 @@ def _fit(
                         reduce_bucket_size=config.env.deepspeed_allreduce_size,
                     )
                 else:
-                    strategy = None
+                    strategy = "auto"
             else:
                 strategy = config.env.strategy
         else:
@@ -1437,7 +1437,7 @@ def _fit(
             if use_ray_lightning:
                 strategy = hpo_kwargs.get("_ray_lightning_plugin")
             else:
-                strategy = None
+                strategy = "auto"
                 num_gpus = min(num_gpus, 1)
 
         config.env.num_gpus = num_gpus
@@ -1451,12 +1451,12 @@ def _fit(
         log_filter = LogFilter(blacklist_msgs)
         with apply_log_filter(log_filter):
             trainer = pl.Trainer(
-                accelerator="gpu" if num_gpus > 0 else None,
+                accelerator="gpu" if num_gpus > 0 else "auto",
                 devices=get_available_devices(
                     num_gpus=num_gpus,
                     auto_select_gpus=config.env.auto_select_gpus,
                     use_ray_lightning=use_ray_lightning,
-                ),
+                ) or "auto",
                 num_nodes=config.env.num_nodes,
                 precision=precision,
                 strategy=strategy,

From 9c922b9f22ce3c5be8e2171d29be8e02d66b5174 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Tue, 2 May 2023 17:28:08 +0200
Subject: [PATCH 05/44] Revert devices='auto'

---
 multimodal/src/autogluon/multimodal/matcher.py   | 2 +-
 multimodal/src/autogluon/multimodal/predictor.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/multimodal/src/autogluon/multimodal/matcher.py b/multimodal/src/autogluon/multimodal/matcher.py
index 73b78ac12f1..aa298064f75 100644
--- a/multimodal/src/autogluon/multimodal/matcher.py
+++ b/multimodal/src/autogluon/multimodal/matcher.py
@@ -891,7 +891,7 @@ def _fit(
                     num_gpus=num_gpus,
                     auto_select_gpus=config.env.auto_select_gpus,
                     use_ray_lightning=use_ray_lightning,
-                ) or "auto",
+                ),
                 num_nodes=config.env.num_nodes,
                 precision=precision,
                 strategy=strategy,
diff --git a/multimodal/src/autogluon/multimodal/predictor.py b/multimodal/src/autogluon/multimodal/predictor.py
index 88db84c693b..80cd9027900 100644
--- a/multimodal/src/autogluon/multimodal/predictor.py
+++ b/multimodal/src/autogluon/multimodal/predictor.py
@@ -1456,7 +1456,7 @@ def _fit(
                     num_gpus=num_gpus,
                     auto_select_gpus=config.env.auto_select_gpus,
                     use_ray_lightning=use_ray_lightning,
-                ) or "auto",
+                ),
                 num_nodes=config.env.num_nodes,
                 precision=precision,
                 strategy=strategy,

From cf8d1537a578e1716f1a08a25728a9b27a50b502 Mon Sep 17 00:00:00 2001
From: Alexander Shirkov <ashyrkou@amazon.com>
Date: Tue, 2 May 2023 18:09:23 -0700
Subject: [PATCH 06/44] fixes to inference; updated timeseries to align with
 the torch version

---
 multimodal/src/autogluon/multimodal/utils/inference.py | 2 +-
 tabular/setup.py                                       | 2 +-
 timeseries/setup.py                                    | 7 +++----
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/multimodal/src/autogluon/multimodal/utils/inference.py b/multimodal/src/autogluon/multimodal/utils/inference.py
index e8ae5a2b7ba..b47483e5c31 100644
--- a/multimodal/src/autogluon/multimodal/utils/inference.py
+++ b/multimodal/src/autogluon/multimodal/utils/inference.py
@@ -487,7 +487,7 @@ def predict(
 
     if num_gpus <= 1:
         # Force set strategy to be None if it's cpu-only or we have only one GPU.
-        strategy = None
+        strategy = "auto"
 
     precision = infer_precision(num_gpus=num_gpus, precision=predictor._config.env.precision, cpu_only_warning=False)
 
diff --git a/tabular/setup.py b/tabular/setup.py
index 508d5355980..17dcc42b0e6 100644
--- a/tabular/setup.py
+++ b/tabular/setup.py
@@ -43,7 +43,7 @@
         'xgboost>=1.6,<1.8',
     ],
     'fastai': [
-        'torch>=1.9,<1.14',
+        'torch>=1.9,<2.1',
         'fastai>=2.3.1,<2.8',
     ],
     'ray': [
diff --git a/timeseries/setup.py b/timeseries/setup.py
index cc4f6c759ec..52b1d608ecf 100644
--- a/timeseries/setup.py
+++ b/timeseries/setup.py
@@ -29,8 +29,8 @@
     "pandas",  # version range defined in `core/_setup_utils.py`
     "statsmodels>=0.13.0,<0.14",
     "gluonts>=0.12.4,<0.13",
-    "torch>=1.9,<1.14",
-    "pytorch-lightning>=1.7.4,<1.10.0",
+    "torch>=1.9,<2.1",
+    "pytorch-lightning>=1.9.0,<2.1",
     "networkx",  # version range defined in `core/_setup_utils.py`
     "statsforecast>=1.4.0,<1.5",
     "mlforecast>=0.7.0,<0.8.0",
@@ -50,10 +50,9 @@
         "isort>=5.10",
         "black>=22.3,<23.0",
     ],
+    "all": [],
 }
 
-extras_require["all"] = []
-
 install_requires = ag.get_dependency_version_ranges(install_requires)
 
 if __name__ == "__main__":

From 28ca9de6f3a9c3923c9db98e7865c28ab7bb3c79 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Wed, 3 May 2023 21:14:07 +0200
Subject: [PATCH 07/44] Deprecate compute_on_step

https://github.com/Lightning-AI/torchmetrics/issues/789
---
 multimodal/src/autogluon/multimodal/utils/map.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/multimodal/src/autogluon/multimodal/utils/map.py b/multimodal/src/autogluon/multimodal/utils/map.py
index f40d739c740..507aac6eddf 100644
--- a/multimodal/src/autogluon/multimodal/utils/map.py
+++ b/multimodal/src/autogluon/multimodal/utils/map.py
@@ -166,8 +166,6 @@ class MeanAveragePrecision(Metric):
         Args:
             class_metrics:
                 Option to enable per-class metrics for mAP and mAR_100. Has a performance impact. default: False
-            compute_on_step:
-                Forward only calls ``update()`` and return ``None`` if this is set to ``False``.
             dist_sync_on_step:
                 Synchronize metric state across processes at each ``forward()``
                 before returning the value at the step
@@ -190,7 +188,6 @@ class MeanAveragePrecision(Metric):
         def __init__(
             self,
             class_metrics: bool = False,
-            compute_on_step: bool = True,
             dist_sync_on_step: bool = False,
             process_group: Optional[Any] = None,
             dist_sync_fn: Callable = None,
@@ -198,7 +195,6 @@ def __init__(
             iou_type: str = None,
         ) -> None:  # type: ignore
             super().__init__(
-                compute_on_step=compute_on_step,
                 dist_sync_on_step=dist_sync_on_step,
                 process_group=process_group,
                 dist_sync_fn=dist_sync_fn,

From 0d4f8a49aeae2ae7084a2e7ffd92fd351d1eccda Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 4 May 2023 10:05:48 +0200
Subject: [PATCH 08/44] Rename validation_epoch_end -> on_validation_epoch_end

ref https://github.com/Lightning-AI/lightning/pull/16520
---
 multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py b/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py
index 4fc499cdd38..9ac18212dbc 100644
--- a/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py
+++ b/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py
@@ -230,7 +230,7 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0):
         else:
             self.evaluate(batch, "val")
 
-    def validation_epoch_end(self, validation_step_outputs):
+    def on_validation_epoch_end(self):
         val_result = self.validation_metric.compute()
         if self.use_loss:
             self.log_dict({"val_direct_loss": val_result}, sync_dist=True)

From 18fbb5f095fb39df1d1cdae07d16dccf4615cca8 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 4 May 2023 10:46:33 +0200
Subject: [PATCH 09/44] Remove property overwrite attempt

`Trainer.val_dataloaders` is a property and cannot be overwritten, ref https://github.com/Lightning-AI/lightning/blob/2.0.2/src/lightning/pytorch/trainer/trainer.py#L1384

The assignment is therefore replaced with an early `return None`, ref https://github.com/Lightning-AI/lightning/blob/2.0.2/src/lightning/pytorch/core/hooks.py#L48-L49
---
 multimodal/src/autogluon/multimodal/optimization/lit_module.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/src/autogluon/multimodal/optimization/lit_module.py b/multimodal/src/autogluon/multimodal/optimization/lit_module.py
index 42a909b576d..c2f7c1a1f8e 100644
--- a/multimodal/src/autogluon/multimodal/optimization/lit_module.py
+++ b/multimodal/src/autogluon/multimodal/optimization/lit_module.py
@@ -239,13 +239,13 @@ def training_step(self, batch, batch_idx):
 
     def on_validation_start(self) -> None:
         if self.skip_final_val and self.trainer.should_stop:
-            self.trainer.val_dataloaders = []  # skip the final validation by setting val_dataloaders empty
             self.log(
                 self.validation_metric_name,
                 self.validation_metric,
                 on_step=False,
                 on_epoch=True,
             )
+            return None
         return super().on_validation_start()
 
     def validation_step(self, batch, batch_idx):

From 6e626af6a2ce70b3255ea495b07ae12325a7e92d Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 4 May 2023 12:12:49 +0200
Subject: [PATCH 10/44] Update k -> top_k

ref https://github.com/Lightning-AI/torchmetrics/pull/1504
---
 multimodal/tests/unittests/others/test_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/tests/unittests/others/test_metrics.py b/multimodal/tests/unittests/others/test_metrics.py
index 5f6e296c571..e8dd9e1ac9d 100644
--- a/multimodal/tests/unittests/others/test_metrics.py
+++ b/multimodal/tests/unittests/others/test_metrics.py
@@ -90,7 +90,7 @@ def ref_symmetric_hit_rate(features_a, features_b, logit_scale, top_ks=[1, 5, 10
         )
         target = torch.eye(num_elements, dtype=bool).reshape(-1)
         for k in top_ks:
-            hr_k = RetrievalHitRate(k=k)
+            hr_k = RetrievalHitRate(top_k=k)
             hit_rate += hr_k(preds, target, indexes=indexes)
     return hit_rate / (2 * len(top_ks))
 

From 06b41f235237f893c1f2ee1d1c11bd2bbe02b652 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 4 May 2023 12:14:02 +0200
Subject: [PATCH 11/44] Bump torchmetrics

---
 multimodal/setup.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index 560605f2a5e..1d8e84039b3 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -39,9 +39,7 @@
     "scikit-image>=0.19.1,<0.21.0",
     "pytorch-lightning>=1.9.0,<2.1",
     "text-unidecode>=1.3,<1.4",
-    # temporary for testing ref https://github.com/autogluon/autogluon/issues/2687#issuecomment-1531122312
-    # "torchmetrics>=0.11.0,<0.12.0",
-    "torchmetrics @ https://github.com/Lightning-AI/torchmetrics/archive/95599c9a7bd5e6c8f0a8039562dbbc832b434031.zip",
+    "torchmetrics~=1.0.0rc0",
     "transformers>=4.23.0,<4.27.0",
     "nptyping>=1.4.4,<2.5.0",
     "omegaconf>=2.1.1,<2.3.0",

From 6a0387e6ef8cfe838e27093831061909dd7b3985 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 4 May 2023 12:24:19 +0200
Subject: [PATCH 12/44] Bump pytorch-lightning

Commit https://github.com/autogluon/autogluon/pull/3190/commits/dd96a190bb5d0f5320bd51764ed91a7dbbe10e89 (use 'auto' instead of None defaults) requires raising the minimum pytorch-lightning version to 2.0.0, ref https://github.com/Lightning-AI/lightning/pull/16847
---
 multimodal/setup.py | 2 +-
 timeseries/setup.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index 1d8e84039b3..17d887dc64a 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -37,7 +37,7 @@
     "torchvision>=0.10.0,<0.16",
     "fairscale>=0.4.5,<0.4.14",
     "scikit-image>=0.19.1,<0.21.0",
-    "pytorch-lightning>=1.9.0,<2.1",
+    "pytorch-lightning>=2.0.0,<2.1",
     "text-unidecode>=1.3,<1.4",
     "torchmetrics~=1.0.0rc0",
     "transformers>=4.23.0,<4.27.0",
diff --git a/timeseries/setup.py b/timeseries/setup.py
index 52b1d608ecf..57092cfc755 100644
--- a/timeseries/setup.py
+++ b/timeseries/setup.py
@@ -30,7 +30,7 @@
     "statsmodels>=0.13.0,<0.14",
     "gluonts>=0.12.4,<0.13",
     "torch>=1.9,<2.1",
-    "pytorch-lightning>=1.9.0,<2.1",
+    "pytorch-lightning>=2.0.0,<2.1",
     "networkx",  # version range defined in `core/_setup_utils.py`
     "statsforecast>=1.4.0,<1.5",
     "mlforecast>=0.7.0,<0.8.0",

From 2e65ea8873609aab2fda92a837491344aa2697b4 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 4 May 2023 13:38:12 +0200
Subject: [PATCH 13/44] Remove map.py

---
 .../automm/object_detection/detection_eval.py |   3 -
 .../object_detection/detection_train.py       |   3 -
 .../multimodal/optimization/utils.py          |   2 +-
 .../autogluon/multimodal/utils/__init__.py    |   1 -
 .../src/autogluon/multimodal/utils/map.py     | 411 ------------------
 5 files changed, 1 insertion(+), 419 deletions(-)
 delete mode 100644 multimodal/src/autogluon/multimodal/utils/map.py

diff --git a/examples/automm/object_detection/detection_eval.py b/examples/automm/object_detection/detection_eval.py
index f7b355be618..2b13a0a885d 100644
--- a/examples/automm/object_detection/detection_eval.py
+++ b/examples/automm/object_detection/detection_eval.py
@@ -10,9 +10,6 @@
     python detection_eval.py \
         --test_path ./VOCdevkit/VOC2007/Annotations/test_cocoformat.json \
         --checkpoint_name faster_rcnn_r50_fpn_1x_voc0712
-
-Note that for now it's required to install nightly build torchmetrics.
-This will be solved in next pr. (MeanAveragePrecision will be moved to AG temporarily.)
 """
 
 import argparse
diff --git a/examples/automm/object_detection/detection_train.py b/examples/automm/object_detection/detection_train.py
index ba70a75906b..16c5f505338 100644
--- a/examples/automm/object_detection/detection_train.py
+++ b/examples/automm/object_detection/detection_train.py
@@ -23,9 +23,6 @@
         --lr <learning_rate> \
         --wd <weight_decay> \
         --epochs <epochs>
-
-Note that for now it's required to install nightly build torchmetrics.
-This will be solved in next pr. (MeanAveragePrecision will be moved to AG temporarily.)
 """
 
 import argparse
diff --git a/multimodal/src/autogluon/multimodal/optimization/utils.py b/multimodal/src/autogluon/multimodal/optimization/utils.py
index e1d023c2262..916dde96463 100644
--- a/multimodal/src/autogluon/multimodal/optimization/utils.py
+++ b/multimodal/src/autogluon/multimodal/optimization/utils.py
@@ -11,6 +11,7 @@
 from pytorch_metric_learning import distances, losses, miners
 from torch import nn, optim
 from torch.nn import functional as F
+from torchmetrics.detection.mean_ap import MeanAveragePrecision
 from transformers import Adafactor
 from transformers.trainer_pt_utils import get_parameter_names
 
@@ -62,7 +63,6 @@
     ROOT_MEAN_SQUARED_ERROR,
     SPEARMANR,
 )
-from ..utils.map import MeanAveragePrecision
 from .losses import FocalLoss, MultiNegativesSoftmaxLoss, SoftTargetCrossEntropy
 from .lr_scheduler import (
     get_cosine_schedule_with_warmup,
diff --git a/multimodal/src/autogluon/multimodal/utils/__init__.py b/multimodal/src/autogluon/multimodal/utils/__init__.py
index a705a37c367..0dc0ce2ff48 100644
--- a/multimodal/src/autogluon/multimodal/utils/__init__.py
+++ b/multimodal/src/autogluon/multimodal/utils/__init__.py
@@ -46,7 +46,6 @@
 from .inference import extract_from_output, infer_batch, predict, process_batch, use_realtime
 from .load import CustomUnpickler, load_text_tokenizers
 from .log import LogFilter, apply_log_filter, get_fit_complete_message, get_fit_start_message, make_exp_dir
-from .map import MeanAveragePrecision
 from .matcher import compute_semantic_similarity, convert_data_for_ranking, create_siamese_model, semantic_search
 from .metric import compute_ranking_score, compute_score, get_minmax_mode, get_stopping_threshold, infer_metrics
 from .misc import logits_to_prob, merge_bio_format, shopee_dataset, tensor_to_ndarray, visualize_ner
diff --git a/multimodal/src/autogluon/multimodal/utils/map.py b/multimodal/src/autogluon/multimodal/utils/map.py
deleted file mode 100644
index 507aac6eddf..00000000000
--- a/multimodal/src/autogluon/multimodal/utils/map.py
+++ /dev/null
@@ -1,411 +0,0 @@
-import torchmetrics
-from packaging import version
-
-# There is a bug >=0.9, <=0.11.0
-# And the slow speed problem is still not fixed in 0.11.1
-if version.parse(torchmetrics.__version__) > version.parse("0.12.0"):
-    from torchmetrics.detection.mean_ap import MeanAveragePrecision
-else:
-    import logging
-    import sys
-    from dataclasses import dataclass
-    from typing import Any, Callable, Dict, List, Optional, Sequence, Union
-
-    import torch
-    from torch import Tensor
-    from torchmetrics.metric import Metric
-    from torchmetrics.utilities.imports import (
-        _PYCOCOTOOLS_AVAILABLE,
-        _TORCHVISION_AVAILABLE,
-        _TORCHVISION_GREATER_EQUAL_0_8,
-    )
-
-    if _TORCHVISION_AVAILABLE and _TORCHVISION_GREATER_EQUAL_0_8:
-        from torchvision.ops import box_convert
-    else:
-        box_convert = None
-
-    if _PYCOCOTOOLS_AVAILABLE:
-        from pycocotools.coco import COCO
-        from pycocotools.cocoeval import COCOeval
-    else:
-        COCO, COCOeval = None, None
-
-    log = logging.getLogger(__name__)
-
-    @dataclass
-    class MAPMetricResults:
-        """Dataclass to wrap the final mAP results."""
-
-        map: Tensor
-        map_50: Tensor
-        map_75: Tensor
-        map_small: Tensor
-        map_medium: Tensor
-        map_large: Tensor
-        mar_1: Tensor
-        mar_10: Tensor
-        mar_100: Tensor
-        mar_small: Tensor
-        mar_medium: Tensor
-        mar_large: Tensor
-        map_per_class: Tensor
-        mar_100_per_class: Tensor
-
-        def __getitem__(self, key: str) -> Union[Tensor, List[Tensor]]:
-            return getattr(self, key)
-
-    # noinspection PyMethodMayBeStatic
-    class WriteToLog:
-        """Logging class to move logs to log.debug()."""
-
-        def write(self, buf: str) -> None:  # skipcq: PY-D0003, PYL-R0201
-            for line in buf.rstrip().splitlines():
-                log.debug(line.rstrip())
-
-        def flush(self) -> None:  # skipcq: PY-D0003, PYL-R0201
-            for handler in log.handlers:
-                handler.flush()
-
-        def close(self) -> None:  # skipcq: PY-D0003, PYL-R0201
-            for handler in log.handlers:
-                handler.close()
-
-    class _hide_prints:
-        """Internal helper context to suppress the default output of the pycocotools package."""
-
-        def __init__(self) -> None:
-            self._original_stdout = None
-
-        def __enter__(self) -> None:
-            self._original_stdout = sys.stdout  # type: ignore
-            sys.stdout = WriteToLog()  # type: ignore
-
-        def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore
-            sys.stdout.close()
-            sys.stdout = self._original_stdout  # type: ignore
-
-    def _input_validator(preds: List[Dict[str, torch.Tensor]], targets: List[Dict[str, torch.Tensor]]) -> None:
-        """Ensure the correct input format of `preds` and `targets`"""
-        if not isinstance(preds, Sequence):
-            raise ValueError("Expected argument `preds` to be of type List")
-        if not isinstance(targets, Sequence):
-            raise ValueError("Expected argument `target` to be of type List")
-        if len(preds) != len(targets):
-            raise ValueError("Expected argument `preds` and `target` to have the same length")
-
-        for k in ["boxes", "scores", "labels"]:
-            if any(k not in p for p in preds):
-                raise ValueError(f"Expected all dicts in `preds` to contain the `{k}` key")
-
-        for k in ["boxes", "labels"]:
-            if any(k not in p for p in targets):
-                raise ValueError(f"Expected all dicts in `target` to contain the `{k}` key")
-
-        if any(type(pred["boxes"]) is not torch.Tensor for pred in preds):
-            raise ValueError("Expected all boxes in `preds` to be of type torch.Tensor")
-        if any(type(pred["scores"]) is not torch.Tensor for pred in preds):
-            raise ValueError("Expected all scores in `preds` to be of type torch.Tensor")
-        if any(type(pred["labels"]) is not torch.Tensor for pred in preds):
-            raise ValueError("Expected all labels in `preds` to be of type torch.Tensor")
-        if any(type(target["boxes"]) is not torch.Tensor for target in targets):
-            raise ValueError("Expected all boxes in `target` to be of type torch.Tensor")
-        if any(type(target["labels"]) is not torch.Tensor for target in targets):
-            raise ValueError("Expected all labels in `target` to be of type torch.Tensor")
-
-        for i, item in enumerate(targets):
-            if item["boxes"].size(0) != item["labels"].size(0):
-                raise ValueError(
-                    f"Input boxes and labels of sample {i} in targets have a"
-                    f" different length (expected {item['boxes'].size(0)} labels, got {item['labels'].size(0)})"
-                )
-        for i, item in enumerate(preds):
-            if item["boxes"].size(0) != item["labels"].size(0) != item["scores"].size(0):
-                raise ValueError(
-                    f"Input boxes, labels and scores of sample {i} in preds have a"
-                    f" different length (expected {item['boxes'].size(0)} labels and scores,"
-                    f" got {item['labels'].size(0)} labels and {item['scores'].size(0)})"
-                )
-
-    def _fix_empty_tensors(boxes: torch.Tensor) -> torch.Tensor:
-        """Empty tensors can cause problems in DDP mode, this methods corrects them."""
-        if boxes.numel() == 0 and boxes.ndim == 1:
-            return boxes.unsqueeze(0)
-        return boxes
-
-    class MeanAveragePrecision(Metric):
-        r"""
-        Computes the `Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR)\
-        <https://jonathan-hui.medium.com/map-mean-average-precision-for-object-detection-45c121a31173>`_\
-        for object detection predictions.
-        Optionally, the mAP and mAR values can be calculated per class.
-
-        Predicted boxes and targets have to be in Pascal VOC format
-        (xmin-top left, ymin-top left, xmax-bottom right, ymax-bottom right).
-        See the :meth:`update` method for more information about the input format to this metric.
-
-        For an example on how to use this metric check the `torchmetrics examples\
-        <https://github.com/PyTorchLightning/metrics/blob/master/tm_examples/detection_map.py>`_
-
-        .. note::
-            This metric is a wrapper for the
-            `pycocotools <https://github.com/cocodataset/cocoapi/tree/master/PythonAPI/pycocotools>`_,
-            which is a standard implementation for the mAP metric for object detection. Using this metric
-            therefore requires you to have `pycocotools` installed. Please install with ``pip install pycocotools`` or
-            ``pip install torchmetrics[detection]``.
-
-        .. note::
-            This metric requires you to have `torchvision` version 0.8.0 or newer installed (with corresponding
-            version 1.7.0 of torch or newer). Please install with ``pip install torchvision`` or
-            ``pip install torchmetrics[detection]``.
-
-        .. note::
-            As the pycocotools library cannot deal with tensors directly, all results have to be transferred
-            to the CPU, this might have an performance impact on your training.
-
-        Args:
-            class_metrics:
-                Option to enable per-class metrics for mAP and mAR_100. Has a performance impact. default: False
-            dist_sync_on_step:
-                Synchronize metric state across processes at each ``forward()``
-                before returning the value at the step
-            process_group:
-                Specify the process group on which synchronization is called.
-                default: ``None`` (which selects the entire world)
-            dist_sync_fn:
-                Callback that performs the allgather operation on the metric state. When ``None``, DDP
-                will be used to perform the allgather
-
-        Raises:
-            ImportError:
-                If ``pycocotools`` is not installed
-            ImportError:
-                If ``torchvision`` is not installed or version installed is lower than 0.8.0
-            ValueError:
-                If ``class_metrics`` is not a boolean
-        """
-
-        def __init__(
-            self,
-            class_metrics: bool = False,
-            dist_sync_on_step: bool = False,
-            process_group: Optional[Any] = None,
-            dist_sync_fn: Callable = None,
-            box_format: str = None,
-            iou_type: str = None,
-        ) -> None:  # type: ignore
-            super().__init__(
-                dist_sync_on_step=dist_sync_on_step,
-                process_group=process_group,
-                dist_sync_fn=dist_sync_fn,
-            )
-
-            if not _PYCOCOTOOLS_AVAILABLE:
-                raise ImportError(
-                    "`MAP` metric requires that `pycocotools` installed."
-                    " Please install with `pip install pycocotools` or `pip install torchmetrics[detection]`"
-                )
-            if not (_TORCHVISION_AVAILABLE and _TORCHVISION_GREATER_EQUAL_0_8):
-                raise ImportError(
-                    "`MAP` metric requires that `torchvision` version 0.8.0 or newer is installed."
-                    " Please install with `pip install torchvision` or `pip install torchmetrics[detection]`"
-                )
-
-            if not isinstance(class_metrics, bool):
-                raise ValueError("Expected argument `class_metrics` to be a boolean")
-            self.class_metrics = class_metrics
-
-            self.add_state("detection_boxes", default=[], dist_reduce_fx=None)
-            self.add_state("detection_scores", default=[], dist_reduce_fx=None)
-            self.add_state("detection_labels", default=[], dist_reduce_fx=None)
-            self.add_state("groundtruth_boxes", default=[], dist_reduce_fx=None)
-            self.add_state("groundtruth_labels", default=[], dist_reduce_fx=None)
-
-        def update(self, preds: List[Dict[str, Tensor]], target: List[Dict[str, Tensor]]) -> None:  # type: ignore
-            """Add detections and groundtruth to the metric.
-
-            Args:
-                preds: A list consisting of dictionaries each containing the key-values\
-                (each dictionary corresponds to a single image):
-                - ``boxes``: torch.FloatTensor of shape
-                    [num_boxes, 4] containing `num_boxes` detection boxes of the format
-                    [xmin, ymin, xmax, ymax] in absolute image coordinates.
-                - ``scores``: torch.FloatTensor of shape
-                    [num_boxes] containing detection scores for the boxes.
-                - ``labels``: torch.IntTensor of shape
-                    [num_boxes] containing 0-indexed detection classes for the boxes.
-
-                target: A list consisting of dictionaries each containing the key-values\
-                (each dictionary corresponds to a single image):
-                - ``boxes``: torch.FloatTensor of shape
-                    [num_boxes, 4] containing `num_boxes` groundtruth boxes of the format
-                    [xmin, ymin, xmax, ymax] in absolute image coordinates.
-                - ``labels``: torch.IntTensor of shape
-                    [num_boxes] containing 1-indexed groundtruth classes for the boxes.
-
-            Raises:
-                ValueError:
-                    If ``preds`` is not of type List[Dict[str, torch.Tensor]]
-                ValueError:
-                    If ``target`` is not of type List[Dict[str, torch.Tensor]]
-                ValueError:
-                    If ``preds`` and ``target`` are not of the same length
-                ValueError:
-                    If any of ``preds.boxes``, ``preds.scores``
-                    and ``preds.labels`` are not of the same length
-                ValueError:
-                    If any of ``target.boxes`` and ``target.labels`` are not of the same length
-                ValueError:
-                    If any box is not type float and of length 4
-                ValueError:
-                    If any class is not type int and of length 1
-                ValueError:
-                    If any score is not type float and of length 1
-            """
-            _input_validator(preds, target)
-
-            for item in preds:
-                self.detection_boxes.append(_fix_empty_tensors(item["boxes"]))
-                self.detection_scores.append(item["scores"])
-                self.detection_labels.append(item["labels"])
-
-            for item in target:
-                self.groundtruth_boxes.append(_fix_empty_tensors(item["boxes"]))
-                self.groundtruth_labels.append(item["labels"])
-
-        def compute(self) -> dict:
-            """Compute the `Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR)` scores. All detections added in
-            the `update()` method are included.
-
-            Note:
-                Main `map` score is calculated with @[ IoU=0.50:0.95 | area=all | maxDets=100 ]
-
-            Returns:
-                dict containing
-
-                - map: ``torch.Tensor``
-                - map_50: ``torch.Tensor``
-                - map_75: ``torch.Tensor``
-                - map_small: ``torch.Tensor``
-                - map_medium: ``torch.Tensor``
-                - map_large: ``torch.Tensor``
-                - mar_1: ``torch.Tensor``
-                - mar_10: ``torch.Tensor``
-                - mar_100: ``torch.Tensor``
-                - mar_small: ``torch.Tensor``
-                - mar_medium: ``torch.Tensor``
-                - mar_large: ``torch.Tensor``
-                - map_per_class: ``torch.Tensor`` (-1 if class metrics are disabled)
-                - mar_100_per_class: ``torch.Tensor`` (-1 if class metrics are disabled)
-            """
-            coco_target, coco_preds = COCO(), COCO()
-            coco_target.dataset = self._get_coco_format(self.groundtruth_boxes, self.groundtruth_labels)
-            coco_preds.dataset = self._get_coco_format(
-                self.detection_boxes, self.detection_labels, self.detection_scores
-            )
-
-            with _hide_prints():
-                coco_target.createIndex()
-                coco_preds.createIndex()
-                coco_eval = COCOeval(coco_target, coco_preds, "bbox")
-                coco_eval.evaluate()
-                coco_eval.accumulate()
-                coco_eval.summarize()
-                stats = coco_eval.stats
-
-            map_per_class_values: Tensor = torch.Tensor([-1])
-            mar_100_per_class_values: Tensor = torch.Tensor([-1])
-            # if class mode is enabled, evaluate metrics per class
-            if self.class_metrics:
-                map_per_class_list = []
-                mar_100_per_class_list = []
-                for class_id in self._get_classes():
-                    coco_eval.params.catIds = [class_id]
-                    with _hide_prints():
-                        coco_eval.evaluate()
-                        coco_eval.accumulate()
-                        coco_eval.summarize()
-                        class_stats = coco_eval.stats
-
-                    map_per_class_list.append(torch.Tensor([class_stats[0]]))
-                    mar_100_per_class_list.append(torch.Tensor([class_stats[8]]))
-                map_per_class_values = torch.Tensor(map_per_class_list)
-                mar_100_per_class_values = torch.Tensor(mar_100_per_class_list)
-
-            metrics = MAPMetricResults(
-                map=torch.Tensor([stats[0]]),
-                map_50=torch.Tensor([stats[1]]),
-                map_75=torch.Tensor([stats[2]]),
-                map_small=torch.Tensor([stats[3]]),
-                map_medium=torch.Tensor([stats[4]]),
-                map_large=torch.Tensor([stats[5]]),
-                mar_1=torch.Tensor([stats[6]]),
-                mar_10=torch.Tensor([stats[7]]),
-                mar_100=torch.Tensor([stats[8]]),
-                mar_small=torch.Tensor([stats[9]]),
-                mar_medium=torch.Tensor([stats[10]]),
-                mar_large=torch.Tensor([stats[11]]),
-                map_per_class=map_per_class_values,
-                mar_100_per_class=mar_100_per_class_values,
-            )
-            return metrics.__dict__
-
-        def _get_coco_format(
-            self, boxes: List[torch.Tensor], labels: List[torch.Tensor], scores: Optional[List[torch.Tensor]] = None
-        ) -> Dict:
-            """Transforms and returns all cached targets or predictions in COCO format.
-
-            Format is defined at https://cocodataset.org/#format-data
-            """
-            images = []
-            annotations = []
-            annotation_id = 1  # has to start with 1, otherwise COCOEval results are wrong
-
-            boxes = [
-                box_convert(box, in_fmt="xyxy", out_fmt="xywh") if box.ndim > 1 and box.size(1) == 4 else box
-                for box in boxes
-            ]
-            for image_id, (image_boxes, image_labels) in enumerate(zip(boxes, labels)):
-                image_boxes = image_boxes.cpu().tolist()
-                image_labels = image_labels.cpu().tolist()
-
-                images.append({"id": image_id})
-                for k, (image_box, image_label) in enumerate(zip(image_boxes, image_labels)):
-                    if len(image_box) != 4:
-                        raise ValueError(
-                            f"Invalid input box of sample {image_id}, element {k} (expected 4 values, got {len(image_box)})"
-                        )
-
-                    if type(image_label) != int:
-                        raise ValueError(
-                            f"Invalid input class of sample {image_id}, element {k}"
-                            f" (expected value of type integer, got type {type(image_label)})"
-                        )
-
-                    annotation = {
-                        "id": annotation_id,
-                        "image_id": image_id,
-                        "bbox": image_box,
-                        "category_id": image_label,
-                        "area": image_box[2] * image_box[3],
-                        "iscrowd": 0,
-                    }
-                    if scores is not None:
-                        score = scores[image_id][k].cpu().tolist()
-                        if type(score) != float:
-                            raise ValueError(
-                                f"Invalid input score of sample {image_id}, element {k}"
-                                f" (expected value of type float, got type {type(score)})"
-                            )
-                        annotation["score"] = score
-                    annotations.append(annotation)
-                    annotation_id += 1
-
-            classes = [{"id": i, "name": str(i)} for i in self._get_classes()]
-            return {"images": images, "annotations": annotations, "categories": classes}
-
-        def _get_classes(self) -> list:
-            """Get list of unique classes depending on groundtruth_labels and detection_labels."""
-            if len(self.detection_labels) > 0 or len(self.groundtruth_labels) > 0:
-                return torch.cat(self.detection_labels + self.groundtruth_labels).unique().cpu().tolist()
-            return []

From ef9fb68da31718248cbd4ef416286e397b1f4359 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 4 May 2023 14:17:31 +0200
Subject: [PATCH 14/44] Remove map.py leftover

---
 multimodal/src/autogluon/multimodal/utils/object_detection.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/multimodal/src/autogluon/multimodal/utils/object_detection.py b/multimodal/src/autogluon/multimodal/utils/object_detection.py
index d2ddf738477..edfb5ad6b44 100644
--- a/multimodal/src/autogluon/multimodal/utils/object_detection.py
+++ b/multimodal/src/autogluon/multimodal/utils/object_detection.py
@@ -807,8 +807,7 @@ def cocoeval_torchmetrics(outputs: List):
     """
 
     import torch
-
-    from . import MeanAveragePrecision
+    from torchmetrics.detection.mean_ap import MeanAveragePrecision
 
     map_metric = MeanAveragePrecision(box_format="xyxy", iou_type="bbox", class_metrics=False)
 

From cdbd78e837083b98bc1e866a929924237880e383 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Fri, 5 May 2023 07:31:32 +0200
Subject: [PATCH 15/44] Omit classes key from log_dict call

ref https://github.com/Lightning-AI/torchmetrics/commit/ac64e630148671b2ce03855713562cbd1f32fd3e
---
 multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py b/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py
index 9ac18212dbc..62ef4aff54d 100644
--- a/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py
+++ b/multimodal/src/autogluon/multimodal/optimization/lit_mmdet.py
@@ -236,6 +236,7 @@ def on_validation_epoch_end(self):
             self.log_dict({"val_direct_loss": val_result}, sync_dist=True)
         else:
             # TODO: add mAP/mAR_per_class
+            val_result.pop("classes", None)  # introduced in torchmetrics v1.0.0
             mAPs = {"val_" + k: v for k, v in val_result.items()}
             mAPs["val_mAP"] = mAPs["val_map"]
             self.log_dict(mAPs, sync_dist=True)

From 4c94b714c5b78b13c7f44624c80c98b524cde003 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Fri, 5 May 2023 08:38:47 +0200
Subject: [PATCH 16/44] Sync torch and torchvision versions with
 pytorch-lightning

ref https://github.com/Lightning-AI/lightning/blob/2.0.2/requirements/pytorch/base.txt#L5
ref https://github.com/autogluon/autogluon/pull/3190#discussion_r1185718069
---
 multimodal/setup.py | 4 ++--
 tabular/setup.py    | 2 +-
 timeseries/setup.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index 17d887dc64a..596cfcbc81c 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -33,8 +33,8 @@
     "evaluate>=0.2.2,<0.4.0",
     "accelerate>=0.9,<0.17",
     "timm>=0.6.12,<0.7.0",
-    "torch>=1.9,<2.1",
-    "torchvision>=0.10.0,<0.16",
+    "torch>=1.11,<2.1",
+    "torchvision>=0.12.0,<0.16",
     "fairscale>=0.4.5,<0.4.14",
     "scikit-image>=0.19.1,<0.21.0",
     "pytorch-lightning>=2.0.0,<2.1",
diff --git a/tabular/setup.py b/tabular/setup.py
index 17dcc42b0e6..9f5e55edf5f 100644
--- a/tabular/setup.py
+++ b/tabular/setup.py
@@ -43,7 +43,7 @@
         'xgboost>=1.6,<1.8',
     ],
     'fastai': [
-        'torch>=1.9,<2.1',
+        'torch>=1.11,<2.1',
         'fastai>=2.3.1,<2.8',
     ],
     'ray': [
diff --git a/timeseries/setup.py b/timeseries/setup.py
index 57092cfc755..0c0c4c81014 100644
--- a/timeseries/setup.py
+++ b/timeseries/setup.py
@@ -29,7 +29,7 @@
     "pandas",  # version range defined in `core/_setup_utils.py`
     "statsmodels>=0.13.0,<0.14",
     "gluonts>=0.12.4,<0.13",
-    "torch>=1.9,<2.1",
+    "torch>=1.11,<2.1",
     "pytorch-lightning>=2.0.0,<2.1",
     "networkx",  # version range defined in `core/_setup_utils.py`
     "statsforecast>=1.4.0,<1.5",

From 88d06643d520f7daec300b002429cad4485a860b Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Sat, 6 May 2023 09:43:24 +0200
Subject: [PATCH 17/44] Remove track_grad_norm references

https://github.com/autogluon/autogluon/pull/3190#discussion_r1186631732
---
 .../advanced_topics/customization.ipynb       | 25 +------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/docs/tutorials/multimodal/advanced_topics/customization.ipynb b/docs/tutorials/multimodal/advanced_topics/customization.ipynb
index c0bd10d9886..073bffd82cb 100644
--- a/docs/tutorials/multimodal/advanced_topics/customization.ipynb
+++ b/docs/tutorials/multimodal/advanced_topics/customization.ipynb
@@ -360,29 +360,6 @@
     "```\n"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "02d07866",
-   "metadata": {},
-   "source": [
-    "### optimization.track_grad_norm\n",
-    "\n",
-    "Track the p-norm of gradients during training. May be set to ‘inf’ infinity-norm. If using Automatic Mixed Precision (AMP), the gradients will be unscaled before logging them."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "1b60c371",
-   "metadata": {},
-   "source": [
-    "```\n",
-    "# default used by AutoMM (no tracking)\n",
-    "predictor.fit(hyperparameters={\"optimization.track_grad_norm\": -1})\n",
-    "# track the 2-norm\n",
-    "predictor.fit(hyperparameters={\"optimization.track_grad_norm\": 2})\n",
-    "```\n"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "abe87d32",
@@ -1574,4 +1551,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
\ No newline at end of file
+}

From 43e4e1674fac889b316015b9f83442ce27931e9b Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Fri, 26 May 2023 07:21:19 +0200
Subject: [PATCH 18/44] Fix catboost installation error for Github macos
 runners

- https://github.com/autogluon/autogluon/pull/3190#issuecomment-1540599280
- https://github.com/catboost/catboost/issues/2371
- https://github.com/actions/setup-python/issues/654#issuecomment-1533455583
---
 tabular/setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tabular/setup.py b/tabular/setup.py
index 9f5e55edf5f..5c33f252ada 100644
--- a/tabular/setup.py
+++ b/tabular/setup.py
@@ -34,6 +34,8 @@
         'lightgbm>=3.3,<3.4',
     ],
     'catboost': [
+        # https://github.com/autogluon/autogluon/pull/3190#issuecomment-1540599280
+        'catboost>=1.1,<1.2 ; python_version <= "3.8" and sys_platform == "darwin"',
         'catboost>=1.1,<1.3',
     ],
     # FIXME: Debug why xgboost 1.6 has 4x+ slower inference on multiclass datasets compared to 1.4

From 6f1935dbb33ea44bda69bbd236d78c1a62b555d7 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Fri, 16 Jun 2023 10:25:01 +0200
Subject: [PATCH 19/44] Remove catboost hotfix

The Python 3.8.17 GitHub runner has been released, ref https://github.com/actions/python-versions/blob/225ba42747d0f5e3dbd90ba15b9c7409a4b8c735/versions-manifest.json#L3370-L3396
It is built on macOS 11, so it is properly detected.
---
 tabular/setup.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tabular/setup.py b/tabular/setup.py
index 42ed04df9d5..6dab8797a25 100644
--- a/tabular/setup.py
+++ b/tabular/setup.py
@@ -34,9 +34,6 @@
         'lightgbm>=3.3,<3.4',
     ],
     'catboost': [
-        # catboost>=1.2 no longer has macosx 10.x wheels available, which are needed for Github CI on older Python versions
-        # https://github.com/autogluon/autogluon/pull/3190#issuecomment-1540599280
-        'catboost>=1.1,<1.2 ; python_version <= "3.8" and sys_platform == "darwin"',
         'catboost>=1.1,<1.3',
     ],
     # FIXME: Debug why xgboost 1.6 has 4x+ slower inference on multiclass datasets compared to 1.4

From ac8f1288dfd981f9668e57e00c10bc49bd949a6f Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Mon, 19 Jun 2023 10:27:15 +0200
Subject: [PATCH 20/44] Bump onnx to 1.15.x

ERROR: Could not find a version that satisfies the requirement onnxruntime-gpu<1.14.0,>=1.13.0; platform_system != "Darwin" and extra == "tests" (from autogluon-multimodal[tests]) (from versions: 1.15.0, 1.15.1)

ERROR: No matching distribution found for onnxruntime-gpu<1.14.0,>=1.13.0; platform_system != "Darwin" and extra == "tests"
---
 multimodal/setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index a417d6a4a88..e6944d1ca40 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -65,9 +65,9 @@
         "black>=22.3,<23.0",
         "isort>=5.10",
         "datasets>=2.3.2,<=2.3.2",
-        "onnx>=1.13.0,<1.14.0",
-        "onnxruntime>=1.13.0,<1.14.0;platform_system=='Darwin'",
-        "onnxruntime-gpu>=1.13.0,<1.14.0;platform_system!='Darwin'",
+        "onnx>=1.15.0,<1.16.0",
+        "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
+        "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",
         "tensorrt>=8.5.3.1,<8.5.4;platform_system=='Linux'",
     ]
 }

From 8e7d838e2eea3682d3e21b7e1e215f51f294feba Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Mon, 19 Jun 2023 10:31:08 +0200
Subject: [PATCH 21/44] Revert onnx version bump

Only onnxruntime 1.15 has been released; onnx is still on 1.14.
---
 multimodal/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index e6944d1ca40..f011ef02ef2 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -65,7 +65,7 @@
         "black>=22.3,<23.0",
         "isort>=5.10",
         "datasets>=2.3.2,<=2.3.2",
-        "onnx>=1.15.0,<1.16.0",
+        "onnx>=1.14.0,<1.15.0",
         "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
         "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",
         "tensorrt>=8.5.3.1,<8.5.4;platform_system=='Linux'",

From 8024a1b29887963612a09e42ac2d6d08d83cd498 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Mon, 19 Jun 2023 10:34:10 +0200
Subject: [PATCH 22/44] Bump tensorrt for cp311 compatibility

---
 multimodal/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index f011ef02ef2..bbe31f9ad16 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -68,7 +68,7 @@
         "onnx>=1.14.0,<1.15.0",
         "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
         "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",
-        "tensorrt>=8.5.3.1,<8.5.4;platform_system=='Linux'",
+        "tensorrt>=8.6.1,<8.7.0;platform_system=='Linux'",
     ]
 }
 

From cb98de7834ceae9ad0361e979a521406f98d61df Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Mon, 19 Jun 2023 11:18:15 +0200
Subject: [PATCH 23/44] Update datasets and evaluate

ref https://github.com/huggingface/datasets/commit/bde7504fbafa9a0cc9ae847ed55aafd4c0dbc9de
---
 multimodal/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index bbe31f9ad16..07810317681 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -30,7 +30,7 @@
     "requests>=2.21,<3",
     "jsonschema>=4.14,<4.18",
     "seqeval>=1.2.2,<1.3.0",
-    "evaluate>=0.2.2,<0.4.0",
+    "evaluate>=0.4.0,<0.5.0",
     "accelerate>=0.9,<0.17",
     "timm>=0.9.2,<0.10.0",
     "torch>=1.11,<2.1",
@@ -64,7 +64,7 @@
     "tests": [
         "black>=22.3,<23.0",
         "isort>=5.10",
-        "datasets>=2.3.2,<=2.3.2",
+        "datasets>=2.13.0,<=2.14.0",
         "onnx>=1.14.0,<1.15.0",
         "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
         "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",

From 75c13d023a3259065dfd71294d19e9f16253f84c Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Mon, 19 Jun 2023 11:21:00 +0200
Subject: [PATCH 24/44] Fix typo on version range

---
 multimodal/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index 07810317681..006f0818150 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -64,7 +64,7 @@
     "tests": [
         "black>=22.3,<23.0",
         "isort>=5.10",
-        "datasets>=2.13.0,<=2.14.0",
+        "datasets>=2.13.0,<2.14.0",
         "onnx>=1.14.0,<1.15.0",
         "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
         "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",

From a4fabd7169bf4d858eed3e4655ed3b75868b844b Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Mon, 3 Jul 2023 09:25:36 +0200
Subject: [PATCH 25/44] Bump ray version

pytorch_lightning 2.0 support added in 2.5 https://github.com/ray-project/ray/pull/34967
ray_lightning deprecated https://github.com/ray-project/ray/pull/36400
---
 .../src/autogluon/common/utils/try_import.py  | 57 +------------------
 core/setup.py                                 |  5 +-
 2 files changed, 5 insertions(+), 57 deletions(-)

diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py
index e0b619bb611..970c00bcaa3 100644
--- a/common/src/autogluon/common/utils/try_import.py
+++ b/common/src/autogluon/common/utils/try_import.py
@@ -30,13 +30,13 @@ def try_import_mxboard():
 
 
 def try_import_ray() -> ModuleType:
-    RAY_MAX_VERSION = "2.4.0"
+    RAY_MAX_VERSION = "2.7.0"  # sync with core/setup.py
     ray_max_version_os_map = dict(
         Darwin=RAY_MAX_VERSION,
         Windows=RAY_MAX_VERSION,
         Linux=RAY_MAX_VERSION,
     )
-    ray_min_version = "2.2.0"
+    ray_min_version = "2.5.1"
     current_os = platform.system()
     ray_max_version = ray_max_version_os_map.get(current_os, RAY_MAX_VERSION)
     try:
@@ -60,57 +60,6 @@ def try_import_ray() -> ModuleType:
         )
 
 
-def try_import_ray_lightning():
-    """This function tries to import ray lightning and check if the compatible pytorch lightning version is installed"""
-    supported_ray_lightning_min_version = "0.2.0"
-    supported_ray_lightning_max_version = "0.3.0"
-    ray_lightning_torch_lightning_compatibility_map = {
-        "0.2.x": "1.5.x",
-    }
-    ray_lightining_torch_lightning_compatibility_range_map = {
-        ("0.2.0", "0.3.0"): ("1.5.0", "1.6.0"),
-    }
-    try:
-        import pkg_resources
-        import pytorch_lightning
-        import ray_lightning
-        from packaging import version
-
-        ray_lightning_version = pkg_resources.get_distribution("ray_lightning").version  # ray_lightning doesn't have __version__...
-
-        if not (
-            version.parse(supported_ray_lightning_min_version) <= version.parse(ray_lightning_version) < version.parse(supported_ray_lightning_max_version)
-        ):
-            logger.log(
-                f"ray_lightning=={ray_lightning_version} detected. "
-                f"{supported_ray_lightning_min_version} <= ray_lighting < {supported_ray_lightning_max_version} is required."
-                "You can use pip to install certain version of ray_lightning."
-                f"Supported ray_lightning versions and the compatible torch lightning versions are {ray_lightning_torch_lightning_compatibility_map}."
-            )
-            return False
-
-        for ray_lightning_versions, torch_lightning_versions in ray_lightining_torch_lightning_compatibility_range_map.items():
-            ray_lightning_min_version, ray_lightning_max_version = ray_lightning_versions
-            torch_lightning_min_version, torch_lightning_max_version = torch_lightning_versions
-            if version.parse(ray_lightning_min_version) <= version.parse(ray_lightning_version) < version.parse(ray_lightning_max_version):
-                if not (
-                    version.parse(torch_lightning_min_version) <= version.parse(pytorch_lightning.__version__) < version.parse(torch_lightning_max_version)
-                ):
-                    logger.log(
-                        f"Found ray_lightning {ray_lightning_version} that's not compatible with pytorch_lightning."
-                        f"The compatible version of pytorch_lightning is >= {torch_lightning_min_version} and < {torch_lightning_max_version}."
-                    )
-                    return False
-        return True
-
-    except ImportError:
-        logger.info(
-            "You can enable each individual trial using multiple gpus by installing ray_lightning."
-            f"Supported ray_lightning versions and the compatible torch lightning versions are {ray_lightning_torch_lightning_compatibility_map}."
-        )
-        return False
-
-
 def try_import_catboost():
     try:
         import catboost
@@ -175,7 +124,7 @@ def try_import_torch():
         import torch
     except ImportError as e:
         raise ImportError(
-            "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.6."
+            "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.11."
         )
 
 
diff --git a/core/setup.py b/core/setup.py
index a6535b43ada..779743315e8 100644
--- a/core/setup.py
+++ b/core/setup.py
@@ -47,7 +47,7 @@
 
 extras_require = {
     "ray": [
-        "ray[default]>=2.3,<2.4",
+        "ray[default]>=2.5.1,<2.7",
         # https://github.com/grpc/grpc/issues/31885
         # version range set to align with ray's updated version range:
         # https://github.com/ray-project/ray/blob/master/python/setup.py#L259-L261
@@ -56,7 +56,7 @@
         "pydantic>=1.10.4,<2.0",  # https://github.com/ray-project/ray/issues/36990
     ],
     "raytune": [
-        "ray[tune]>=2.3,<2.4",
+        "ray[tune]>=2.5.1,<2.7",
         # TODO: consider alternatives as hyperopt is not actively maintained.
         "hyperopt>=0.2.7,<0.2.8",  # This is needed for the bayes search to work.
         # 'GPy>=1.10.0,<1.11.0'  # TODO: Enable this once PBT/PB2 are supported by ray lightning
@@ -68,7 +68,6 @@
     "types-requests",
     "types-setuptools",
     "pytest-mypy",
-    # TODO(Re-enable ray_lightning once it released 0.3.0) 'ray_lightning>=0.2.0,<0.3.0'
 ]
 
 all_requires = []

From 060a6cdf451fee0c184c62a651f44572708b31bd Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Sun, 23 Jul 2023 17:16:42 +0200
Subject: [PATCH 26/44] Undo deletion from a merge commit

---
 .github/workflow_scripts/env_setup.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflow_scripts/env_setup.sh b/.github/workflow_scripts/env_setup.sh
index 2342574df0e..72817f423e4 100644
--- a/.github/workflow_scripts/env_setup.sh
+++ b/.github/workflow_scripts/env_setup.sh
@@ -21,6 +21,7 @@ function setup_torch_gpu {
 }
 
 function setup_torch_cpu {
+    PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu reinstall_torch
 }
 
 function setup_torch_gpu_non_linux {

From ca8d8ef22c1ea5d53e38812c4486e82425c9b907 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Sun, 23 Jul 2023 18:00:59 +0200
Subject: [PATCH 27/44] Unify torchmetrics version notation

---
 multimodal/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index 8f3fbe1a07f..ad82fd91d00 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -38,7 +38,7 @@
     "scikit-image>=0.19.1,<0.21.0",
     "pytorch-lightning>=2.0.0,<2.1",
     "text-unidecode>=1.3,<1.4",
-    "torchmetrics~=1.0.0rc0",
+    "torchmetrics>=1.0.0,<1.1.0",
     "transformers[sentencepiece]>=4.23.0,<4.27.0",
     "nptyping>=1.4.4,<2.5.0",
     "omegaconf>=2.1.1,<2.3.0",

From f0cbf1ca385c4ce85569c934d4b2eb98ae63c1ac Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Tue, 8 Aug 2023 13:20:28 +0200
Subject: [PATCH 28/44] Revert merge remnant

---
 timeseries/setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/timeseries/setup.py b/timeseries/setup.py
index 439a02606c2..ea9d933b1e8 100644
--- a/timeseries/setup.py
+++ b/timeseries/setup.py
@@ -51,9 +51,10 @@
         "isort>=5.10",
         "black>=22.3,<23.0",
     ],
-    "all": [],
 }
 
+extras_require["all"] = []
+
 install_requires = ag.get_dependency_version_ranges(install_requires)
 
 if __name__ == "__main__":

From 983415b1001a8bb7805e1e8414d7e00f2fa279e5 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Tue, 8 Aug 2023 13:26:08 +0200
Subject: [PATCH 29/44] Revert lower bound bumps

datasets needs a lower bound bump due to https://github.com/huggingface/datasets/commit/bde7504fbafa9a0cc9ae847ed55aafd4c0dbc9de
---
 core/setup.py       |  4 ++--
 multimodal/setup.py | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/core/setup.py b/core/setup.py
index 779743315e8..30655fa576e 100644
--- a/core/setup.py
+++ b/core/setup.py
@@ -47,7 +47,7 @@
 
 extras_require = {
     "ray": [
-        "ray[default]>=2.5.1,<2.7",
+        "ray[default]>=2.3,<2.7",
         # https://github.com/grpc/grpc/issues/31885
         # version range set to align with ray's updated version range:
         # https://github.com/ray-project/ray/blob/master/python/setup.py#L259-L261
@@ -56,7 +56,7 @@
         "pydantic>=1.10.4,<2.0",  # https://github.com/ray-project/ray/issues/36990
     ],
     "raytune": [
-        "ray[tune]>=2.5.1,<2.7",
+        "ray[tune]>=2.3,<2.7",
         # TODO: consider alternatives as hyperopt is not actively maintained.
         "hyperopt>=0.2.7,<0.2.8",  # This is needed for the bayes search to work.
         # 'GPy>=1.10.0,<1.11.0'  # TODO: Enable this once PBT/PB2 are supported by ray lightning
diff --git a/multimodal/setup.py b/multimodal/setup.py
index 8007923f51d..0cd407a5e29 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -32,7 +32,7 @@
     "requests>=2.21,<3",
     "jsonschema>=4.14,<4.18",
     "seqeval>=1.2.2,<1.3.0",
-    "evaluate>=0.4.0,<0.5.0",
+    "evaluate>=0.2.2,<0.5.0",
     "accelerate>=0.9,<0.17",
     "timm>=0.9.2,<0.10.0",
     "torchvision>=0.14.0,<0.16.0",  # torch 1.13 requires torchvision 0.14. Increase it to 0.15 when dropping the support of torch 1.13.
@@ -60,11 +60,11 @@
 tests_require = [
     "black>=22.3,<23.0",
     "isort>=5.10",
-    "datasets>=2.13.0,<2.14.0",
-    "onnx>=1.14.0,<1.15.0",
-    "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
-    "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",
-    "tensorrt>=8.6.1,<8.7.0;platform_system=='Linux'",
+    "datasets>=2.7.0,<2.14.0",
+    "onnx>=1.13.0,<1.15.0",
+    "onnxruntime>=1.13.0,<1.16.0;platform_system=='Darwin'",
+    "onnxruntime-gpu>=1.13.0,<1.16.0;platform_system!='Darwin'",
+    "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'",
 ]
 
 extras_require = {

From 6f1d2fd987b56670719a30b9c9be2e56893745bf Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 24 Aug 2023 22:21:01 +0200
Subject: [PATCH 30/44] Update torch version in error message

---
 common/src/autogluon/common/utils/try_import.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py
index 17875767b2a..dea3d3628b2 100644
--- a/common/src/autogluon/common/utils/try_import.py
+++ b/common/src/autogluon/common/utils/try_import.py
@@ -124,7 +124,7 @@ def try_import_torch():
         import torch
     except ImportError as e:
         raise ImportError(
-            "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.11."
+            "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.13."  # sync with core/_setup_utils.py
         )
 
 

From ed3f969825143e047a91a1cc26f7bccde6883977 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 24 Aug 2023 22:24:04 +0200
Subject: [PATCH 31/44] Add cp311 specifier

---
 core/src/autogluon/core/_setup_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/src/autogluon/core/_setup_utils.py b/core/src/autogluon/core/_setup_utils.py
index 9fec6e2b9d4..40f3818d053 100644
--- a/core/src/autogluon/core/_setup_utils.py
+++ b/core/src/autogluon/core/_setup_utils.py
@@ -137,6 +137,7 @@ def default_setup_args(*, version, submodule):
             "Programming Language :: Python :: 3.8",
             "Programming Language :: Python :: 3.9",
             "Programming Language :: Python :: 3.10",
+            "Programming Language :: Python :: 3.11",
             "Topic :: Software Development",
             "Topic :: Scientific/Engineering :: Artificial Intelligence",
             "Topic :: Scientific/Engineering :: Information Analysis",

From 07246b86428e8bed679b69a394dfe22c8ce5dee7 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Thu, 24 Aug 2023 22:46:28 +0200
Subject: [PATCH 32/44] Lint

---
 common/src/autogluon/common/utils/try_import.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py
index dea3d3628b2..692807927ff 100644
--- a/common/src/autogluon/common/utils/try_import.py
+++ b/common/src/autogluon/common/utils/try_import.py
@@ -124,7 +124,9 @@ def try_import_torch():
         import torch
     except ImportError as e:
         raise ImportError(
-            "Unable to import dependency torch\n" "A quick tip is to install via `pip install torch`.\n" "The minimum torch version is currently 1.13."  # sync with core/_setup_utils.py
+            "Unable to import dependency torch\n"
+            "A quick tip is to install via `pip install torch`.\n"
+            "The minimum torch version is currently 1.13."  # sync with core/_setup_utils.py
         )
 
 

From 11652e9ee08eaf7df5f887986243889bc99487da Mon Sep 17 00:00:00 2001
From: ddelange <ddelange@users.noreply.github.com>
Date: Mon, 11 Sep 2023 23:05:15 +0200
Subject: [PATCH 33/44] Allow torchmetrics 1.1.*

---
 multimodal/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index 617ca7df3b9..b75f1a35799 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -39,7 +39,7 @@
     "torchvision>=0.14.0,<0.16.0",  # torch 1.13 requires torchvision 0.14. Increase it to 0.15 when dropping the support of torch 1.13.
     "scikit-image>=0.19.1,<0.21.0",
     "text-unidecode>=1.3,<1.4",
-    "torchmetrics>=1.0.0,<1.1.0",
+    "torchmetrics>=1.0.0,<1.2.0",
     "nptyping>=1.4.4,<2.5.0",
     "omegaconf>=2.1.1,<2.3.0",
     f"autogluon.core[raytune]=={version}",

From fba061c43cd906160622e7d23eba2a7ec6d9ff2e Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Mon, 2 Oct 2023 19:17:11 +0200
Subject: [PATCH 34/44] Bump ray to 2.7.0

---
 common/src/autogluon/common/utils/try_import.py | 6 +++---
 core/setup.py                                   | 6 +++---
 core/src/autogluon/core/_setup_utils.py         | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py
index d083ba75faf..94a6404db57 100644
--- a/common/src/autogluon/common/utils/try_import.py
+++ b/common/src/autogluon/common/utils/try_import.py
@@ -30,13 +30,13 @@ def try_import_mxboard():
 
 
 def try_import_ray() -> ModuleType:
-    RAY_MAX_VERSION = "2.7.0"  # sync with core/setup.py
+    RAY_MAX_VERSION = "2.8.0"  # sync with core/setup.py
     ray_max_version_os_map = dict(
         Darwin=RAY_MAX_VERSION,
         Windows=RAY_MAX_VERSION,
         Linux=RAY_MAX_VERSION,
     )
-    ray_min_version = "2.6.3"
+    ray_min_version = "2.7.0"
     current_os = platform.system()
     ray_max_version = ray_max_version_os_map.get(current_os, RAY_MAX_VERSION)
     try:
@@ -124,7 +124,7 @@ def try_import_torch():
         raise ImportError(
             "Unable to import dependency torch\n"
             "A quick tip is to install via `pip install torch`.\n"
-            "The minimum torch version is currently 1.13."  # sync with core/_setup_utils.py
+            "The minimum torch version is currently 2.0."  # sync with core/_setup_utils.py
         )
 
 
diff --git a/core/setup.py b/core/setup.py
index 994cf8149eb..750ac5f8116 100644
--- a/core/setup.py
+++ b/core/setup.py
@@ -47,11 +47,11 @@
 
 extras_require = {
     "ray": [
-        "ray[default]>=2.6.3,<2.7",
-        "pydantic>=1.10.4,<2.0",  # https://github.com/ray-project/ray/issues/36990
+        # sync version with common/src/autogluon/common/utils/try_import.py
+        "ray[default]>=2.7.0,<2.8",
     ],
     "raytune": [
-        "ray[tune]>=2.6.3,<2.7",
+        "ray[default,tune]>=2.7.0,<2.8",
         # TODO: consider alternatives as hyperopt is not actively maintained.
         "hyperopt>=0.2.7,<0.2.8",  # This is needed for the bayes search to work.
         # 'GPy>=1.10.0,<1.11.0'  # TODO: Enable this once PBT/PB2 are supported by ray lightning
diff --git a/core/src/autogluon/core/_setup_utils.py b/core/src/autogluon/core/_setup_utils.py
index 17721d1af95..4f010a51a25 100644
--- a/core/src/autogluon/core/_setup_utils.py
+++ b/core/src/autogluon/core/_setup_utils.py
@@ -26,7 +26,7 @@
     "networkx": ">=3.0,<4",  # Major version cap
     "tqdm": ">=4.38,<5",  # Major version cap
     "Pillow": ">=9.3,<9.6",  # "<{N+2}" upper cap
-    "torch": ">=2.0,<2.1",  # "<{N+1}" upper cap
+    "torch": ">=2.0,<2.1",  # "<{N+1}" upper cap, sync with common/src/autogluon/common/utils/try_import.py
     "lightning": ">=2.0.0,<2.1",  # "<{N+1}" upper cap
 }
 if LITE_MODE:

From 6deb7828f552cc7c792805a8fac853de6aa0eb5d Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Tue, 3 Oct 2023 17:19:20 +0000
Subject: [PATCH 35/44] test

---
 common/src/autogluon/common/utils/try_import.py | 4 ++--
 core/setup.py                                   | 5 ++---
 multimodal/setup.py                             | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py
index 94a6404db57..e269cabde0e 100644
--- a/common/src/autogluon/common/utils/try_import.py
+++ b/common/src/autogluon/common/utils/try_import.py
@@ -30,13 +30,13 @@ def try_import_mxboard():
 
 
 def try_import_ray() -> ModuleType:
-    RAY_MAX_VERSION = "2.8.0"  # sync with core/setup.py
+    RAY_MAX_VERSION = "2.7.0"  # sync with core/setup.py
     ray_max_version_os_map = dict(
         Darwin=RAY_MAX_VERSION,
         Windows=RAY_MAX_VERSION,
         Linux=RAY_MAX_VERSION,
     )
-    ray_min_version = "2.7.0"
+    ray_min_version = "2.6.3"
     current_os = platform.system()
     ray_max_version = ray_max_version_os_map.get(current_os, RAY_MAX_VERSION)
     try:
diff --git a/core/setup.py b/core/setup.py
index 750ac5f8116..389eda08e9c 100644
--- a/core/setup.py
+++ b/core/setup.py
@@ -47,11 +47,10 @@
 
 extras_require = {
     "ray": [
-        # sync version with common/src/autogluon/common/utils/try_import.py
-        "ray[default]>=2.7.0,<2.8",
+        "ray[default]>=2.6.3,<2.7",
     ],
     "raytune": [
-        "ray[default,tune]>=2.7.0,<2.8",
+        "ray[default,tune]>=2.6.3,<2.7",
         # TODO: consider alternatives as hyperopt is not actively maintained.
         "hyperopt>=0.2.7,<0.2.8",  # This is needed for the bayes search to work.
         # 'GPy>=1.10.0,<1.11.0'  # TODO: Enable this once PBT/PB2 are supported by ray lightning
diff --git a/multimodal/setup.py b/multimodal/setup.py
index 28ecab8dbee..178162fe436 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -61,7 +61,7 @@
     "black>=22.3,<23.0",
     "isort>=5.10",
     "datasets>=2.10.0,<2.15.0",
-    "onnx>=1.13.0,<1.15.0",
+    "onnx>=1.13.0,<1.14.0",
     "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
     "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",
     "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'",

From 2e62c43631b843ba43081b6dc6130de86870f745 Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Tue, 3 Oct 2023 20:48:30 +0000
Subject: [PATCH 36/44] disable tensorrt

---
 multimodal/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index 178162fe436..96710bb1527 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -64,7 +64,7 @@
     "onnx>=1.13.0,<1.14.0",
     "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
     "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",
-    "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'",
+    # "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'",
 ]
 
 extras_require = {

From 5227c72b85afa996e4452c2ca9578de230d84d48 Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Wed, 4 Oct 2023 17:37:25 +0000
Subject: [PATCH 37/44] fix tests

---
 common/src/autogluon/common/utils/try_import.py          | 8 +++++++-
 .../tests/unittests/others/test_deployment_onnx.py       | 4 ++++
 tabular/setup.py                                         | 9 +++++++--
 tabular/tests/unittests/models/test_vowpalwabbit.py      | 6 ++++++
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/common/src/autogluon/common/utils/try_import.py b/common/src/autogluon/common/utils/try_import.py
index e269cabde0e..5893e1a0bc6 100644
--- a/common/src/autogluon/common/utils/try_import.py
+++ b/common/src/autogluon/common/utils/try_import.py
@@ -1,5 +1,6 @@
 import logging
 import platform
+import sys
 from types import ModuleType
 
 from ..version import __version__
@@ -62,7 +63,12 @@ def try_import_catboost():
     try:
         import catboost
     except ImportError as e:
-        raise ImportError("`import catboost` failed. " f"A quick tip is to install via `pip install autogluon.tabular[catboost]=={__version__}`.")
+        error_msg = "`import catboost` failed. "
+        if sys.version_info >= (3, 11) and sys.platform == "darwin":
+            error_msg += f"Detected your env as {sys.platform}. Please either downgrade your python version to below 3.11 or move to another platform. Then install via ``pip install autogluon.tabular[catboost]=={__version__}``"
+        else:
+            error_msg += f"A quick tip is to install via `pip install autogluon.tabular[catboost]=={__version__}`."
+        raise ImportError(error_msg)
     except ValueError as e:
         raise ImportError(
             "Import catboost failed. Numpy version may be outdated, "
diff --git a/multimodal/tests/unittests/others/test_deployment_onnx.py b/multimodal/tests/unittests/others/test_deployment_onnx.py
index 23c7924c8f8..687d034f907 100644
--- a/multimodal/tests/unittests/others/test_deployment_onnx.py
+++ b/multimodal/tests/unittests/others/test_deployment_onnx.py
@@ -1,11 +1,13 @@
 import os
 import shutil
+import tensorrt
 
 import numpy as np
 import numpy.testing
 import pytest
 import torch
 from datasets import load_dataset
+from packaging import version
 from scipy.stats import pearsonr, spearmanr
 from sklearn.metrics.pairwise import paired_cosine_distances
 from torch import FloatTensor
@@ -192,6 +194,8 @@ def test_onnx_export_timm_image(checkpoint_name, num_gpus):
         ),
     ],
 )
+
+@pytest.mark.skipif(version.parse(tensorrt.__version__) >= version.parse("8.5.4"), reason="tensorrt above 8.5.4 cause segfault, but is required to support py311")
 def test_onnx_optimize_for_inference(dataset_name, model_names, text_backbone, image_backbone):
     dataset = ALL_DATASETS[dataset_name]
     hyperparameters = {
diff --git a/tabular/setup.py b/tabular/setup.py
index 3b56a6302cf..ef4fcbb1fa2 100644
--- a/tabular/setup.py
+++ b/tabular/setup.py
@@ -36,7 +36,11 @@
         "lightgbm>=3.3,<3.4",
     ],
     "catboost": [
-        "catboost>=1.1,<1.3",
+        # CatBoost wheel build is not working correctly on darwin for CatBoost 1.2, so use old version in this case.
+        # https://github.com/autogluon/autogluon/pull/3190#issuecomment-1540599280
+        # Catboost 1.2 doesn't have wheel for python 3.11
+        "catboost>=1.1,<1.2 ; sys_platform == 'darwin' and python_version < '3.11'",
+        "catboost>=1.1,<1.3; sys_platform != 'darwin'",
     ],
     # FIXME: Debug why xgboost 1.6 has 4x+ slower inference on multiclass datasets compared to 1.4
     #  It is possibly only present on MacOS, haven't tested linux.
@@ -63,7 +67,8 @@
     ],
     "vowpalwabbit": [
         # FIXME: 9.5+ causes VW to save an empty model which always predicts 0. Confirmed on MacOS (Intel CPU). Unknown how to fix.
-        "vowpalwabbit>=9,<9.9",
+        # No vowpalwabbit wheel for python 3.11 or above yet
+        "vowpalwabbit>=9,<9.9; python_version < '3.11'",
     ],
     "skl2onnx": [
         "skl2onnx>=1.15.0,<1.16.0",
diff --git a/tabular/tests/unittests/models/test_vowpalwabbit.py b/tabular/tests/unittests/models/test_vowpalwabbit.py
index c41c39d3ee1..b2b08d88a70 100644
--- a/tabular/tests/unittests/models/test_vowpalwabbit.py
+++ b/tabular/tests/unittests/models/test_vowpalwabbit.py
@@ -1,6 +1,10 @@
+import pytest
+import sys
+
 from autogluon.tabular.models.vowpalwabbit.vowpalwabbit_model import VowpalWabbitModel
 
 
+@pytest.mark.skipif(sys.version_info >= (3, 11), reason="vowpalwabbit doesn't support python 3.11 and above yet")
 def test_vowpalwabbit_binary(fit_helper):
     fit_args = dict(
         hyperparameters={VowpalWabbitModel: {}},
@@ -9,6 +13,7 @@ def test_vowpalwabbit_binary(fit_helper):
     fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args)
 
 
+@pytest.mark.skipif(sys.version_info >= (3, 11), reason="vowpalwabbit doesn't support python 3.11 and above yet")
 def test_vowpalwabbit_multiclass(fit_helper):
     fit_args = dict(
         hyperparameters={VowpalWabbitModel: {}},
@@ -17,6 +22,7 @@ def test_vowpalwabbit_multiclass(fit_helper):
     fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args)
 
 
+@pytest.mark.skipif(sys.version_info >= (3, 11), reason="vowpalwabbit doesn't support python 3.11 and above yet")
 def test_vowpalwabbit_regression(fit_helper):
     fit_args = dict(
         hyperparameters={VowpalWabbitModel: {}},

From 84592cc35fe24f8c7087088391d93839d74f4c40 Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Wed, 4 Oct 2023 17:38:07 +0000
Subject: [PATCH 38/44] isort

---
 tabular/tests/unittests/models/test_vowpalwabbit.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tabular/tests/unittests/models/test_vowpalwabbit.py b/tabular/tests/unittests/models/test_vowpalwabbit.py
index b2b08d88a70..072135a89ec 100644
--- a/tabular/tests/unittests/models/test_vowpalwabbit.py
+++ b/tabular/tests/unittests/models/test_vowpalwabbit.py
@@ -1,6 +1,7 @@
-import pytest
 import sys
 
+import pytest
+
 from autogluon.tabular.models.vowpalwabbit.vowpalwabbit_model import VowpalWabbitModel
 
 

From 4315d6e6baad23a4c5a0da6ed9965c70dfb0eb0c Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Wed, 4 Oct 2023 18:04:20 +0000
Subject: [PATCH 39/44] lint

---
 multimodal/tests/unittests/others/test_deployment_onnx.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/multimodal/tests/unittests/others/test_deployment_onnx.py b/multimodal/tests/unittests/others/test_deployment_onnx.py
index 687d034f907..17a5314c2c9 100644
--- a/multimodal/tests/unittests/others/test_deployment_onnx.py
+++ b/multimodal/tests/unittests/others/test_deployment_onnx.py
@@ -194,8 +194,10 @@ def test_onnx_export_timm_image(checkpoint_name, num_gpus):
         ),
     ],
 )
-
-@pytest.mark.skipif(version.parse(tensorrt.__version__) >= version.parse("8.5.4"), reason="tensorrt above 8.5.4 cause segfault, but is required to support py311")
+@pytest.mark.skipif(
+    version.parse(tensorrt.__version__) >= version.parse("8.5.4"),
+    reason="tensorrt above 8.5.4 cause segfault, but is required to support py311",
+)
 def test_onnx_optimize_for_inference(dataset_name, model_names, text_backbone, image_backbone):
     dataset = ALL_DATASETS[dataset_name]
     hyperparameters = {

From 5586f484e3f450a810031d58ad85e232da5a8e0f Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Wed, 4 Oct 2023 18:12:25 +0000
Subject: [PATCH 40/44] isort

---
 multimodal/tests/unittests/others/test_deployment_onnx.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multimodal/tests/unittests/others/test_deployment_onnx.py b/multimodal/tests/unittests/others/test_deployment_onnx.py
index 17a5314c2c9..2f2e2420f4e 100644
--- a/multimodal/tests/unittests/others/test_deployment_onnx.py
+++ b/multimodal/tests/unittests/others/test_deployment_onnx.py
@@ -1,10 +1,10 @@
 import os
 import shutil
-import tensorrt
 
 import numpy as np
 import numpy.testing
 import pytest
+import tensorrt
 import torch
 from datasets import load_dataset
 from packaging import version

From 681709d4e640d43862afd7addbc9cc2bd0333bb1 Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Wed, 4 Oct 2023 20:04:36 +0000
Subject: [PATCH 41/44] fix

---
 multimodal/setup.py                                       | 2 +-
 multimodal/tests/unittests/others/test_deployment_onnx.py | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/multimodal/setup.py b/multimodal/setup.py
index 96710bb1527..72da7b8eadd 100644
--- a/multimodal/setup.py
+++ b/multimodal/setup.py
@@ -64,7 +64,7 @@
     "onnx>=1.13.0,<1.14.0",
     "onnxruntime>=1.15.0,<1.16.0;platform_system=='Darwin'",
     "onnxruntime-gpu>=1.15.0,<1.16.0;platform_system!='Darwin'",
-    # "tensorrt>=8.5.3.1,<8.7.0;platform_system=='Linux'",
+    "tensorrt>=8.5.3.1,<8.5.4;platform_system=='Linux' and python_version<'3.11'",  # tensorrt > 8.5.4 cause segfault
 ]
 
 extras_require = {
diff --git a/multimodal/tests/unittests/others/test_deployment_onnx.py b/multimodal/tests/unittests/others/test_deployment_onnx.py
index 2f2e2420f4e..33f231616f2 100644
--- a/multimodal/tests/unittests/others/test_deployment_onnx.py
+++ b/multimodal/tests/unittests/others/test_deployment_onnx.py
@@ -4,7 +4,6 @@
 import numpy as np
 import numpy.testing
 import pytest
-import tensorrt
 import torch
 from datasets import load_dataset
 from packaging import version
@@ -25,6 +24,11 @@
     "ae": AEDataset(),
 }
 
+try:
+    import tensorrt
+except ImportError:
+    tensorrt = None
+
 
 def evaluate(predictor, df, onnx_session=None):
     labels = df["score"].to_numpy()
@@ -195,7 +199,7 @@ def test_onnx_export_timm_image(checkpoint_name, num_gpus):
     ],
 )
 @pytest.mark.skipif(
-    version.parse(tensorrt.__version__) >= version.parse("8.5.4"),
+    tensorrt is None or version.parse(tensorrt.__version__) >= version.parse("8.5.4"),
     reason="tensorrt above 8.5.4 cause segfault, but is required to support py311",
 )
 def test_onnx_optimize_for_inference(dataset_name, model_names, text_backbone, image_backbone):

From c30990da00ab0776c5cadbce24ddee1c9bbdd708 Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Wed, 4 Oct 2023 20:13:08 +0000
Subject: [PATCH 42/44] fix catboost

---
 tabular/tests/unittests/models/test_catboost.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tabular/tests/unittests/models/test_catboost.py b/tabular/tests/unittests/models/test_catboost.py
index 9e5c4ceaa45..23e35f448a9 100644
--- a/tabular/tests/unittests/models/test_catboost.py
+++ b/tabular/tests/unittests/models/test_catboost.py
@@ -1,6 +1,11 @@
+import sys
+
+import pytest
+
 from autogluon.tabular.models.catboost.catboost_model import CatBoostModel
 
 
+@pytest.mark.skipif(sys.version_info >= (3, 11) and sys.platform == "darwin", reason="catboost has no wheel for py311 darwin")
 def test_catboost_binary(fit_helper):
     fit_args = dict(
         hyperparameters={CatBoostModel: {}},
@@ -9,6 +14,7 @@ def test_catboost_binary(fit_helper):
     fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args)
 
 
+@pytest.mark.skipif(sys.version_info >= (3, 11) and sys.platform == "darwin", reason="catboost has no wheel for py311 darwin")
 def test_catboost_multiclass(fit_helper):
     fit_args = dict(
         hyperparameters={CatBoostModel: {}},
@@ -17,6 +23,7 @@ def test_catboost_multiclass(fit_helper):
     fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args)
 
 
+@pytest.mark.skipif(sys.version_info >= (3, 11) and sys.platform == "darwin", reason="catboost has no wheel for py311 darwin")
 def test_catboost_regression(fit_helper):
     fit_args = dict(
         hyperparameters={CatBoostModel: {}},
@@ -26,6 +33,7 @@ def test_catboost_regression(fit_helper):
     fit_helper.fit_and_validate_dataset(dataset_name=dataset_name, fit_args=fit_args)
 
 
+@pytest.mark.skipif(sys.version_info >= (3, 11) and sys.platform == "darwin", reason="catboost has no wheel for py311 darwin")
 def test_catboost_quantile(fit_helper):
     fit_args = dict(
         hyperparameters={"CAT": {}},

From b82d13742aeeff5c62427f3701df9a90bdcb06c3 Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Thu, 5 Oct 2023 00:51:30 +0000
Subject: [PATCH 43/44] fix

---
 tabular/tests/unittests/test_tabular.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tabular/tests/unittests/test_tabular.py b/tabular/tests/unittests/test_tabular.py
index 48c5ba4e556..387b1325070 100644
--- a/tabular/tests/unittests/test_tabular.py
+++ b/tabular/tests/unittests/test_tabular.py
@@ -21,6 +21,7 @@
 """
 import os
 import shutil
+import sys
 import tempfile
 import warnings
 from random import seed
@@ -34,6 +35,7 @@
 from autogluon.core.constants import BINARY, MULTICLASS, PROBLEM_TYPES_CLASSIFICATION, QUANTILE, REGRESSION
 from autogluon.core.utils import download, unzip
 from autogluon.tabular import TabularDataset, TabularPredictor
+from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config
 
 PARALLEL_LOCAL_BAGGING = "parallel_local"
 SEQUENTIAL_LOCAL_BAGGING = "sequential_local"
@@ -47,7 +49,7 @@ def test_tabular():
     subsample_size = None
     hyperparameter_tune_kwargs = None
     verbosity = 2  # how much output to print
-    hyperparameters = None
+    hyperparameters = get_hyperparameter_config("default")
     time_limit = None
     fast_benchmark = True  # False
     # If True, run a faster benchmark (subsample training sets, less epochs, etc),
@@ -59,6 +61,10 @@ def test_tabular():
         subsample_size = 100
         time_limit = 60
 
+    # CatBoost > 1.2 is required for Python 3.11 but cannot be installed correctly on macOS, so drop it from the models to train.
+    if sys.version_info >= (3, 11) and sys.platform == "darwin":
+        hyperparameters.pop("CAT")
+
     fit_args = {"verbosity": verbosity}
     if hyperparameter_tune_kwargs is not None:
         fit_args["hyperparameter_tune_kwargs"] = hyperparameter_tune_kwargs

From bd6b173e213239795a4fdc32ea4f2aa2e9ab6548 Mon Sep 17 00:00:00 2001
From: Weisu Yin <weisuyin96@gmail.com>
Date: Tue, 10 Oct 2023 19:56:10 +0000
Subject: [PATCH 44/44] address comments

---
 .../advanced_topics/customization.ipynb       | 23 +++++++++++++++++++
 .../configs/optimization/adamw.yaml           |  1 +
 2 files changed, 24 insertions(+)

diff --git a/docs/tutorials/multimodal/advanced_topics/customization.ipynb b/docs/tutorials/multimodal/advanced_topics/customization.ipynb
index 395ac552410..2565ca0a5ca 100644
--- a/docs/tutorials/multimodal/advanced_topics/customization.ipynb
+++ b/docs/tutorials/multimodal/advanced_topics/customization.ipynb
@@ -360,6 +360,29 @@
     "```\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "02d07866",
+   "metadata": {},
+   "source": [
+    "### optimization.track_grad_norm\n",
+    "\n",
+    "Track the p-norm of gradients during training. May be set to ‘inf’ to track the infinity-norm. If using Automatic Mixed Precision (AMP), the gradients will be unscaled before logging them."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1b60c371",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "# default used by AutoMM (no tracking)\n",
+    "predictor.fit(hyperparameters={\"optimization.track_grad_norm\": -1})\n",
+    "# track the 2-norm\n",
+    "predictor.fit(hyperparameters={\"optimization.track_grad_norm\": 2})\n",
+    "```\n"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "abe87d32",
diff --git a/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml b/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml
index 2def18adce8..4f24e51dc5a 100644
--- a/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml
+++ b/multimodal/src/autogluon/multimodal/configs/optimization/adamw.yaml
@@ -16,6 +16,7 @@ optimization:
   skip_final_val: False # Flag to skip the last validation
   gradient_clip_val: 1
   gradient_clip_algorithm: "norm"
+  track_grad_norm: -1 # p-norm of the gradients to track during training. -1 disables tracking; e.g., set it to 2 to track the 2-norm.
   log_every_n_steps: 10
   top_k: 3
   top_k_average_method: