Add support for python 3.11 #3190
Changes from 19 commits
**File 1: CI torch setup script (shell)**

```diff
@@ -16,21 +16,23 @@ function setup_build_contrib_env {
 }
 
 function setup_torch_gpu {
-    # Security-patched torch.
-    python3 -m pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
+    PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu118 reinstall_torch
 }
 
 function setup_torch_cpu {
-    # Security-patched torch
-    python3 -m pip install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cpu
+    PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu reinstall_torch
 }
 
 function setup_torch_gpu_non_linux {
-    pip3 install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
+    setup_torch_gpu
 }
 
 function setup_torch_cpu_non_linux {
-    pip3 install torch==1.13.1 torchvision==0.14.1
+    setup_torch_cpu
 }
 
+function reinstall_torch {
+    pip3 install --force-reinstall torchvision~=0.15.1
+}
 
 function setup_hf_model_mirror {
```

> **Review comment** (on `reinstall_torch`): why
>
> **Reply:** I think
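The setup functions above route every install through a single `reinstall_torch` helper and select the wheel index by prefixing `PIP_EXTRA_INDEX_URL` (pip's environment-variable form of `--extra-index-url`) to the call. A minimal sketch of the per-command env-var scoping this relies on, with a placeholder variable and URL:

```shell
# An env assignment prefixed to a command is visible to that command only,
# not to the surrounding shell -- which is why each setup function can pick
# its own wheel index without leaking it into later CI steps.
FOO=https://example.invalid/whl/cpu sh -c 'echo "inside:  $FOO"'
echo "outside: ${FOO:-unset}"
```

Running this prints the URL inside the child command and `unset` afterwards, confirming the assignment never pollutes the parent shell.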
**File 2: GitHub Actions workflow**

```diff
@@ -45,7 +45,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [macos-latest, windows-latest, ubuntu-latest]
-        python: ["3.8", "3.9", "3.10"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - name: Checkout repository for PR
         if: (github.event_name == 'workflow_dispatch')
```

> **Review comment:** To trigger a platform test, you should be able to do it with a comment on this PR. However, our platform tests are currently failing because of other issues. @gradientsky @Innixma We'll want to fix the platform tests to unblock this PR.

The same one-line matrix change, `python: ["3.8", "3.9", "3.10"]` to `python: ["3.8", "3.9", "3.10", "3.11"]`, is repeated in five more hunks of this workflow: `@@ -77,7 +77,7 @@`, `@@ -109,7 +109,7 @@`, `@@ -141,7 +141,7 @@`, `@@ -174,7 +174,7 @@`, and `@@ -214,7 +214,7 @@`.
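Each `python` entry multiplies with each `os` entry, so adding `"3.11"` grows every affected job matrix from 9 to 12 jobs. A quick sketch of the fan-out:

```python
from itertools import product

os_list = ["macos-latest", "windows-latest", "ubuntu-latest"]
pythons = ["3.8", "3.9", "3.10", "3.11"]

# GitHub Actions expands a matrix to the cross product: one job per pair.
jobs = list(product(os_list, pythons))
print(len(jobs))  # 12 (was 9 with three Python versions)
```

With `fail-fast: false`, all 12 jobs run to completion even when one combination fails, which is what lets a new interpreter version be evaluated across every OS in one pass.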
**File 3: docs installation snippet**

```diff
@@ -4,7 +4,7 @@ pip install -U setuptools wheel
 
 # CPU version of pytorch has smaller footprint - see installation instructions in
 # pytorch documentation - https://pytorch.org/get-started/locally/
-pip install torch==1.13.1+cpu torchvision==0.14.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
+pip install torchvision~=0.15.1 --force-reinstall --extra-index-url https://download.pytorch.org/whl/cpu
 
 pip install autogluon
```

> **Review comment:** I switched the docs to torch's new `--extra-index-url` style. torchvision etc. have an exact pin on a torch version, so this command is sufficient (and will reinstall the CUDA version if a user previously had a CPU version installed, and vice versa).
**File 4: multimodal `setup.py` dependencies**

```diff
@@ -33,13 +33,13 @@
     "evaluate>=0.2.2,<0.4.0",
     "accelerate>=0.9,<0.17",
     "timm>=0.6.12,<0.7.0",
-    "torch>=1.9,<1.14",
-    "torchvision<0.15.0",
+    "torch>=1.11,<2.1",
+    "torchvision>=0.12.0,<0.16",
     "fairscale>=0.4.5,<0.4.14",
-    "scikit-image>=0.19.1,<0.20.0",
-    "pytorch-lightning>=1.9.0,<1.10.0",
+    "scikit-image>=0.19.1,<0.21.0",
+    "pytorch-lightning>=2.0.0,<2.1",
     "text-unidecode>=1.3,<1.4",
-    "torchmetrics>=0.11.0,<0.12.0",
+    "torchmetrics~=1.0.0rc0",
     "transformers>=4.23.0,<4.27.0",
     "nptyping>=1.4.4,<2.5.0",
     "omegaconf>=2.1.1,<2.3.0",
```

> **Review comment** (on `"torch>=1.11,<2.1"`): This change makes users install torch 2.0 by default. Is the CI still testing torch 1.13.1? If so, there is some inconsistency between what we test and what users use.
>
> **Reply:** Does this thread answer your question? #3190 (comment). After this merges, I think there are no explicit tests that run with pytorch 1.x anymore.
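The loosened pins matter because of how pip reads the specifiers: `torch>=1.11,<2.1` now admits torch 2.0.x, and `torchmetrics~=1.0.0rc0` is a compatible-release pin (roughly `>=1.0.0rc0,<1.1`). A hand-rolled sketch of the half-open range check, simplified to numeric parts only (real pip specifiers also handle pre-release tags, which this ignores):

```python
def vtuple(version: str) -> tuple:
    """Parse a plain numeric version like '2.0.1' into a comparable tuple."""
    return tuple(int(part) for part in version.split("."))

def in_range(version: str, lower: str, upper: str) -> bool:
    """Inclusive lower bound, exclusive upper bound, pip-style."""
    return vtuple(lower) <= vtuple(version) < vtuple(upper)

print(in_range("2.0.1", "1.11", "2.1"))   # True: torch 2.0.x is now allowed
print(in_range("1.13.1", "1.11", "2.1"))  # True: 1.13 still satisfies the pin
print(in_range("2.1.0", "1.11", "2.1"))   # False: upper bound is exclusive
```

The reviewer's concern follows directly from the first two lines of output: both torch 1.13 and 2.0 satisfy the new pin, but fresh installs resolve to the newest allowed version, so users get 2.0 while any CI still pinned to 1.13.1 tests something older.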
**File 5: Lightning module, `on_validation_start`**

```diff
@@ -239,13 +239,13 @@ def training_step(self, batch, batch_idx):
 
     def on_validation_start(self) -> None:
         if self.skip_final_val and self.trainer.should_stop:
-            self.trainer.val_dataloaders = []  # skip the final validation by setting val_dataloaders empty
+            self.log(
+                self.validation_metric_name,
+                self.validation_metric,
+                on_step=False,
+                on_epoch=True,
+            )
+            return None
 
         return super().on_validation_start()
 
     def validation_step(self, batch, batch_idx):
```

> **Review comment:** @suzhoum Can you verify the fast build mode still behaves well after this change?
>
> **Reply:** More context in the commit message: 18fbb5f
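The hunk above stops overwriting `trainer.val_dataloaders` (which Lightning 2.0 no longer allows as a way to suppress a validation run) and instead logs the last computed metric and returns early. A Lightning-free stand-in for that control flow; the class and attribute names here are illustrative, not the project's real ones:

```python
class ToyModule:
    """Mimics just enough of the LightningModule hook to show the pattern."""

    def __init__(self, skip_final_val: bool, should_stop: bool):
        self.skip_final_val = skip_final_val
        self.should_stop = should_stop  # stands in for trainer.should_stop
        self.logged = {}

    def log(self, name, value):
        self.logged[name] = value

    def on_validation_start(self):
        if self.skip_final_val and self.should_stop:
            # Re-log the last known metric so callbacks that monitor it
            # still see a value, then skip the redundant final validation.
            self.log("val_metric", 0.93)
            return "skipped"
        return "ran"

print(ToyModule(True, True).on_validation_start())   # skipped
print(ToyModule(True, False).on_validation_start())  # ran
```

Re-logging the cached metric is the key detail: checkpoint and early-stopping callbacks that monitor the validation metric would otherwise see nothing for the skipped epoch.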
**File 6: `_fit`, Trainer construction**

```diff
@@ -1429,15 +1429,15 @@ def _fit(
                 reduce_bucket_size=config.env.deepspeed_allreduce_size,
             )
         else:
-            strategy = None
+            strategy = "auto"
     else:
         strategy = config.env.strategy
 else:
     # we don't support running each trial in parallel without ray lightning
     if use_ray_lightning:
         strategy = hpo_kwargs.get("_ray_lightning_plugin")
     else:
-        strategy = None
+        strategy = "auto"
     num_gpus = min(num_gpus, 1)
 
 config.env.num_gpus = num_gpus
```

```diff
@@ -1451,7 +1451,7 @@ def _fit(
     log_filter = LogFilter(blacklist_msgs)
     with apply_log_filter(log_filter):
         trainer = pl.Trainer(
-            accelerator="gpu" if num_gpus > 0 else None,
+            accelerator="gpu" if num_gpus > 0 else "auto",
            devices=get_available_devices(
                num_gpus=num_gpus,
                auto_select_gpus=config.env.auto_select_gpus,
```

```diff
@@ -1475,7 +1475,6 @@ def _fit(
             log_every_n_steps=OmegaConf.select(config, "optimization.log_every_n_steps", default=10),
             enable_progress_bar=enable_progress_bar,
             fast_dev_run=config.env.fast_dev_run,
-            track_grad_norm=OmegaConf.select(config, "optimization.track_grad_norm", default=-1),
             val_check_interval=config.optimization.val_check_interval,
             check_val_every_n_epoch=config.optimization.check_val_every_n_epoch
             if hasattr(config.optimization, "check_val_every_n_epoch")
```

> **Review comment:** Please update the doc correspondingly: https://github.com/autogluon/autogluon/blob/master/docs/tutorials/multimodal/advanced_topics/customization.ipynb?short_path=c0bd10d#L368
>
> **Reply:** BTW, as lightning has removed
>
> **Reply:** Ref Lightning-AI/pytorch-lightning#16745, they suggest an explicit
> **Review comment (maintainer):** Changes from this file actually won't be reflected in our CI, because we check the permissions of the PR submitter to prevent people from modifying our setup scripts. Currently there's no easy way to let it run. I'm thinking of allowing the scripts to be used if we tag a PR as something like `safe to run` in the future. For now, we might need to give @ddelange write permission to our repo briefly and revoke it once the PR is ready. @Innixma @gradientsky Ideas?
>
> **Reply (@ddelange):** Sounds like good practice to keep that closed off 💪 FWIW, I don't mind if a maintainer opens a new PR in favour of this one.
>
> **Reply (maintainer):** After discussion with the team, I'll create a clone of this PR once it's ready, to test the changes. Once those changes are verified, we'll merge this PR and close the clone.