Upgrading to torch 2.0 (#5005)

* first attempt * installing torch GPU resourcs to pass website build * ValueError to RuntimeError * Metric unused lint error * remvoed the codevec * cuda upgrade * docker image for GPU tests * uncomment * debug schema * more on gpu * undocumented image * the pyenv version * setup cuda * right CUDA version * cache change * python3.8 * adjusted the tolerances for the new torch and cuda versions * Added the dependancy issue resolver to the readme for install * pinned torch packages * the config * relaxing the torch version a bit * torch again * torch==2.0.0+cu117 * torch vision * unpinned the torchvision * another try * py3.9 * python 39 in CircleCI names * torch==2.0.0 * installtorchcpu * python -m pip install --progress-bar off pytorch-pretrained-bert * python 3.9 * python 3.9 * sphinx upgrade * python3.9-dev * cache bump * importlib-metadata * version again * sphinx references * 0.15.1+cu118 * using MyST format * myst * image link * /docs
facebookresearch · Apr 19, 2023 · ad16d39 · ad16d39
1 parent 63de0a9
commit ad16d39
Show file tree

Hide file tree

Showing 12 changed files with 79 additions and 121 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -4,58 +4,41 @@ version: 2.1
 # environments where we run our jobs
 # -------------------------------------------------------------------------------------
 executors:
-  standard_cpu38:
+  standard_cpu39:
     docker:
-      - image: cimg/python:3.8.0
+      - image: cimg/python:3.9.0
     environment:
       PYTHONUNBUFFERED: 1
     resource_class: xlarge
 
-  small_cpu38:
+  small_cpu39:
     docker:
-      - image: cimg/python:3.8.0
+      - image: cimg/python:3.9.0
     environment:
       PYTHONUNBUFFERED: 1
     resource_class: medium
 
-  small_cpu38_conda:
-    docker:
-      - image: continuumio/miniconda3
-    environment:
-      PYTHONUNBUFFERED: 1
-    resource_class: medium
-
-  osx_cpu38:
+  osx_cpu39:
     macos:
       # https://circleci.com/docs/2.0/testing-ios/
       xcode: "12.5.1"
     environment:
-      PYTHON: 3.8.0
+      PYTHON: 3.9.0
       PYTHONUNBUFFERED: 1
       HOMEBREW_NO_AUTO_UPDATE: 1
     resource_class: macos.x86.medium.gen2
 
   gpu_small:
-    environment:
-      CUDA_VERSION: "11.2"
-      PYTHONUNBUFFERED: 1
-      CUDA_HOME: /usr/local/cuda-11.2
     machine:
-       image: ubuntu-2004-cuda-11.2:202103-01
+       image: linux-cuda-11:2023.02.1
     resource_class: gpu.nvidia.small.multi
 
   gpu_medium:
-    environment:
-      CUDA_VERSION: "11.2"
-      PYTHONUNBUFFERED: 1
-      CUDA_HOME: /usr/local/cuda-11.2
     machine:
-       image: ubuntu-2004-cuda-11.2:202103-01
+       image: linux-cuda-11:2023.02.1
     resource_class: gpu.nvidia.medium.multi
 
 
-
-
 # -------------------------------------------------------------------------------------
 # reusable commands
 # -------------------------------------------------------------------------------------
@@ -76,43 +59,13 @@ commands:
       - run:
           name: Sets up the virtual environment
           command: |
+            if [[ "$OSTYPE" == "linux-gnu"* ]]; then sudo apt update && sudo apt install python3.9 python3.9-dev; fi
             mkdir -p ~/venv
-            virtualenv --python=python3 ~/venv
+            virtualenv --python=python3.9 ~/venv
             echo ". ~/venv/bin/activate" >> $BASH_ENV
             . ~/venv/bin/activate
             python --version
 
-  setup_conda:
-    description: Sets up the virtual environment
-    steps:
-      - run:
-          name: Sets up the virtual environment
-          command: |
-            echo "Started creating the ParlAI conda environment."
-            conda create --quiet --yes --name conda_parlai python=3.8
-            echo "Created the ParlAI conda environment."
-            conda init bash
-            source ~/.bashrc
-            conda deactivate
-            echo "Activating the ParlAI conda environment."
-            conda activate conda_parlai
-            for package in pytorch==1.13.0 torchvision==0.14.0 torchaudio==0.13.0 pytorch-cuda=11.6
-            do
-              echo "Conda installing ${package}."
-              conda install --quiet --yes "${package}" -c pytorch -c nvidia
-            done
-            echo "conda activate conda_parlai" >> $BASH_ENV
-
-  codecov:
-    description: Coverage report
-    steps:
-      - run:
-          name: Coverage report
-          when: always
-          command: |
-            python -m codecov --flags $CIRCLE_JOB
-            mkdir -p ~/ParlAI/data
-
   installdeps:
     description: Install basic dependencies
     steps:
@@ -122,7 +75,6 @@ commands:
             pip install --upgrade pip
             for i in $(seq 1 3); do python -m pip install --progress-bar off pip setuptools && s=0 && break || s=$? && sleep 10; done; (exit $s)
             for i in $(seq 1 3); do python -m pip install --progress-bar off coverage && s=0 && break || s=$? && sleep 10; done; (exit $s)
-            for i in $(seq 1 3); do python -m pip install --progress-bar off codecov && s=0 && break || s=$? && sleep 10; done; (exit $s)
             mkdir -p ~/ParlAI/test-results
             for i in $(seq 1 3); do pip install -v -r requirements.txt && s=0 && break || s=$? && sleep 10; done; (exit $s)
             for i in $(seq 1 3); do python setup.py develop && s=0 && break || s=$? && sleep 10; done; (exit $s)
@@ -138,7 +90,7 @@ commands:
       - run:
           name: Install torch GPU and dependencies
           command: |
-            python -m pip install --progress-bar off torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1
+            python -m pip install --progress-bar off torch==2.0.0 torchvision==0.15.1+cu118 torchaudio==2.0.0 --index-url https://download.pytorch.org/whl/cu118
             python -m pip install --progress-bar off 'fairscale~=0.4.0'
             python -m pip install --progress-bar off pytorch-pretrained-bert
             python -m pip install --progress-bar off 'transformers==4.20'
@@ -157,7 +109,7 @@ commands:
           name: Install torch CPU and dependencies
           command: |
             python -m pip install --progress-bar off 'transformers==4.20'
-            python -m pip install --progress-bar off 'torch==1.13.1'
+            python -m pip install --progress-bar off torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.0
             python -c 'import torch; print("Torch version:", torch.__version__)'
             python -m torch.utils.collect_env
 
@@ -167,8 +119,9 @@ commands:
       - run:
           name: Install torch CPU and dependencies
           command: |
-            python -m pip install --progress-bar off 'torch==1.13.1' 'torchvision==0.14.1' 'torchaudio==0.13.1'
+            python -m pip install --progress-bar off torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.0 --index-url https://download.pytorch.org/whl/cpu
             python -m pip install --progress-bar off 'transformers==4.20'
+            python -m pip install --progress-bar off pytorch-pretrained-bert
             python -c 'import torch; print("Torch version:", torch.__version__)'
             python -m torch.utils.collect_env
 
@@ -200,9 +153,12 @@ commands:
           name: Setup CUDA
           working_directory: ~/
           command: |
-            pyenv versions
             nvidia-smi
-            pyenv global 3.9.2
+            sudo update-alternatives --set cuda /usr/local/cuda-11.8
+            nvcc --version
+            PYTHON3_VENV=$(pyenv versions | grep '3\..*' | cut -f 2 -d ' ')
+            echo "pyenv found Python ${PYTHON3_VENV}"
+            pyenv global $PYTHON3_VENV
 
   findtests:
     description: Find tests to run
@@ -270,7 +226,6 @@ commands:
           key: data-20230404-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
           paths:
             - "~/ParlAI/data"
-      - codecov
       - store_test_results:
           path: test-results
 
@@ -287,6 +242,7 @@ commands:
           key: deps-20230404-bw-{{ checksum "requirements.txt" }}
       - setup
       - installdeps
+      - installtorchcpu
       - save_cache:
           key: deps-20230404-bw-{{ checksum "requirements.txt" }}
           paths:
@@ -328,8 +284,8 @@ commands:
 # Actual jobs
 # -------------------------------------------------------------------------------------
 jobs:
-  cleaninstall_38:
-    executor: small_cpu38
+  cleaninstall_39:
+    executor: small_cpu39
     working_directory: ~/ParlAI
     parallelism: 1
     steps:
@@ -346,23 +302,23 @@ jobs:
             parlai display_data -t integration_tests
 
   unittests_osx:
-    executor: osx_cpu38
+    executor: osx_cpu39
     working_directory: ~/ParlAI
     parallelism: 2
     steps:
       - runtests:
           cachename: osx
           marker: unit
 
-  unittests_38:
-    executor: standard_cpu38
+  unittests_39:
+    executor: standard_cpu39
     working_directory: ~/ParlAI
     parallelism: 2
     steps:
       - runtests:
           more_installs:
             - installtorchcpu
-          cachename: ut38
+          cachename: ut39
           marker: unit
 
   unittests_gpu18:
@@ -391,7 +347,7 @@ jobs:
           pytest_flags: -v -s
 
   crowdsourcing_tests:
-    executor: small_cpu38
+    executor: small_cpu39
     working_directory: ~/ParlAI
     parallelism: 1
     steps:
@@ -403,7 +359,7 @@ jobs:
             - installcrowdsourcingdeps
 
   teacher_tests:
-    executor: standard_cpu38
+    executor: standard_cpu39
     working_directory: ~/ParlAI
     parallelism: 16
     steps:
@@ -416,22 +372,22 @@ jobs:
           pytest_flags: -v -s
 
   build_website:
-    executor: small_cpu38
+    executor: small_cpu39
     working_directory: ~/ParlAI
     parallelism: 1
     steps:
       - website:
           deploy: false
 
   deploy_website:
-    executor: small_cpu38
+    executor: small_cpu39
     working_directory: ~/ParlAI
     steps:
       - website:
           deploy: true
 
   test_website:
-    executor: small_cpu38
+    executor: small_cpu39
     working_directory: ~/ParlAI
     steps:
       - run:
@@ -454,23 +410,23 @@ workflows:
   version: 2
   commit:
     jobs:
-      - cleaninstall_38
+      - cleaninstall_39
       - unittests_gpu18:
           requires:
-            - unittests_38
+            - unittests_39
       - unittests_osx:
           requires:
-            - unittests_38
-      - unittests_38
+            - unittests_39
+      - unittests_39
       - long_gpu_tests:
           requires:
-            - unittests_38
+            - unittests_39
       - crowdsourcing_tests:
           requires:
-            - unittests_38
+            - unittests_39
       - teacher_tests:
           requires:
-            - unittests_38
+            - unittests_39
       - build_website:
           filters:
             branches:

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -20,7 +20,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v1
         with:
-          python-version: 3.8.x
+          python-version: 3.9.x
           architecture: x64
       - name: Fetch ParlAI
         uses: actions/checkout@v2
@@ -64,7 +64,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v1
         with:
-          python-version: 3.8.x
+          python-version: 3.9.x
           architecture: x64
       - name: Fetch ParlAI
         uses: actions/checkout@v2
@@ -89,7 +89,7 @@ jobs:
           pip install -q -r requirements.txt
           pip install -q mypy mypy-extensions
           pip install -q git+https://github.com/numpy/numpy-stubs.git
-          pip install -q torch==1.5.1+cpu torchvision==0.6.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
+          pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
       - name: mypy
         run: |
           set -eux

diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@
     <img src="https://img.shields.io/twitter/follow/parlai_parley?label=Twitter&style=social" alt="Twitter" />
   </a>
  </p>
- 
+
 -------------------------------------------------------------------------------------------------------------------------------------------------------
 
 [ParlAI](http://parl.ai) (pronounced “par-lay”) is a python framework for
@@ -84,6 +84,12 @@ git clone https://github.com/facebookresearch/ParlAI.git ~/ParlAI
 cd ~/ParlAI; python setup.py develop
 ```
 
+> **Note**
+> Sometimes the install from source may not work due to dependency issues (especially in PyTorch-related packages).
+> In that case, try building a fresh conda environment and running a command similar to the following:
+> `conda install pytorch==2.0.0 torchvision torchaudio torchtext pytorch-cuda=11.8 -c pytorch -c nvidia`.
+> Check the PyTorch installation documentation for the command matching your CUDA and OS versions.
+
 All needed data will be downloaded to `~/ParlAI/data`. If you need to clear out
 the space used by these files, you can safely delete these directories and any
 files needed will be downloaded again.

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -61,7 +61,7 @@
 
 # General information about the project.
 project = 'ParlAI'
-copyright = '2020, Facebook AI Research'
+copyright = '2023, Facebook AI Research'
 author = 'Facebook AI'
 
 # The version info for the project you're documenting, acts as replacement for
@@ -75,10 +75,7 @@
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
+language = 'en'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.

diff --git a/docs/source/core/messages.md b/docs/source/core/messages.md
@@ -4,7 +4,7 @@
 .. automodule:: parlai.core.message
   :members:
 ```
-<img src="/_static/img/act-obs-dict.png" width="60%"/>
+<img src="/docs/_static/img/act-obs-dict.png" width="60%"/>
 
 The primary medium for information flow (messages between agents and the environment)
 in ParlAI is a Message, a subclass of a python `dict` containing the actions of an agent

diff --git a/docs/source/tutorial_metrics.md b/docs/source/tutorial_metrics.md
@@ -408,8 +408,8 @@ return message. This is a dictionary which maps field name to a metric value.
 When the teacher receives the response from the model, it utilizes the metrics
 field to update counters on its side.
 
+(list-of-metrics)=
 ## List of Metrics
-
 Below is a list of metrics and a brief explanation of each.
 
 :::{note} List of metrics