This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit b3de180: Fixes for release
iseessel committed Oct 5, 2021
1 parent 65f2c8d commit b3de180
Showing 9 changed files with 42 additions and 39 deletions.
3 changes: 3 additions & 0 deletions dev/packaging/apex_conda/README.md
@@ -11,6 +11,9 @@ nothing to do here.
2. Go into the `inside` directory and clone apex with
`git clone https://github.com/NVIDIA/apex.git`.

Move to the appropriate commit.
`git checkout 1f2aa9156547377a023932a1512752c392d9bbdf`.

3. You may want to `docker pull pytorch/conda-cuda:latest`.

4. Run `bash go.sh` in this directory. This takes ages
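For reference, step 2 can also be scripted; a minimal Python sketch (the `inside` path and helper name are illustrative, and the commit hash is the one pinned above):

```python
# Illustrative only: scripted equivalent of step 2, cloning apex into the
# `inside` directory and pinning it to the commit named in this README.
import subprocess

APEX_COMMIT = "1f2aa9156547377a023932a1512752c392d9bbdf"

def fetch_pinned_apex(inside_dir: str = "inside") -> None:
    subprocess.check_call(
        ["git", "clone", "https://github.com/NVIDIA/apex.git"], cwd=inside_dir
    )
    subprocess.check_call(["git", "checkout", APEX_COMMIT], cwd=f"{inside_dir}/apex")

if __name__ == "__main__":
    fetch_pinned_apex()
```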
16 changes: 15 additions & 1 deletion dev/packaging/apex_conda/inside/build.py
@@ -24,6 +24,9 @@
"1.7.0": ["cu101", "cu102", "cu110"],
"1.7.1": ["cu101", "cu102", "cu110"],
"1.8.0": ["cu101", "cu102", "cu111"],
"1.8.1": ["cu101", "cu102", "cu111"],
"1.9.0": ["cu102", "cu111"],
"1.9.1": ["cu102", "cu111"],
}

CUDA_HOMES = {
@@ -72,6 +75,15 @@ def pytorch_versions_for_python(python_version):

print()
print("python", python_version, "pytorch", ptv, "cuda", cuv, flush=True)

apex_file_name = (
f"apex-{VERSION}-py{python_version_nodot}_{cuv}_pyt{ptv_nodot}.tar.bz2"
)

if os.path.exists(f"./inside/packaging/{apex_file_name}"):
print(f"Package: {apex_file_name} already found")
continue

args = [
"conda",
"build",
@@ -87,8 +99,10 @@ def pytorch_versions_for_python(python_version):
if python_version == "3.9" or cuv == "cu111":
args.insert(4, "conda-forge")
args.insert(4, "-c")

subprocess.check_call(args)
file = f"/opt/conda/conda-bld/linux-64/apex-{VERSION}-py{python_version_nodot}_{cuv}_pyt{ptv_nodot}.tar.bz2"

file = f"/opt/conda/conda-bld/linux-64/{apex_file_name}"
shutil.copy(file, "inside/packaging")

print("DONE")
4 changes: 3 additions & 1 deletion dev/packaging/apex_pip/README.md
@@ -11,6 +11,9 @@ nothing to do here.
2. Go into the `inside` directory and clone apex with
`git clone https://github.com/NVIDIA/apex.git`.

Move to the appropriate commit.
`git checkout 1f2aa9156547377a023932a1512752c392d9bbdf`.

3. You may want to `docker pull pytorch/conda-cuda:latest`.

4. Run `bash go.sh` in this directory. This takes ages
Expand All @@ -19,7 +22,6 @@ and writes packages to `inside/output`.
5. You can upload the packages to s3, along with basic html files
which enable them to be used, with `bash after.sh`.


In particular, if you are in a jupyter/colab notebook you can
then install using these wheels with the following series of
commands.
4 changes: 4 additions & 0 deletions dev/packaging/apex_pip/inside/a.sh
@@ -34,6 +34,9 @@ declare -A CONDA_CUDA_VERSIONS=(
["1.7.0"]="cu101 cu102 cu110"
["1.7.1"]="cu101 cu102 cu110"
["1.8.0"]="cu101 cu102 cu111"
["1.8.1"]="cu101 cu102 cu111"
["1.9.0"]="cu102 cu111"
["1.9.1"]="cu102 cu111"
)

#VERSION=$(python -c "exec(open('${script_dir}/apex/__init__.py').read()); print(__version__)")
@@ -85,6 +88,7 @@ do
outdir="../output/$tag"
if [[ -d "$outdir" ]]
then
echo "skipping" "$outdir"
continue
fi

5 changes: 5 additions & 0 deletions dev/packaging/vissl_conda/build_all_conda.sh
@@ -8,8 +8,13 @@ set -ex

rm -rf dev/packaging/vissl_conda/ClassyVision
git clone https://github.com/facebookresearch/ClassyVision.git dev/packaging/vissl_conda/ClassyVision

rm -rf dev/packaging/vissl_conda/fairscale
git clone https://github.com/facebookresearch/fairscale.git dev/packaging/vissl_conda/fairscale
cd dev/packaging/vissl_conda/fairscale
git reset --hard df7db85cef7f9c30a5b821007754b96eb1f977b6
cd ../../../../

rm -rf classy_vision
cp -r dev/packaging/vissl_conda/ClassyVision/classy_vision classy_vision
rm -rf fairscale
4 changes: 2 additions & 2 deletions dev/packaging/vissl_conda/vissl/meta.yaml
@@ -38,7 +38,7 @@ test:
source_files:
- tests
- tools
- dev/run_quick_tests.sh
- dev/run_quick_integration_tests.sh
- configs
requires:
- ca-certificates
@@ -47,7 +47,7 @@ test:
- tensorboard
commands:
- python -m unittest discover -v -s tests
- ./dev/run_quick_tests.sh
- ./dev/run_quick_integration_tests.sh

build:
string: py{{py}}
34 changes: 1 addition & 33 deletions dev/run_quick_tests.sh 100755 → 100644
@@ -43,36 +43,4 @@ popd
# - verify that the associated jobs run to the end
# -----------------------------------------------------------------------------

CFG_LIST=(
"test/integration_test/quick_barlow_twins"
"test/integration_test/quick_deepcluster_v2"
"test/integration_test/quick_pirl"
"test/integration_test/quick_simclr"
"test/integration_test/quick_simclr_efficientnet"
"test/integration_test/quick_simclr_multicrop"
"test/integration_test/quick_simclr_regnet"
"test/integration_test/quick_swav"
)

echo "========================================================================"
echo "Configs to run:"
echo "${CFG_LIST[@]}"
echo "========================================================================"

BINARY="python ${SRC_DIR}/tools/run_distributed_engines.py"

for cfg in "${CFG_LIST[@]}"; do
echo "========================================================================"
echo "Running $cfg ..."
echo "========================================================================"
CHECKPOINT_DIR=$(mktemp -d)
# shellcheck disable=SC2102
# shellcheck disable=SC2086
CUDA_LAUNCH_BLOCKING=1 $BINARY config=$cfg \
config.DATA.TRAIN.DATA_SOURCES=[synthetic] \
hydra.verbose=true \
config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true \
config.CHECKPOINT.DIR="$CHECKPOINT_DIR" && echo "TEST OK" || exit

rm -rf $CHECKPOINT_DIR
done
bash "${SRC_DIR}/dev/run_quick_integration_tests.sh"
5 changes: 5 additions & 0 deletions tests/test_tasks.py
@@ -7,6 +7,7 @@
import unittest

import pkg_resources
import torch
from parameterized import parameterized
from utils import UNIT_TEST_CONFIGS, SSLHydraConfig
from vissl.engines.train import train_main
@@ -38,6 +39,10 @@ def test_run(self, config_file_path: str):
pkg_resources.resource_filename(__name__, "test_data")
]

# Destroy process groups as torch may be initialized with NCCL, which
# is incompatible with test_cpu_regnet_moco.yaml
torch.distributed.destroy_process_group()

# run training and make sure no exception is raised
dist_run_id = get_dist_run_id(config, config.DISTRIBUTED.NUM_NODES)
train_main(
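The added `destroy_process_group()` call tears down any NCCL process group left over from a previous test before a CPU-only config such as `test_cpu_regnet_moco.yaml` runs. Calling it when no group is initialized raises an error, so a guarded variant is often safer; a hedged sketch (the guard is not part of this commit):

```python
# Hedged sketch, not part of this commit: only destroy the default process
# group if one was actually initialized, so the teardown is safe to call
# unconditionally between tests.
import torch.distributed as dist

def reset_distributed_state() -> None:
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()
```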
6 changes: 4 additions & 2 deletions vissl/utils/test_utils.py
@@ -29,8 +29,10 @@ def in_temporary_directory(enabled: bool = True):
old_cwd = os.getcwd()
with tempfile.TemporaryDirectory() as temp_dir:
os.chdir(temp_dir)
yield temp_dir
os.chdir(old_cwd)
try:
yield temp_dir
finally:
os.chdir(old_cwd)
else:
yield os.getcwd()

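Wrapping the `yield` in try/finally means the caller's working directory is restored even when the test body raises. A short usage sketch (the failing body is illustrative):

```python
# Illustrative use of in_temporary_directory: with the try/finally above,
# the caller's working directory is restored even if the body raises.
import os
from vissl.utils.test_utils import in_temporary_directory

def demo() -> None:
    before = os.getcwd()
    try:
        with in_temporary_directory() as temp_dir:
            open(os.path.join(temp_dir, "scratch.txt"), "w").close()
            raise RuntimeError("simulated test failure")
    except RuntimeError:
        pass
    assert os.getcwd() == before  # cwd restored despite the exception
```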
