Add pytorch 1.10.0 to test space, remove 1.6.0 #3291

Merged: 6 commits, Dec 11, 2021
Changes shown below are from 3 of the 6 commits.
.buildkite/gen-pipeline.sh (18 additions, 18 deletions)

@@ -7,7 +7,7 @@ set -eu
repository=823773083436.dkr.ecr.us-east-1.amazonaws.com/buildkite

# our baseline test is
baseline="test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0"
baseline="test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0"
# in run_gloo_integration we run 'Elastic Spark * Tests' for this baseline
# so it has to have Gloo mpi kind

@@ -17,48 +17,48 @@ code_files=$(python "$dir/get_changed_code_files.py" || echo failure)
tests=$(if [[ -n "${PIPELINE_MODE:-}" ]] && ( [[ "${BUILDKITE_BRANCH:-}" == "${BUILDKITE_PIPELINE_DEFAULT_BRANCH:-}" ]] || [[ -n "$code_files" ]] ); then
# we vary the baseline along the Python dimension and PySpark together
# run_gloo_integration expects these to have Gloo mpi kind to run 'Elastic Spark * Tests'
printf "test-cpu-gloo-py3_7-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark2_4_8 "
printf "test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_1_2 "
printf "test-cpu-gloo-py3_7-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark2_4_8 "
printf "test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_1_2 "
# our baseline
printf "$baseline "

# then we vary the baseline along mpi kinds dimension
# our baseline again
# printf "test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-cpu-mpich-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-cpu-oneccl-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-cpu-openmpi-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0 "
# printf "test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-cpu-mpich-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-cpu-oneccl-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-cpu-openmpi-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 "
# note: we test openmpi-gloo mpi kind in this variation in each of [cpu, gpu, mixed]
printf "test-cpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-cpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 "

# then we vary the baseline along the framework dimensions all together
# some frameworks are not available for our baseline Python version 3.8, so we use Python 3.7
# run_gloo_integration expects tf1 to have Gloo mpi kind to run 'Elastic Spark * Tests'
printf "test-cpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_6_0-mxnet1_5_1_p0-pyspark3_2_0 "
printf "test-cpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_7_1-mxnet1_5_1_p0-pyspark3_2_0 "
# there is no mxnet-1.6.0.post0 and mxnet-1.6.0 does not work with horovod
# https://github.com/apache/incubator-mxnet/issues/16193
# however, there is an mxnet-cu101-1.6.0.post0, so we test this with gpu instead of cpu
#printf "test-cpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_7_1-mxnet1_6_0_p0-pyspark3_2_0 "
printf "test-cpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_8_1-mxnet1_7_0_p2-pyspark3_2_0 "
#printf "test-cpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_8_1-mxnet1_6_0_p0-pyspark3_2_0 "
printf "test-cpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_9_1-mxnet1_7_0_p2-pyspark3_2_0 "
# our baseline again
# printf "test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0 "
# printf "test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-cpu-gloo-py3_8-tfhead-keras_none-torchhead-mxnethead-pyspark3_2_0 "

# then we vary the frameworks for gpu
printf "test-gpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_6_0-mxnet1_5_1_p0-pyspark3_2_0 "
printf "test-gpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_7_1-mxnet1_5_1_p0-pyspark3_2_0 "
# this is required as we cannot test mxnet-1.6.0.post0 with cpu
printf "test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_7_1-mxnet1_6_0_p0-pyspark3_2_0 "
printf "test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_8_1-mxnet1_6_0_p0-pyspark3_2_0 "
# we additionally test the previous framework combination (CUDA 10.x) with mxnet 1.7.x
# as mxnet 1.7.x only supports CUDA 10.x, but next framework combination targets CUDA 11.x
printf "test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_7_1-mxnet1_7_0_p1-pyspark3_2_0 "
printf "test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_8_1-mxnet1_7_0_p1-pyspark3_2_0 "
# we deviate from mxnet1_7_0_p2 here as other frameworks target CUDA 11.x and
# mxnet 1.7.x only supports CUDA 10.x, with mxnet 1.8.x we have CUDA 11.x packages
printf "test-gpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_8_1-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-gpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-gpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_9_1-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-gpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-gpu-gloo-py3_8-tfhead-keras_none-torchhead-mxnethead-pyspark3_2_0 "

# and one final test with mixed cpu+gpu
printf "test-mixed-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0 "
printf "test-mixed-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 "
fi | if [[ "${PIPELINE_MODE:-}" == "GPU"* ]]; then sed -E "s/[^ ]*-cpu-[^ ]*//g"; else cat; fi \
| if [[ "${PIPELINE_MODE:-}" == "GPU HEADS" ]]; then sed -E "s/ /\n/g" | grep -e "-tfhead-keras_none-torchhead-mxnethead-" | paste -s -d " " -; else cat; fi \
| if [[ "${PIPELINE_MODE:-}" == "GPU NON HEADS" ]]; then sed -E "s/[^ ]*-tfhead-keras_none-torchhead-mxnethead-[^ ]*//g"; else cat; fi)
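For context on how these image names are consumed: the sed/grep pipeline at the end of the hunk above trims the generated list according to PIPELINE_MODE. A rough, self-contained bash sketch of that filtering follows; the two image names and the "GPU HEADS" mode come from the script above, everything else is purely illustrative and not part of this change.

# Illustrative sketch of the PIPELINE_MODE filtering in gen-pipeline.sh
tests="test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 test-gpu-gloo-py3_8-tfhead-keras_none-torchhead-mxnethead-pyspark3_2_0 "
PIPELINE_MODE="GPU HEADS"
# any GPU mode drops the *-cpu-* images
tests=$(echo "$tests" | sed -E "s/[^ ]*-cpu-[^ ]*//g")
# "GPU HEADS" additionally keeps only the head-version images
tests=$(echo "$tests" | sed -E "s/ /\n/g" | grep -e "-tfhead-keras_none-torchhead-mxnethead-" | paste -s -d " " -)
echo "$tests"  # -> test-gpu-gloo-py3_8-tfhead-keras_none-torchhead-mxnethead-pyspark3_2_0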
.github/workflows/ci.yaml (14 additions, 14 deletions)

@@ -164,7 +164,7 @@ jobs:
fail-fast: false
matrix:
include:
-- image: test-cpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_6_0-mxnet1_5_1_p0-pyspark3_2_0
+- image: test-cpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_7_1-mxnet1_5_1_p0-pyspark3_2_0
Elastic_Spark_TensorFlow_Tests_2: true
Elastic_Tests_2: true
Gloo_Cluster_PyTests: true
@@ -185,7 +185,7 @@ jobs:
Spark_Torch_MNIST: true
build_timeout: 30

-- image: test-cpu-gloo-py3_7-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark2_4_8
+- image: test-cpu-gloo-py3_7-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark2_4_8
Elastic_Spark_TensorFlow_Tests_1: true
Elastic_Spark_Torch_Tests: true
Elastic_Tests_1: true
@@ -203,7 +203,7 @@ jobs:
Spark_Torch_MNIST: true
build_timeout: 30

-- image: test-cpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_8_1-mxnet1_7_0_p2-pyspark3_2_0
+- image: test-cpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_9_1-mxnet1_7_0_p2-pyspark3_2_0
Elastic_Tests_1: true
Gloo_Cluster_PyTests: true
Gloo_MXNet_MNIST: true
@@ -219,7 +219,7 @@ jobs:
Spark_Torch_MNIST: true
build_timeout: 30

-- image: test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_1_2
+- image: test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_1_2
Elastic_Spark_TensorFlow_Tests_1: true
Elastic_Spark_Torch_Tests: true
Elastic_Tests_1: true
@@ -237,7 +237,7 @@ jobs:
Spark_Torch_MNIST: true
build_timeout: 30

-- image: test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0
+- image: test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0
Elastic_Spark_TensorFlow_Tests_1: true
Elastic_Spark_Torch_Tests: true
Elastic_Tests_1: true
@@ -255,7 +255,7 @@ jobs:
Spark_Torch_MNIST: true
build_timeout: 30

-- image: test-cpu-mpich-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0
+- image: test-cpu-mpich-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0
MPI_Cluster_PyTests: true
MPI_MXNet_MNIST: true
MPI_Parallel_PyTests: true
@@ -267,7 +267,7 @@ jobs:
Single_PyTorch_MNIST: true
build_timeout: 30

-- image: test-cpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0
+- image: test-cpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0
Elastic_Tests_1: true
Gloo_Cluster_PyTests: true
Gloo_MXNet_MNIST: true
@@ -291,7 +291,7 @@ jobs:
Spark_Torch_MNIST: true
build_timeout: 30

-- image: test-cpu-openmpi-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0
+- image: test-cpu-openmpi-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0
MPI_Cluster_PyTests: true
MPI_MXNet_MNIST: true
MPI_Parallel_PyTests: true
@@ -307,22 +307,22 @@ jobs:
Spark_Torch_MNIST: true
build_timeout: 30

-- image: test-gpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_6_0-mxnet1_5_1_p0-pyspark3_2_0
+- image: test-gpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_7_1-mxnet1_5_1_p0-pyspark3_2_0
build_timeout: 40

-- image: test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_7_1-mxnet1_6_0_p0-pyspark3_2_0
+- image: test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_8_1-mxnet1_6_0_p0-pyspark3_2_0
build_timeout: 40

-- image: test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_7_1-mxnet1_7_0_p1-pyspark3_2_0
+- image: test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_8_1-mxnet1_7_0_p1-pyspark3_2_0
build_timeout: 40

-- image: test-gpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_8_1-mxnet1_8_0_p0-pyspark3_2_0
+- image: test-gpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_9_1-mxnet1_8_0_p0-pyspark3_2_0
build_timeout: 40

-- image: test-gpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0
+- image: test-gpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0
build_timeout: 40

-- image: test-mixed-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0
+- image: test-mixed-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0
build_timeout: 40

steps:
docker-compose.test.yml (36 additions, 36 deletions)

@@ -11,9 +11,9 @@ services:
PYTHON_VERSION: 3.8
TENSORFLOW_PACKAGE: tensorflow-cpu==2.6.0
KERAS_PACKAGE: keras==2.6.0
-PYTORCH_PACKAGE: torch==1.9.0+cpu
+PYTORCH_PACKAGE: torch==1.10.0+cpu
PYTORCH_LIGHTNING_PACKAGE: pytorch-lightning==1.3.8
-TORCHVISION_PACKAGE: torchvision==0.10.0+cpu
+TORCHVISION_PACKAGE: torchvision==0.11.1+cpu
MXNET_PACKAGE: mxnet==1.8.0.post0
PYSPARK_PACKAGE: pyspark==3.2.0
SPARK_PACKAGE: spark-3.2.0/spark-3.2.0-bin-hadoop2.7.tgz
@@ -22,57 +22,57 @@ services:
shm_size: 8gb

# our baseline first
-test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0:
+test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0:
extends: test-cpu-base
-test-cpu-mpich-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0:
+test-cpu-mpich-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0:
extends: test-cpu-base
build:
args:
MPI_KIND: MPICH
HOROVOD_BUILD_FLAGS: HOROVOD_WITHOUT_GLOO=1
-test-cpu-oneccl-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0:
+test-cpu-oneccl-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0:
extends: test-cpu-base
build:
args:
MPI_KIND: ONECCL
HOROVOD_BUILD_FLAGS: HOROVOD_WITHOUT_GLOO=1
-test-cpu-openmpi-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0:
+test-cpu-openmpi-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0:
extends: test-cpu-base
build:
args:
MPI_KIND: OpenMPI
HOROVOD_BUILD_FLAGS: HOROVOD_WITHOUT_GLOO=1
-test-cpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0:
+test-cpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0:
extends: test-cpu-base
build:
args:
MPI_KIND: OpenMPI

-test-cpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_6_0-mxnet1_5_1_p0-pyspark3_2_0:
+test-cpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_7_1-mxnet1_5_1_p0-pyspark3_2_0:
extends: test-cpu-base
build:
args:
PYTHON_VERSION: 3.7
# there is no tensorflow-cpu>1.15.0, so we use tensorflow==1.15.5
TENSORFLOW_PACKAGE: tensorflow==1.15.5
KERAS_PACKAGE: keras==2.2.4
-PYTORCH_PACKAGE: torch==1.6.0+cpu
+PYTORCH_PACKAGE: torch==1.7.1+cpu
PYTORCH_LIGHTNING_PACKAGE: pytorch_lightning==1.3.8
-TORCHVISION_PACKAGE: torchvision==0.7.0+cpu
+TORCHVISION_PACKAGE: torchvision==0.8.2+cpu
MXNET_PACKAGE: mxnet==1.5.1.post0
# there is no mxnet-1.6.0.post0 and mxnet-1.6.0 does not work with horovod
# https://github.com/apache/incubator-mxnet/issues/16193
# however, there is an mxnet-cu101-1.6.0.post0, so we test this with gpu instead of cpu
# this cpu test variation is defined as gpu in gpu frameworks variations below
-# test-cpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_7_1-mxnet1_6_0_p0-pyspark3_2_0:
-test-cpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_8_1-mxnet1_7_0_p2-pyspark3_2_0:
+# test-cpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_8_1-mxnet1_6_0_p0-pyspark3_2_0:
+test-cpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_9_1-mxnet1_7_0_p2-pyspark3_2_0:
extends: test-cpu-base
build:
args:
TENSORFLOW_PACKAGE: tensorflow==2.5.1
KERAS_PACKAGE: keras==2.4.3
-PYTORCH_PACKAGE: torch==1.8.1+cpu
-TORCHVISION_PACKAGE: torchvision==0.9.1
+PYTORCH_PACKAGE: torch==1.9.1+cpu
+TORCHVISION_PACKAGE: torchvision==0.10.1
MXNET_PACKAGE: mxnet==1.7.0.post2
# then our baseline again, omitted ...
test-cpu-gloo-py3_8-tfhead-keras_none-torchhead-mxnethead-pyspark3_2_0:
@@ -86,14 +86,14 @@ services:
PYTORCH_LIGHTNING_PACKAGE: pytorch_lightning
MXNET_PACKAGE: mxnet-nightly

-test-cpu-gloo-py3_7-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark2_4_8:
+test-cpu-gloo-py3_7-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark2_4_8:
extends: test-cpu-base
build:
args:
PYTHON_VERSION: 3.7
PYSPARK_PACKAGE: pyspark==2.4.8
SPARK_PACKAGE: spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
-test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_1_2:
+test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_1_2:
extends: test-cpu-base
build:
args:
Expand Down Expand Up @@ -123,8 +123,8 @@ services:
shm_size: 8gb

# okay to mix cuda 10.0 and 10.1 here as pytorch ships its own cuda libs
-# torch==1.6.0+cu101 requires torchvision==0.7.0+cu101
-test-gpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_6_0-mxnet1_5_1_p0-pyspark3_2_0:
+# torch==1.7.1+cu101 requires torchvision==0.8.2+cu101
+test-gpu-gloo-py3_7-tf1_15_5-keras2_2_4-torch1_7_1-mxnet1_5_1_p0-pyspark3_2_0:
extends: test-gpu-base
build:
args:
Expand All @@ -134,12 +134,12 @@ services:
PYTHON_VERSION: 3.7
TENSORFLOW_PACKAGE: tensorflow-gpu==1.15.5
KERAS_PACKAGE: keras==2.2.4
-PYTORCH_PACKAGE: torch==1.6.0+cu101
+PYTORCH_PACKAGE: torch==1.7.1+cu101
PYTORCH_LIGHTNING_PACKAGE: pytorch_lightning==1.3.8
-TORCHVISION_PACKAGE: torchvision==0.7.0+cu101
+TORCHVISION_PACKAGE: torchvision==0.8.2+cu101
MXNET_PACKAGE: mxnet-cu100==1.5.1.post0
# this is required as we cannot test mxnet-1.6.0.post0 with cpu
-test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_7_1-mxnet1_6_0_p0-pyspark3_2_0:
+test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_8_1-mxnet1_6_0_p0-pyspark3_2_0:
extends: test-gpu-base
build:
args:
Expand All @@ -148,13 +148,13 @@ services:
NCCL_VERSION_OVERRIDE: 2.7.8-1+cuda10.1
TENSORFLOW_PACKAGE: tensorflow-gpu==2.4.3
KERAS_PACKAGE: keras==2.3.1
-PYTORCH_PACKAGE: torch==1.7.1+cu101
+PYTORCH_PACKAGE: torch==1.8.1+cu101
PYTORCH_LIGHTNING_PACKAGE: pytorch_lightning==1.3.8
-TORCHVISION_PACKAGE: torchvision==0.8.2+cu101
+TORCHVISION_PACKAGE: torchvision==0.9.1+cu101
MXNET_PACKAGE: mxnet-cu101==1.6.0.post0
# we additionally test the previous framework combination (CUDA 10.x) with mxnet 1.7.x
-# as mxnet 1.7.x only supports CUDA 10.x, but next framework combination targets CUAA 11.x
-test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_7_1-mxnet1_7_0_p1-pyspark3_2_0:
+# as mxnet 1.7.x only supports CUDA 10.x, but next framework combination targets CUDA 11.x
+test-gpu-gloo-py3_8-tf2_4_3-keras2_3_1-torch1_8_1-mxnet1_7_0_p1-pyspark3_2_0:
extends: test-gpu-base
build:
args:
Expand All @@ -163,13 +163,13 @@ services:
NCCL_VERSION_OVERRIDE: 2.7.8-1+cuda10.1
TENSORFLOW_PACKAGE: tensorflow-gpu==2.4.3
KERAS_PACKAGE: keras==2.3.1
-PYTORCH_PACKAGE: torch==1.7.1+cu101
+PYTORCH_PACKAGE: torch==1.8.1+cu101
PYTORCH_LIGHTNING_PACKAGE: pytorch_lightning==1.3.8
-TORCHVISION_PACKAGE: torchvision==0.8.2+cu101
+TORCHVISION_PACKAGE: torchvision==0.9.1+cu101
MXNET_PACKAGE: mxnet-cu101==1.7.0.post1
# we deviate from mxnet1_7_0_p2 here as other frameworks target CUDA 11.x and
# mxnet 1.7.x only supports CUDA 10.x, with mxnet 1.8.x we have CUDA 11.x packages
-test-gpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_8_1-mxnet1_8_0_p0-pyspark3_2_0:
+test-gpu-gloo-py3_8-tf2_5_1-keras2_4_3-torch1_9_1-mxnet1_8_0_p0-pyspark3_2_0:
extends: test-gpu-base
build:
args:
Expand All @@ -178,11 +178,11 @@ services:
NCCL_VERSION_OVERRIDE: 2.8.4-1+cuda11.2
TENSORFLOW_PACKAGE: tensorflow-gpu==2.5.1
KERAS_PACKAGE: keras==2.4.3
-PYTORCH_PACKAGE: torch==1.8.1+cu111
+PYTORCH_PACKAGE: torch==1.9.1+cu111
PYTORCH_LIGHTNING_PACKAGE: pytorch_lightning==1.3.8
-TORCHVISION_PACKAGE: torchvision==0.9.1+cu111
+TORCHVISION_PACKAGE: torchvision==0.10.1+cu111
MXNET_PACKAGE: mxnet-cu112==1.8.0.post0
-test-gpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0:
+test-gpu-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0:
extends: test-gpu-base
build:
args:
Expand All @@ -192,9 +192,9 @@ services:
MPI_KIND: OpenMPI
TENSORFLOW_PACKAGE: tensorflow-gpu==2.6.0
KERAS_PACKAGE: keras==2.6.0
-PYTORCH_PACKAGE: torch==1.9.0+cu111
+PYTORCH_PACKAGE: torch==1.10.0+cu111
PYTORCH_LIGHTNING_PACKAGE: pytorch-lightning==1.3.8
-TORCHVISION_PACKAGE: torchvision==0.10.0+cu111
+TORCHVISION_PACKAGE: torchvision==0.11.1+cu111
MXNET_PACKAGE: mxnet-cu112==1.8.0.post0
test-gpu-gloo-py3_8-tfhead-keras_none-torchhead-mxnethead-pyspark3_2_0:
extends: test-gpu-base
@@ -210,7 +210,7 @@ services:
TORCHVISION_PACKAGE: torchvision
MXNET_PACKAGE: mxnet-nightly-cu112

-test-mixed-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_9_0-mxnet1_8_0_p0-pyspark3_2_0:
+test-mixed-openmpi-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0:
extends: test-gpu-base
build:
args:
Expand All @@ -220,9 +220,9 @@ services:
MPI_KIND: OpenMPI
TENSORFLOW_PACKAGE: tensorflow-gpu==2.6.0
KERAS_PACKAGE: keras==2.6.0
-PYTORCH_PACKAGE: torch==1.9.0+cu111
+PYTORCH_PACKAGE: torch==1.10.0+cu111
PYTORCH_LIGHTNING_PACKAGE: pytorch_lightning==1.3.8
-TORCHVISION_PACKAGE: torchvision==0.10.0+cu111
+TORCHVISION_PACKAGE: torchvision==0.11.1+cu111
MXNET_PACKAGE: mxnet-cu112==1.8.0.post0
HOROVOD_BUILD_FLAGS: ""
HOROVOD_MIXED_INSTALL: 1
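To sanity-check the renamed services locally, something along these lines should work. This is only a sketch, not the CI invocation: the actual test entrypoints live in the Buildkite scripts, and the python -c check is just an illustrative smoke test that may need adjusting to how the base service defines its entrypoint.

# Sketch: build the new CPU baseline service and confirm the torch version inside it
docker-compose -f docker-compose.test.yml build test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0
docker-compose -f docker-compose.test.yml run --rm test-cpu-gloo-py3_8-tf2_6_0-keras2_6_0-torch1_10_0-mxnet1_8_0_p0-pyspark3_2_0 \
  python -c "import torch; print(torch.__version__)"  # expected: 1.10.0+cpu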