Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[v1.x] Fix nightly CD for python docker image releases #19774

Merged
merged 3 commits into from
Mar 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 15 additions & 27 deletions cd/Jenkinsfile_cd_pipeline
Original file line number Diff line number Diff line change
Expand Up @@ -47,41 +47,29 @@ pipeline {
cd_utils = load('cd/Jenkinsfile_utils.groovy')

// Update release job state in Jenkins
cd_utils.update_release_job_state()
cd_utils.update_release_job_state(params.CD_RELEASE_JOB_NAME)
}
}
}

stage("MXNet Release") {
steps {
script {
cd_utils.error_checked_parallel([

"Static libmxnet based release": {
stage("Build") {
cd_utils.trigger_release_job("Build static libmxnet", "mxnet_lib/static", params.MXNET_VARIANTS)
}
stage("Releases") {
cd_utils.error_checked_parallel([
"PyPI Release": {
echo "Building PyPI Release"
cd_utils.trigger_release_job("Release PyPI Packages", "python/pypi", params.MXNET_VARIANTS)
},
"Python Docker Release": {
echo "Building Python Docker Release"
cd_utils.trigger_release_job("Release Python Docker Images", "python/docker", params.MXNET_VARIANTS)
}
])
}
},

"Dynamic libmxnet based release": {
stage("Build") {
cd_utils.trigger_release_job("Build dynamic libmxnet", "mxnet_lib/dynamic", params.MXNET_VARIANTS)
stage("Build libmxnet") {
cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Build libmxnet", "mxnet_lib", params.MXNET_VARIANTS)
}
stage("Releases") {
cd_utils.error_checked_parallel([
"PyPI Release": {
echo "Building PyPI Release"
cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release PyPI Packages", "python/pypi", params.MXNET_VARIANTS)
},
"Python Docker Release": {
echo "Building Python Docker Release"
cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release Python Docker Images", "python/docker", params.MXNET_VARIANTS)
}
}

])
])
}
}
}
}
Expand Down
7 changes: 3 additions & 4 deletions cd/Jenkinsfile_release_job
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pipeline {
// Using string instead of choice parameter to keep the changes to the parameters minimal to avoid
// any disruption caused by different COMMIT_ID values changing the job parameter configuration on
// Jenkins.
string(defaultValue: "mxnet_lib/static", description: "Pipeline to build", name: "RELEASE_JOB_TYPE")
string(defaultValue: "mxnet_lib", description: "Pipeline to build", name: "RELEASE_JOB_TYPE")
string(defaultValue: "cpu,native,cu100,cu101,cu102,cu110,cu112", description: "Comma separated list of variants", name: "MXNET_VARIANTS")
booleanParam(defaultValue: false, description: 'Whether this is a release build or not', name: "RELEASE_BUILD")
}
Expand Down Expand Up @@ -74,7 +74,7 @@ pipeline {
|Release Build: ${params.RELEASE_BUILD}
|Commit Id: ${env.GIT_COMMIT}
|Branch: ${env.GIT_BRANCH}
|Variants: ${env.MXNET_VARIANTS}""".stripMargin()
|Variants: ${params.MXNET_VARIANTS}""".stripMargin()
}
}
}
Expand All @@ -90,8 +90,7 @@ pipeline {

// Add new job types here
def valid_job_types = [
"mxnet_lib/static",
"mxnet_lib/dynamic",
"mxnet_lib",
"python/pypi",
"python/docker"
]
Expand Down
20 changes: 10 additions & 10 deletions cd/Jenkinsfile_utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@

// Triggers a downstream jenkins job responsible for building, testing
// and publishing all the variants for a particular 'job_type'.
// The 'job_type' should be the name of the directory that contains the
// 'Jenkins_pipeline.groovy' file and has the pipeline definition for the
// The 'job_type' should be the name of the directory that contains the
// 'Jenkins_pipeline.groovy' file and has the pipeline definition for the
// artifact (docker image, binary, pypi or maven package, etc.) that should
// be published.

STATE_UPDATE="State Update"

def trigger_release_job(job_name, job_type, mxnet_variants) {
def trigger_release_job(cd_release_job, job_name, job_type, mxnet_variants) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious, why do we pass the cd_release_job instead of using the env var?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CD_RELEASE_JOB_NAME is actually a parameter of the CD pipeline, so this is how it's supposed to be used. It is also set as a global env variable, which can potentially cause some issues.
For reference, it is addressed in master by this PR #17775

def run = build(
job: env.CD_RELEASE_JOB_NAME,
job: cd_release_job,
parameters: [
string(name: "RELEASE_JOB_NAME", value: "${job_name}"),
string(name: "RELEASE_JOB_TYPE", value: "${job_type}"),
Expand All @@ -49,7 +49,7 @@ def trigger_release_job(job_name, job_type, mxnet_variants) {
// continue with the pipeline and try to post as many releases as possible
// but mark it as unstable
if (result == "UNSTABLE" || result == "ABORTED") {
currentBuild.result = "UNSTABLE"
currentBuild.result = "UNSTABLE"
}

// Throw an exception on failure, because this would mean the whole
Expand All @@ -65,12 +65,12 @@ def trigger_release_job(job_name, job_type, mxnet_variants) {
// the configuration of the release job in jenkins
// to the configuration of release job as defined in the
// Jenkinsfile_release_job for env.GIT_COMMIT revision
def update_release_job_state() {
def update_release_job_state(cd_release_job) {
build(
job: env.CD_RELEASE_JOB_NAME,
job: cd_release_job,
parameters: [
string(name: "RELEASE_JOB_TYPE", value: STATE_UPDATE),

// Should be set to the current git commit
string(name: "COMMIT_ID", value: "${env.GIT_COMMIT}")
])
Expand Down Expand Up @@ -103,7 +103,7 @@ def wrap_variant_pipeline_fn(variant_pipeline, total_num_pipelines) {
// The outcome of the execution of each parallel step will affect
// the result (SUCCESS, FAILURE, ABORTED, UNSTABLE) of the overall job.
// If all steps fail or are aborted, the job will be set to failed.
// If some steps fail or are aborted, the job will be set to unstable.
// If some steps fail or are aborted, the job will be set to unstable.
def error_checked_parallel(variant_pipelines) {
pipelines = variant_pipelines.inject([:]) { mp, key, value ->
mp << ["${key}": wrap_variant_pipeline_fn(value, variant_pipelines.size())]
Expand Down Expand Up @@ -179,7 +179,7 @@ def restore_dynamic_libmxnet(variant) {
// NOTE: Be mindful of the expected time that a step should take. If it will take a long time,
// and it can be done in a CPU node, do it in a CPU node. We should avoid using GPU instances unless
// we *have* to.
// However, if it is only packaging libmxnet and that doesn't take long. Then, the pipeline can
// However, if it is only packaging libmxnet and that doesn't take long. Then, the pipeline can
// just run on a single node. As is done below.
// For examples of multi-node CD pipelines, see the binary_release/static and binary_release/dynamic
// pipeline.
Expand Down
4 changes: 2 additions & 2 deletions cd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ The [release job](Jenkinsfile_release_job) takes five parameters:
* **RELEASE\_JOB\_TYPE**: Defines the release pipeline you want to execute.
* **COMMIT_ID**: The commit id to build

The release job executes, in parallel, the release pipeline for each of the variants (**MXNET_VARIANTS**) for the job type (**RELEASE\_JOB\_TYPE**). The job type the path to a directory (relative to the `cd` directory) that includes a `Jenkins_pipeline.groovy` file ([e.g.](mxnet_lib/static/Jenkins_pipeline.groovy)).
The release job executes, in parallel, the release pipeline for each of the variants (**MXNET_VARIANTS**) for the job type (**RELEASE\_JOB\_TYPE**). The job type is the path to a directory (relative to the `cd` directory) that includes a `Jenkins_pipeline.groovy` file ([e.g.](mxnet_lib/Jenkins_pipeline.groovy)).

NOTE: The **COMMIT_ID** is a little tricky and we must be very careful with it. It is necessary to ensure that the same commit is built throughout the pipeline, but at the same time, it has the potential to change the current state of the release job configuration - specifically the parameter configuration. Any changes to this configuration will require a "dry-run" of the release job to ensure Jenkins has the current (master) version. This is acceptable as there will be few changes to the parameter configuration for the job, if any at all. But, it's something to keep in mind.

Expand Down Expand Up @@ -192,4 +192,4 @@ def test(mxnet_variant) {

Examples:

Both the [statically linked libmxnet](mxnet_lib/static/Jenkins_pipeline.groovy) and [dynamically linked libmxnet](mxnet_lib/dynamic/Jenkins_pipeline.groovy) pipelines have long running compilation and testing stages that **do not** require specialized/expensive hardware (e.g. GPUs). Therefore, as much as possible, it is important to run each stage in on its own node, and design the pipeline to spend the least amount of time possible on expensive hardware. E.g. for GPU builds, only run GPU tests on GPU instances, all other stages can be executed on CPU nodes.
The [libmxnet](mxnet_lib/Jenkins_pipeline.groovy) pipeline has long running compilation and testing stages that **do not** require specialized/expensive hardware (e.g. GPUs). Therefore, as much as possible, it is important to run each stage on its own node, and design the pipeline to spend the least amount of time possible on expensive hardware. E.g. for GPU builds, only run GPU tests on GPU instances, all other stages can be executed on CPU nodes.
58 changes: 0 additions & 58 deletions cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy

This file was deleted.

17 changes: 8 additions & 9 deletions cd/python/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,14 @@
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

RUN apt-get update || true
RUN apt-get install -y software-properties-common
RUN add-apt-repository -y ppa:deadsnakes/ppa
RUN apt-get update || true
RUN apt-get install -y python3.7-dev python3.7-distutils virtualenv wget
RUN ln -sf /usr/bin/python3.7 /usr/local/bin/python3

RUN wget -nv https://bootstrap.pypa.io/get-pip.py
RUN python3 get-pip.py
RUN apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y python3.7-dev python3.7-distutils virtualenv wget && \
ln -sf /usr/bin/python3.7 /usr/local/bin/python3 && \
wget -nv https://bootstrap.pypa.io/get-pip.py && \
python3 get-pip.py

ARG MXNET_COMMIT_ID
ENV MXNET_COMMIT_ID=${MXNET_COMMIT_ID}
Expand Down
3 changes: 0 additions & 3 deletions cd/python/docker/Dockerfile.test
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

# Install test dependencies
RUN pip install nose

ARG USER_ID=1001
ARG GROUP_ID=1001

Expand Down
18 changes: 8 additions & 10 deletions cd/python/docker/python_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

set -xe

usage="Usage: python_images.sh <build|test|publish> MXNET-VARIANT"
usage="Usage: python_images.sh <build|test|push> MXNET-VARIANT"

command=${1:?$usage}
mxnet_variant=${2:?$usage}
Expand All @@ -39,8 +39,8 @@ image_name="${repository}:${main_tag}"

resources_path='cd/python/docker'

if [ ! -z "${RELEASE_DOCKERHUB_REPOSITORY}" ]; then
image_name="${RELEASE_DOCKERHUB_REPOSITORY}/${image_name}"
if [ ! -z "${RELEASE_PUBLIC_ECR_REPOSITORY}" ]; then
image_name="${RELEASE_PUBLIC_ECR_REPOSITORY}/${image_name}"
fi

build() {
Expand All @@ -57,26 +57,24 @@ test() {

# Ensure the correct context root is passed in when building - Dockerfile.test expects ci directory
docker build -t "${test_image_name}" --build-arg USER_ID=`id -u` --build-arg GROUP_ID=`id -g` --build-arg BASE_IMAGE="${image_name}" -f ${resources_path}/Dockerfile.test ./ci
./ci/safe_docker_run.py ${runtime_param} --cap-add "SYS_PTRACE" -u `id -u`:`id -g` -v `pwd`:/work/mxnet "${test_image_name}" ${resources_path}/test_python_image.sh "${mxnet_variant}"
}

push() {
if [ -z "${RELEASE_DOCKERHUB_REPOSITORY}" ]; then
echo "Cannot publish image without RELEASE_DOCKERHUB_REPOSITORY environment variable being set."
if [ -z "${RELEASE_PUBLIC_ECR_REPOSITORY}" ]; then
echo "Cannot publish image without RELEASE_PUBLIC_ECR_REPOSITORY environment variable being set."
exit 1
fi

# The secret name env var is set in the Jenkins configuration
# Manage Jenkins -> Configure System
./${ci_utils}/docker_login.py --secret-name "${RELEASE_DOCKERHUB_SECRET_NAME}"
# Retrieve an authentication token and authenticate Docker client to registry
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/w6z5f7h2

# Push image
docker push "${image_name}"

# Iterate over remaining tags, if any
for ((i=1;i<${#docker_tags[@]};i++)); do
local docker_tag="${docker_tags[${i}]}"
local latest_image_name="${RELEASE_DOCKERHUB_REPOSITORY}/${repository}:${docker_tag}_py3"
local latest_image_name="${RELEASE_PUBLIC_ECR_REPOSITORY}/${repository}:${docker_tag}_py3"

docker tag "${image_name}" "${latest_image_name}"
docker push "${latest_image_name}"
Expand Down
2 changes: 1 addition & 1 deletion cd/utils/docker_tag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

mxnet_variant=${1:?"Please specify the mxnet variant as the first parameter"}
is_release=${RELEASE_BUILD:-false}
version=${VERSION:-nightly}
version=${VERSION:-nightly_v1.x}

# The docker tags will be in the form <version>_<hardware>(_mkl)
# Eg. nightly_cpu, 1.4.0_cpu_mkl, nightly_gpu_cu80_mkl, etc.
Expand Down
14 changes: 7 additions & 7 deletions cd/utils/mxnet_base_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,25 @@ mxnet_variant=${1:?"Please specify the mxnet variant as the first parameter"}

case ${mxnet_variant} in
cu100*)
echo "nvidia/cuda:10.0-cudnn7-runtime-ubuntu16.04"
echo "nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04"
;;
cu101*)
echo "nvidia/cuda:10.1-cudnn7-runtime-ubuntu16.04"
echo "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
;;
cu102*)
echo "nvidia/cuda:10.2-cudnn7-runtime-ubuntu16.04"
echo "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"
;;
cu110*)
echo "nvidia/cuda:11.0-cudnn8-runtime-ubuntu16.04"
echo "nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04"
;;
cu112*)
echo "nvidia/cuda:11.2.1-cudnn8-runtime-ubuntu16.04"
echo "nvidia/cuda:11.2.1-cudnn8-runtime-ubuntu18.04"
;;
cpu)
echo "ubuntu:16.04"
echo "ubuntu:18.04"
;;
native)
echo "ubuntu:16.04"
echo "ubuntu:18.04"
;;
*)
echo "Error: Unrecognized mxnet-variant: '${mxnet_variant}'"
Expand Down