Skip to content

Commit

Permalink
Add Dockerfile partials to support Mkl + MPI + Horovod; Remove traili…
Browse files Browse the repository at this point in the history
…ng whitespace from python.partial.Dockerfile
  • Loading branch information
claynerobison committed May 21, 2019
1 parent eae5d0b commit 3023584
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 2 deletions.
@@ -0,0 +1,3 @@
# Optionally check out the Horovod source tree into /horovod_src.
# Enable with --build-arg CHECKOUT_HOROVOD_SRC=1; the default (0) skips the clone.
ARG CHECKOUT_HOROVOD_SRC=0
# An explicit `if` is used instead of `test ... && git clone ... || true`:
# the trailing `|| true` also swallowed a *failed* clone, so the build
# succeeded without the sources it was asked to include. A non-integer value
# makes `[ ... -eq 1 ]` evaluate false, so the clone is skipped as before.
RUN if [ "${CHECKOUT_HOROVOD_SRC}" -eq 1 ]; then \
        git clone --recursive https://github.com/uber/horovod.git /horovod_src; \
    fi
@@ -0,0 +1,2 @@
# Install the Horovod package with pip, without keeping pip's download cache
# in the image layer.
RUN ${PIP} --no-cache-dir install horovod
44 changes: 44 additions & 0 deletions tensorflow/tools/dockerfiles/partials/mpi.partial.Dockerfile
@@ -0,0 +1,44 @@
# Install libnuma, openssh and wget using whichever package manager the base
# image provides (apt-get or yum).
#
# The branches are selected with an explicit if/elif/else. The previous
# `apt ... || yum ... || echo ... && exit 1` form relied on `&&` binding
# tighter than `||`, but in the shell both have equal precedence and associate
# left-to-right: after a successful apt branch the `yum -y install` step was
# still attempted, failed, and the trailing `echo ... && exit 1` aborted the
# build. A failure inside the selected branch now fails the build directly
# instead of falling through to the other package manager.
RUN if command -v apt-get > /dev/null 2>&1; then \
        apt-get update && \
        apt-get install -y --no-install-recommends --fix-missing \
            libnuma-dev \
            openssh-server \
            openssh-client \
            wget && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/*; \
    elif command -v yum > /dev/null 2>&1; then \
        yum -y update && \
        yum -y install \
            numactl-devel \
            openssh-server \
            openssh-clients \
            wget && \
        yum clean all; \
    else \
        echo "Unsupported Linux distribution. Aborting!" >&2; \
        exit 1; \
    fi

# Build Open MPI 4.0.0 from the upstream release tarball and install it.
# All work happens in a scratch directory under /tmp that is removed in the
# same layer. `set -e` aborts the step at the first failing command.
RUN set -e; \
    mkdir /tmp/openmpi; \
    cd /tmp/openmpi; \
    wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz; \
    tar zxf openmpi-4.0.0.tar.gz; \
    cd openmpi-4.0.0; \
    ./configure --enable-orterun-prefix-by-default; \
    make -j $(nproc) all; \
    make install; \
    ldconfig; \
    rm -rf /tmp/openmpi

# Create a wrapper for OpenMPI to allow running as root by default.
# The real binary is renamed to mpirun.real and /usr/local/bin/mpirun becomes
# a two-line bash script that forwards all arguments to it with
# --allow-run-as-root prepended.
# The wrapper uses `exec` so that mpirun.real replaces the wrapper shell:
# signals sent to the `mpirun` process reach the real launcher directly
# instead of stopping at an intermediate bash process.
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/local/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
    chmod a+x /usr/local/bin/mpirun

# Configure OpenMPI with good defaults: append a btl_tcp_if_exclude setting
# naming the lo and docker0 interfaces to the system-wide MCA parameter file.
RUN printf '%s\n' 'btl_tcp_if_exclude = lo,docker0' >> /usr/local/etc/openmpi-mca-params.conf

# Create the /var/run/sshd directory (no error if it already exists).
# This step does not install OpenSSH itself; the openssh packages are
# installed by the package-manager step earlier in this partial.
# NOTE(review): presumably this is the runtime directory sshd needs so MPI can
# communicate between containers over SSH — confirm.
RUN mkdir -p /var/run/sshd

# Allow OpenSSH to talk to containers without asking for confirmation.
# Every existing line that mentions StrictHostKeyChecking is deleted from the
# client configuration, then the override is appended at the end of the file.
# sed edits the file in place: unlike `cat ... | grep -v ...`, a missing or
# unreadable ssh_config fails the step (no pipeline to mask the error), and a
# file that ends up with no remaining lines is not treated as an error.
RUN sed -i '/StrictHostKeyChecking/d' /etc/ssh/ssh_config && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config
Expand Up @@ -15,4 +15,4 @@ RUN ${PIP} --no-cache-dir install --upgrade \
setuptools

# Some TF tools expect a "python" binary
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
33 changes: 32 additions & 1 deletion tensorflow/tools/dockerfiles/spec.yml
@@ -1,5 +1,5 @@
header: |
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -83,6 +83,21 @@ slice_sets:
- ubuntu/python
- tensorflow
- shell
# Slice entry for the Horovod image variant ("-horovod" name suffix).
# NOTE(review): presumably the generated Dockerfile lands in the "mkl"
# subdirectory named below — confirm against the assembler.
- add_to_name: "-horovod"
dockerfile_exclusive_name: "horovod"
dockerfile_subdirectory: "mkl"
# Partials that make up this image; mpi is listed before horovod.
# NOTE(review): presumably they are concatenated in the listed order — confirm.
partials:
- ubuntu/version
- ubuntu/cpu
- ubuntu/python
- tensorflow
- mpi
- horovod
- shell
# Test script associated with this slice.
tests:
- import-mkl-horovod.sh
# Build argument: selects the intel-tensorflow package as TF_PACKAGE.
args:
- TF_PACKAGE=intel-tensorflow
- add_to_name: "-gpu"
dockerfile_exclusive_name: "gpu"
args:
Expand Down Expand Up @@ -110,6 +125,22 @@ slice_sets:
- build-cpu.sh
args:
- CHECKOUT_TF_SRC=1
# Slice entry for the Horovod development image variant ("devel-horovod").
# NOTE(review): presumably the generated Dockerfile lands in the "mkl"
# subdirectory named below — confirm against the assembler.
- add_to_name: "devel-horovod"
dockerfile_exclusive_name: "devel-horovod"
dockerfile_subdirectory: "mkl"
# Partials that make up this image; mpi is listed before devel-horovod.
# NOTE(review): presumably they are concatenated in the listed order — confirm.
partials:
- ubuntu/version
- ubuntu/devel-cpu
- ubuntu/python
- ubuntu/bazel
- mpi
- devel-horovod
- shell
# Test script associated with this slice.
tests:
- build-mkl-horovod.sh
# Build arguments: both source-checkout switches are set to 1.
args:
- CHECKOUT_TF_SRC=1
- CHECKOUT_HOROVOD_SRC=1
- add_to_name: "devel-gpu"
dockerfile_exclusive_name: "devel-gpu"
partials:
Expand Down
46 changes: 46 additions & 0 deletions tensorflow/tools/dockerfiles/tests/build-mkl-horovod.sh
@@ -0,0 +1,46 @@
#!/usr/bin/env bash

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================



# Download and build TensorFlow.
#
# Every step below is a separate statement on purpose. With `set -e`, a
# command that fails inside an `a && b && c` list does NOT abort the script
# unless it is the last command of the list, so chaining the build steps with
# `&&` let a failed step be skipped over silently.
set -euxo pipefail
git clone --branch=master --depth=1 https://github.com/tensorflow/tensorflow.git /tensorflow
cd /tensorflow

# Some TF tools expect a "python" binary. Only create the link when nothing is
# there yet: the image may already provide /usr/local/bin/python, in which
# case a plain `ln -s` fails with "File exists" and aborts the script.
if [[ ! -e /usr/local/bin/python && ! -L /usr/local/bin/python ]]; then
  ln -s "$(command -v "${PYTHON}")" /usr/local/bin/python
fi

# Build TensorFlow with support for Intel(R) MKL-DNN
#
# `yes` never stops on its own; it dies from a broken pipe once configure.py
# exits. Under `pipefail` that makes the whole pipeline report failure even
# when configure.py succeeded, so pipefail is switched off for this one
# pipeline. The pipeline status is then configure.py's own exit status, which
# `set -e` still enforces.
set +o pipefail
yes "" | "${PYTHON}" configure.py
set -o pipefail

bazel build -c opt --config=mkl --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
    tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip
pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl
rm -rf /tmp/pip
rm -rf /root/.cache


# Download and build Horovod (cloned into ./horovod under the current
# directory, i.e. inside the TensorFlow checkout).
git clone --recursive https://github.com/uber/horovod.git
cd horovod
# Build Horovod with TensorFlow support and without PyTorch support.
export HOROVOD_WITHOUT_PYTORCH=1
export HOROVOD_WITH_TENSORFLOW=1
# `setup.py sdist` writes the source tarball to ./dist by default, so install
# from (and clean up) dist/ rather than a non-existent sdist/ directory.
python setup.py sdist
pip --no-cache-dir install --upgrade dist/horovod*.tar.gz
rm -rf dist
rm -rf /root/.cache
18 changes: 18 additions & 0 deletions tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Smoke test: exit with status 1 unless TensorFlow reports that MKL is
# enabled, then verify that Horovod's TensorFlow bindings can be imported.
python -c '
import sys
from tensorflow.python import pywrap_tensorflow

if not pywrap_tensorflow.IsMklEnabled():
    sys.exit(1)

import horovod.tensorflow as hvd
'

0 comments on commit 3023584

Please sign in to comment.