# Travis CI configuration: builds and tests Horovod inside an Ubuntu Docker
# container across several Python, framework and MPI combinations.
# Travis build image and primary language of the project.
dist: trusty
language: python
# Python versions to build against. Quoted so they are read as strings,
# not floats.
python:
  - "2.7"
  - "3.5"
  - "3.6"
# Docker is needed because the build steps run inside an Ubuntu container.
services:
  - docker
# Prepare an Ubuntu container with build tools, Java, Python, pip and PySpark.
# Every later step runs inside this container through `docker exec`; the
# repository checkout is mounted at /horovod.
before_install:
  # force latest Ubuntu for Python 3.6 and nightly TensorFlow which requires new glibc
  - |
    if [[ ${TRAVIS_PYTHON_VERSION} == "3.6" || ${TF_PACKAGE} == "tf-nightly" ]]; then
      export UBUNTU=18.04
    else
      export UBUNTU=16.04
    fi
  - docker pull ubuntu:${UBUNTU}
  # start a detached container that stays alive for an hour and remember its ID;
  # `docker run -d` prints the ID, so there is no need to sleep and then guess
  # the container from `docker ps`
  - export CONTAINER=$(docker run -d -v "$(pwd)":/horovod ubuntu:${UBUNTU} /bin/sh -c "sleep 3600")
  - docker exec ${CONTAINER} /bin/sh -c "apt-get update -qq"
  # install necessary network tools
  - docker exec ${CONTAINER} /bin/sh -c "apt-get install -y wget openssh-client git build-essential"
  # install OpenJDK 8 for PySpark
  - docker exec ${CONTAINER} /bin/sh -c "apt-get install -y openjdk-8-jdk-headless"
  # install Python and add a proper symlink; Python 3.6 additionally needs
  # the python3-distutils package
  - |
    if [[ ${TRAVIS_PYTHON_VERSION} == "3.6" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "apt-get install -y python${TRAVIS_PYTHON_VERSION} python${TRAVIS_PYTHON_VERSION}-dev python3-distutils"
    else
      docker exec ${CONTAINER} /bin/sh -c "apt-get install -y python${TRAVIS_PYTHON_VERSION} python${TRAVIS_PYTHON_VERSION}-dev"
    fi
  - docker exec ${CONTAINER} /bin/sh -c "ln -s /usr/bin/python${TRAVIS_PYTHON_VERSION} /usr/bin/python"
  # bootstrap pip, then bring pip, setuptools and requests up to date
  - docker exec ${CONTAINER} /bin/sh -c "wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py && rm get-pip.py"
  - docker exec ${CONTAINER} /bin/sh -c "pip install -U --force pip setuptools requests"
  # install PySpark
  - docker exec ${CONTAINER} /bin/sh -c "pip install pyspark==${PYSPARK}"
# Framework / MPI combinations under test. Each entry is one set of
# environment variables (TensorFlow, Keras, PyTorch, MXNet packages, MPI
# flavour and PySpark version) read by the build steps.
env:
  matrix:
    - TF_PACKAGE=tensorflow==1.1.0 KERAS_PACKAGE=keras==2.0.0 PYTORCH_PACKAGE=torch==0.4.0 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.1.2
    - TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2 PYTORCH_PACKAGE=torch==0.4.1 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.3.2
    - TF_PACKAGE=tensorflow==1.12.0 KERAS_PACKAGE=keras==2.2.2 PYTORCH_PACKAGE=torch==1.0.0 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.4.0
    - TF_PACKAGE=tf-nightly KERAS_PACKAGE=git+https://github.com/keras-team/keras.git PYTORCH_PACKAGE=torch-nightly MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.4.0
    - TF_PACKAGE=tensorflow==1.12.0 KERAS_PACKAGE=keras==2.2.2 PYTORCH_PACKAGE=torch==1.0.0 MXNET_PACKAGE=mxnet-gcc5 MPI=MPICH PYSPARK=2.4.0
# Build matrix tuning.
matrix:
  # let Travis report the overall build result as early as possible
  fast_finish: true
  # Python / env combinations that are skipped. An exclusion only applies when
  # its env string matches an env matrix entry exactly, so keep these strings
  # in sync with the env matrix.
  exclude:
    - python: "3.5"
      env: TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2 PYTORCH_PACKAGE=torch==0.4.1 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.3.2
    - python: "3.6"
      env: TF_PACKAGE=tensorflow==1.6.0 KERAS_PACKAGE=keras==2.1.2 PYTORCH_PACKAGE=torch==0.4.1 MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.3.2
    - python: "3.5"
      env: TF_PACKAGE=tensorflow==1.12.0 KERAS_PACKAGE=keras==2.2.2 PYTORCH_PACKAGE=torch==1.0.0 MXNET_PACKAGE=mxnet-gcc5 MPI=MPICH PYSPARK=2.4.0
    - python: "3.6"
      env: TF_PACKAGE=tensorflow==1.12.0 KERAS_PACKAGE=keras==2.2.2 PYTORCH_PACKAGE=torch==1.0.0 MXNET_PACKAGE=mxnet-gcc5 MPI=MPICH PYSPARK=2.4.0
    - python: "3.5"
      env: TF_PACKAGE=tf-nightly KERAS_PACKAGE=git+https://github.com/keras-team/keras.git PYTORCH_PACKAGE=torch-nightly MXNET_PACKAGE=mxnet-gcc5 MPI=OpenMPI PYSPARK=2.4.0
# Install MPI, the deep learning frameworks selected by the env matrix, and
# finally Horovod itself, all inside the build container.
install:
  # MPI: unpack the prebuilt OpenMPI 3.0.0 tarball into /usr/local, or install
  # the distribution's MPICH package
  - |
    if [[ ${MPI} == "OpenMPI" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "wget -O /tmp/openmpi-3.0.0-bin.tar.gz https://github.com/uber/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz"
      docker exec ${CONTAINER} /bin/sh -c "cd /usr/local && tar -zxf /tmp/openmpi-3.0.0-bin.tar.gz && ldconfig"
    else
      # installs mpich version 3.0.4
      docker exec ${CONTAINER} /bin/sh -c "apt-get install -y mpich"
    fi
  - docker exec ${CONTAINER} /bin/sh -c "pip install mpi4py"
  # TensorFlow
  - docker exec ${CONTAINER} /bin/sh -c "pip install ${TF_PACKAGE}"
  # Keras & dependencies
  - docker exec ${CONTAINER} /bin/sh -c "pip install ${KERAS_PACKAGE} h5py scipy pandas"
  # PyTorch dependencies
  - docker exec ${CONTAINER} /bin/sh -c "pip install future typing"
  # PyTorch: for the nightly build, install torchvision first, remove the torch
  # package, then install torch_nightly from the nightly wheel index
  - |
    if [[ ${PYTORCH_PACKAGE} == "torch-nightly" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "pip install torchvision"
      docker exec ${CONTAINER} /bin/sh -c "pip uninstall -y torch"
      docker exec ${CONTAINER} /bin/sh -c "pip install torch_nightly -v -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html"
    else
      docker exec ${CONTAINER} /bin/sh -c "pip install ${PYTORCH_PACKAGE} torchvision"
    fi
  # MXNet
  - docker exec ${CONTAINER} /bin/sh -c "pip install ${MXNET_PACKAGE}"
  # Horovod: build a source distribution from the mounted checkout and install it
  - docker exec ${CONTAINER} /bin/sh -c "cd /horovod && python setup.py sdist"
  - docker exec ${CONTAINER} /bin/sh -c "pip install -v /horovod/dist/horovod-*.tar.gz"
# Run the unit tests and shrunken versions of the example programs with two
# MPI processes inside the build container.
script:
  # choose the mpirun command line for the selected MPI flavour; MPIRUN is
  # expanded on the host before each command is passed to the container
  - |
    if [[ ${MPI} == "OpenMPI" ]]; then
      export MPIRUN="mpirun -allow-run-as-root -np 2 -H localhost:2 -bind-to none -map-by slot -mca mpi_abort_print_stack 1"
    else
      export MPIRUN="mpirun -np 2"
    fi
  # prepare ~/.keras folder to avoid race condition
  - docker exec ${CONTAINER} /bin/sh -c "mkdir -p ~/.keras"
  # run unit tests, one mpirun invocation per test file
  - docker exec ${CONTAINER} /bin/sh -c "pip install pytest && cd /horovod/test && (echo test_*.py | xargs -n 1 ${MPIRUN} pytest -v)"
  # run stall test
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/test/test_stall.py"
  # hack for compatibility of MNIST example with tf 1.1.0
  - |
    if [[ ${TF_PACKAGE} == "tensorflow==1.1.0" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/from tensorflow import keras/from tensorflow.contrib import keras/\" /horovod/examples/tensorflow_mnist.py"
    fi
  # hack TensorFlow MNIST example to be smaller
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/last_step=20000/last_step=100/\" /horovod/examples/tensorflow_mnist.py"
  # run TensorFlow MNIST example
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/tensorflow_mnist.py"
  # run TensorFlow MNIST example with horovodrun. For now, horovodrun does not
  # support MPICH.
  - |
    if [[ ${MPI} != "MPICH" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "horovodrun -np 2 -H localhost:2 python /horovod/examples/tensorflow_mnist.py"
    fi
  # hack TensorFlow Eager MNIST example to be smaller
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/dataset.take(20000/dataset.take(100/\" /horovod/examples/tensorflow_mnist_eager.py"
  # run TensorFlow Eager MNIST example (only for the TensorFlow 1.12.0 jobs)
  - |
    if [[ ${TF_PACKAGE} == "tensorflow==1.12.0" ]]; then
      docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/tensorflow_mnist_eager.py"
    fi
  # download Keras MNIST dataset
  - docker exec ${CONTAINER} /bin/sh -c "python -c \"from keras.datasets import mnist; mnist.load_data()\""
  # hack Keras MNIST advanced example to be smaller
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/epochs = .*/epochs = 12/\" /horovod/examples/keras_mnist_advanced.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/model.add(Conv2D(32, kernel_size=(3, 3),/model.add(Conv2D(1, kernel_size=(3, 3),/\" /horovod/examples/keras_mnist_advanced.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/model.add(Conv2D(64, (3, 3), activation='relu'))//\" /horovod/examples/keras_mnist_advanced.py"
  # run Keras MNIST advanced example
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/keras_mnist_advanced.py"
  # hack PyTorch MNIST example to be smaller
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/self.fc1 = nn.Linear(320, 50)/self.fc1 = nn.Linear(784, 50)/\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = F.relu(F.max_pool2d(self.conv1(x), 2))//\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))//\" /horovod/examples/pytorch_mnist.py"
  - docker exec ${CONTAINER} /bin/sh -c "sed -i \"s/x = x.view(-1, 320)/x = x.view(-1, 784)/\" /horovod/examples/pytorch_mnist.py"
  # run PyTorch MNIST example
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/pytorch_mnist.py"
  # run MXNet MNIST example
  - docker exec ${CONTAINER} /bin/sh -c "${MPIRUN} python /horovod/examples/mxnet_mnist.py"
# NOTE(review): stray web-page footer text, commented out so the file parses: "You can’t perform that action at this time."