
Commit 846bfc0

simplify build w/ docker
1 parent 2e2ace1 commit 846bfc0

11 files changed, +183 -99 lines changed

.travis.yml

Lines changed: 20 additions & 92 deletions
@@ -2,104 +2,32 @@ sudo: required

 dist: trusty

-services:
-- docker
-
-language: python
-
-cache:
-  directories:
-  - $HOME/.ivy2
-  - $HOME/.sbt/launchers/
-  - $HOME/.cache/spark-versions
+language: minimal

 env:
   global:
-  - SCALA_VERSION=2.11.8
-  - SPARK_VERSION=2.3.1
-  - SPARK_BUILD="spark-${SPARK_VERSION}-bin-hadoop2.7"
-  - SPARK_BUILD_URL="https://dist.apache.org/repos/dist/release/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz"
-  - SPARK_HOME=$HOME/.cache/spark-versions/$SPARK_BUILD
-  - RUN_ONLY_LIGHT_TESTS=True
+  - DOCKER_COMPOSE_VERSION=1.22.0
   matrix:
-  - PYTHON_VERSION=3.6.2 TEST_SUITE=scala-tests
-  - PYTHON_VERSION=3.6.2 TEST_SUITE=python-tests
-  - PYTHON_VERSION=3.6.2 TEST_SUITE=pylint
-  - PYTHON_VERSION=3.5.1 TEST_SUITE=python-tests
-  - PYTHON_VERSION=3.5.1 TEST_SUITE=pylint
-  - PYTHON_VERSION=2.7.13 TEST_SUITE=python-tests
-  - PYTHON_VERSION=2.7.13 TEST_SUITE=pylint
-
-before_install:
-  - ./bin/download_travis_dependencies.sh
-  - ci_env=`bash <(curl -s https://codecov.io/env)`
-  - if [[ "$PYTHON_VERSION" == 2.* ]]; then
-      export CONDA_URL="repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh";
-      export PYSPARK_PYTHON=python2;
-    else
-      export CONDA_URL="repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh";
-      export PYSPARK_PYTHON=python3;
-    fi
+  - PYTHON_VERSION=3.6 TEST_SUITE=pylint
+  - PYTHON_VERSION=2.7 TEST_SUITE=pylint
+  - PYTHON_VERSION=3.6 TEST_SUITE=python-tests
+  - PYTHON_VERSION=2.7 TEST_SUITE=python-tests
+  - PYTHON_VERSION=3.6 TEST_SUITE=scala-tests

-  - docker run $ci_env
-      -e "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64"
-      -e SPARK_VERSION
-      -e SPARK_BUILD
-      -e SCALA_VERSION
-      -e PYTHON_VERSION
-      -e PYSPARK_PYTHON
-      -e SPARK_HOME
-      -e RUN_ONLY_LIGHT_TESTS
-      -e CONDA_URL
-      -d --name ubuntu-test -v $HOME ubuntu:16.04 tail -f /dev/null
-  - docker ps
-
-# See this page: http://conda.pydata.org/docs/travis.html
-install:
-  # install needed ubuntu packages
-  - docker exec -t ubuntu-test bash -c "
-      apt-get update;
-      apt-get upgrade -y;
-      apt-get install -y curl openjdk-8-jdk bzip2"
-
-  # Copy the current dir into the docker container
-  - docker exec -t ubuntu-test bash -c "mkdir -p $TRAVIS_BUILD_DIR"
-  - docker cp `pwd`/. ubuntu-test:$TRAVIS_BUILD_DIR
+services:
+- docker

-  # Download and set up miniconda
-  - docker exec -t ubuntu-test bash -c "
-      curl https://$CONDA_URL >> $HOME/miniconda.sh;
-      bash $HOME/miniconda.sh -b -p $HOME/miniconda;
-      bash $HOME/miniconda.sh -b -p $HOME/miniconda;
-      $HOME/miniconda/bin/conda config --set always_yes yes --set changeps1 no;
-      $HOME/miniconda/bin/conda update -q conda;
-      $HOME/miniconda/bin/conda info -a;
-      $HOME/miniconda/bin/conda create -q -n test-environment python=$PYTHON_VERSION"
+before_install:
+  # update docker-compose to the specified version, https://docs.travis-ci.com/user/docker/#using-docker-compose
+  - sudo rm /usr/local/bin/docker-compose
+  - curl -L https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-`uname -s`-`uname -m` > docker-compose
+  - chmod +x docker-compose
+  - sudo mv docker-compose /usr/local/bin

-  # Activate conda environment and install required packages
-  - docker exec -t ubuntu-test bash -c "
-      source $HOME/miniconda/bin/activate test-environment;
-      pip install --user -r $TRAVIS_BUILD_DIR/dev/dev-requirements.txt;
-      pip install --user -r $TRAVIS_BUILD_DIR/python/requirements.txt;"
+install:
+  - docker-compose build --build-arg PYTHON_VERSION=$PYTHON_VERSION
+  - docker-compose up -d --scale worker=2
+  - docker-compose exec master bash -c "cd /mnt/sparkdl && build/sbt assembly"

 script:
-  - docker cp $HOME/.cache ubuntu-test:$HOME/
-  # build assembly
-  - docker exec -t ubuntu-test bash -c "
-      source $HOME/miniconda/bin/activate test-environment;
-      cd $TRAVIS_BUILD_DIR;
-      ./dev/run.py assembly"
-  # run python style and test suites
-  - docker exec -t ubuntu-test bash -c "
-      source $HOME/miniconda/bin/activate test-environment;
-      cd $TRAVIS_BUILD_DIR;
-      ./dev/run.py $TEST_SUITE"
-
-after_success:
-  # Unfortunately we need to install coverage here even though it's been installed via pip in the virtual env.
-  # The codecov bash script executes `which coverage` and that comes up empty unless we install it directly.
-  - docker exec -t ubuntu-test bash -c "
-      source $HOME/miniconda/bin/activate test-environment;
-      $HOME/miniconda/bin/conda install -c anaconda coverage;
-      cd $TRAVIS_BUILD_DIR;
-      bash <(curl -s https://codecov.io/bash)"
+  - docker-compose exec master bash -c "cd /mnt/sparkdl && dev/run.py $TEST_SUITE"
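For reference, the flow the new .travis.yml drives on CI can also be reproduced locally. A minimal sketch, assuming Docker and a recent docker-compose (CI pins 1.22.0) are installed and the commands run from the repository root:

    # Build the image for the desired Python version (the Dockerfile defaults to 3.6).
    docker-compose build --build-arg PYTHON_VERSION=3.6
    # Start one Spark master and two workers in the background.
    docker-compose up -d --scale worker=2
    # Build the Scala assembly inside the master container.
    docker-compose exec master bash -c "cd /mnt/sparkdl && build/sbt assembly"
    # Run one of the CI suites, e.g. the Python tests (pylint and scala-tests work the same way).
    docker-compose exec master bash -c "cd /mnt/sparkdl && dev/run.py python-tests"
    # Tear the cluster down when finished.
    docker-compose down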

Dockerfile

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+FROM ubuntu:16.04
+
+ARG PYTHON_VERSION=3.6
+
+RUN apt-get update && \
+    apt-get install -y wget bzip2 build-essential openjdk-8-jdk ssh sudo && \
+    apt-get clean
+
+# Add ubuntu user and enable password-less sudo
+RUN useradd -mU -s /bin/bash -G sudo ubuntu && \
+    echo "ubuntu ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
+
+# Set up SSH. Docker will cache this layer, so the keys end up the same on all containers.
+# We use a non-default location to simulate MLR, which doesn't have passwordless SSH.
+# We still keep the private key for manual SSH during debugging.
+RUN ssh-keygen -q -f ~/.ssh/docker -N "" && \
+    cp ~/.ssh/docker.pub ~/.ssh/authorized_keys
+
+# Install Open MPI
+RUN wget --quiet https://github.com/uber/horovod/files/1596799/openmpi-3.0.0-bin.tar.gz -O /tmp/openmpi.tar.gz && \
+    cd /usr/local && \
+    tar -zxf /tmp/openmpi.tar.gz && \
+    ldconfig && \
+    rm -r /tmp/openmpi.tar.gz
+
+# Install Miniconda.
+RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O /tmp/miniconda.sh && \
+    /bin/bash /tmp/miniconda.sh -b -p /opt/conda
+ENV PATH /opt/conda/bin:$PATH
+
+# Install sparkdl dependencies.
+# Instead of activating the "sparkdl" conda env, we update env variables directly.
+ENV PYTHON_VERSION $PYTHON_VERSION
+ENV PATH /opt/conda/envs/sparkdl/bin:$PATH
+ENV LD_LIBRARY_PATH /opt/conda/envs/sparkdl/lib:$LD_LIBRARY_PATH
+ENV PYTHONPATH /opt/conda/envs/sparkdl/lib/python$PYTHON_VERSION/site-packages:$PYTHONPATH
+COPY ./environment.yml /tmp/environment.yml
+RUN conda create -n sparkdl python=$PYTHON_VERSION && \
+    conda env update -n sparkdl -f /tmp/environment.yml
+
+# Install Spark and update env variables.
+ENV SPARK_VERSION 2.4.0
+ENV SPARK_BUILD "spark-${SPARK_VERSION}-bin-hadoop2.7"
+ENV SPARK_BUILD_URL "https://dist.apache.org/repos/dist/release/spark/spark-2.4.0/${SPARK_BUILD}.tgz"
+RUN wget --quiet $SPARK_BUILD_URL -O /tmp/spark.tgz && \
+    tar -C /opt -xf /tmp/spark.tgz && \
+    mv /opt/$SPARK_BUILD /opt/spark && \
+    rm /tmp/spark.tgz
+ENV SPARK_HOME /opt/spark
+ENV PATH $SPARK_HOME/bin:$PATH
+ENV PYTHONPATH /opt/spark/python/lib/py4j-0.10.7-src.zip:/opt/spark/python/lib/pyspark.zip:$PYTHONPATH
+
+# Declare env variables to run tests.
+ENV SCALA_VERSION 2.11.8
+ENV PYSPARK_PYTHON python
+ENV RUN_ONLY_LIGHT_TESTS True
+
+# Persist important env variables to /etc/environment.
+RUN echo "SCALA_VERSION=$SCALA_VERSION" >> /etc/environment && \
+    echo "SPARK_VERSION=$SPARK_VERSION" >> /etc/environment && \
+    echo "SPARK_HOME=$SPARK_HOME" >> /etc/environment && \
+    echo "PATH=$PATH" >> /etc/environment && \
+    echo "PYTHONPATH=$PYTHONPATH" >> /etc/environment && \
+    echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> /etc/environment && \
+    echo "PYSPARK_PYTHON=$PYSPARK_PYTHON" >> /etc/environment && \
+    echo "RUN_ONLY_LIGHT_TESTS=$RUN_ONLY_LIGHT_TESTS" >> /etc/environment
+
+# The sparkdl dir will be mounted here.
+VOLUME /mnt/sparkdl
+
+ENTRYPOINT service ssh restart && /bin/bash
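The image can also be built and explored on its own, outside of docker-compose. A short sketch, assuming it is run from the repository root; the tag sparkdl-dev is arbitrary:

    # Build a standalone image, overriding the default Python 3.6 with 2.7.
    docker build --build-arg PYTHON_VERSION=2.7 -t sparkdl-dev .
    # Open an interactive shell with the repo mounted at /mnt/sparkdl (the VOLUME declared above).
    docker run -it --rm -v "$(pwd)":/mnt/sparkdl sparkdl-dev
    # Inside the container, inspect the persisted variables with: cat /etc/environment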

build.sbt

Lines changed: 21 additions & 3 deletions
@@ -3,10 +3,11 @@

 import ReleaseTransformations._

-val sparkVer = sys.props.getOrElse("spark.version", "2.3.1")
+val sparkVer = sys.props.getOrElse("spark.version", "2.4.0")
 val sparkBranch = sparkVer.substring(0, 3)
 val defaultScalaVer = sparkBranch match {
   case "2.3" => "2.11.8"
+  case "2.4" => "2.11.8"
   case _ => throw new IllegalArgumentException(s"Unsupported Spark version: $sparkVer.")
 }
 val scalaVer = sys.props.getOrElse("scala.version", defaultScalaVer)
@@ -16,14 +17,19 @@ sparkVersion := sparkVer

 scalaVersion := scalaVer

+name := "spark-deep-learning"
+
 spName := "databricks/spark-deep-learning"

-// Don't forget to set the version
+organization := "com.databricks"
+
 version := (version in ThisBuild).value + s"-spark$sparkBranch"

 // All Spark Packages need a license
 licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0"))

+isSnapshot := version.value.contains("-SNAPSHOT")
+
 spAppendScalaVersion := true

 // Add Spark components this package depends on, e.g, "mllib", ....
@@ -34,7 +40,7 @@ sparkComponents ++= Seq("mllib-local", "mllib", "sql")

 // add any Spark Package dependencies using spDependencies.
 // e.g. spDependencies += "databricks/spark-avro:0.1"
-spDependencies += s"databricks/tensorframes:0.5.0-s_${scalaMajorVersion}"
+spDependencies += s"databricks/tensorframes:0.5.0-s_$scalaMajorVersion"


 libraryDependencies ++= Seq(
@@ -48,6 +54,7 @@ libraryDependencies ++= Seq(

 assemblyMergeStrategy in assembly := {
   case "requirements.txt" => MergeStrategy.concat
+  case "LICENSE-2.0.txt" => MergeStrategy.rename
   case x =>
     val oldStrategy = (assemblyMergeStrategy in assembly).value
     oldStrategy(x)
@@ -80,6 +87,14 @@ autoAPIMappings := true

 coverageHighlighting := false

+unmanagedResources in Compile += baseDirectory.value / "LICENSE"
+
+unmanagedResourceDirectories in Compile += baseDirectory.value / "python"
+
+includeFilter in unmanagedResources := "requirements.txt" ||
+  new SimpleFileFilter(_.relativeTo(baseDirectory.value / "python")
+    .exists(_.getPath.matches("sparkdl/.*\\.py"))) || "*.png" || "*.jpg" || "*.pb"
+
 // We only use sbt-release to update version numbers for now.
 releaseProcess := Seq[ReleaseStep](
   inquireVersions,
@@ -89,3 +104,6 @@ releaseProcess := Seq[ReleaseStep](
   setNextVersion,
   commitNextVersion
 )
+
+// Skip tests during assembly
+test in assembly := {}
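With the new defaults, a plain assembly build targets Spark 2.4.0 and Scala 2.11.8, and tests are skipped during assembly. A sketch of the two common invocations; the explicit -D overrides mirror how dev/run.py calls sbt:

    # Build against the default Spark 2.4.0 / Scala 2.11.8.
    build/sbt assembly
    # Pin the versions explicitly (only the 2.3 and 2.4 branches are accepted by the match above).
    build/sbt -Dspark.version=2.4.0 -Dscala.version=2.11.8 assembly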

dev/dev-requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -3,3 +3,4 @@ argcomplete==1.9.4
 pylint==1.8.4
 prospector==0.12.7
 yapf==0.21.0
+PyYAML==3.13

dev/run.py

Lines changed: 1 addition & 1 deletion
@@ -227,7 +227,7 @@ def sbt(*args):
     assert(not missing_env)
     cmd = ("./build/sbt", "-Dspark.version=" + required_env.get("SPARK_VERSION"),
            "-Dscala.version=" + required_env.get("SCALA_VERSION"))
-    call_subprocess(cmd + args)
+    return call_subprocess(cmd + args)


 def assembly():

docker-compose.yml

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
+version: '3'
+services:
+  master:
+    build: .
+    hostname: master
+    environment:
+      - MASTER=spark://master:7077
+    entrypoint: bash -c "service ssh restart && spark-class org.apache.spark.deploy.master.Master -h master"
+    ports:
+      - "4040:4040" # driver UI
+      - "8080:8080" # master UI
+    volumes:
+      - .:/mnt/sparkdl
+  worker:
+    build: .
+    entrypoint: bash -c "service ssh restart && spark-class org.apache.spark.deploy.worker.Worker spark://master:7077"
+    ports:
+      - "8081-8090:8081" # worker UI
+    links:
+      - master
+    volumes:
+      - .:/mnt/sparkdl
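Once the stack is up (docker-compose up -d --scale worker=2), a quick way to verify that the workers registered with the standalone master; a sketch, assuming the pi.py example that ships with the Spark binary distribution is present under /opt/spark in the image:

    # List the running services and their published ports (4040 driver UI, 8080 master UI, 8081+ worker UIs).
    docker-compose ps
    # Follow the master's logs and watch the two workers register.
    docker-compose logs -f master
    # Smoke-test the cluster from inside the master container.
    docker-compose exec master spark-submit --master spark://master:7077 /opt/spark/examples/src/main/python/pi.py 10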

docker-resources/ssh_config

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+Host *
+  StrictHostKeyChecking no
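This config disables host-key prompts between containers; presumably it is used together with the non-default key pair baked into the image. A hedged sketch of manual use while debugging, run from inside any container (the repo is mounted at /mnt/sparkdl):

    # SSH from a worker to the master using the shared key generated in the Dockerfile.
    ssh -F /mnt/sparkdl/docker-resources/ssh_config -i ~/.ssh/docker master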

environment.yml

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+# To choose a Python version, first create the env with:
+#   conda create -n sparkdl python=3.6
+# and then update it:
+#   conda env update -n sparkdl -f environment.yml
+name: sparkdl
+channels:
+  - https://repo.anaconda.com/pkgs/main/linux-64/
+dependencies:
+  # runtime
+  - six=1.11.0
+  - nomkl  # do not install MKL because it is large
+  - pandas=0.23.4
+  - h5py=2.8.0
+  - pillow=4.1.1
+  - cloudpickle=0.5.2
+  - tensorflow=1.10.0
+  - keras=2.2.4
+  - paramiko=2.4.1
+  - wrapt=1.10.11
+  # test
+  - coverage=4.5.1
+  - nose=1.3.7
+  - parameterized=0.6.1
+  - pylint=1.8.4
+  - argcomplete=1.9.4
+  - PyYAML=3.13
+  - pip:
+    # runtime
+    - horovod==0.15.0
+    # test
+    - prospector==0.12.11
+    - argh==0.26.2
+    - yapf==0.21.0
+    # docs
+    - sphinx

python/requirements.txt

Lines changed: 6 additions & 1 deletion
@@ -1,11 +1,16 @@
 # This file should list any python package dependencies.
 coverage>=4.4.1
 h5py>=2.7.0
-keras==2.2.2 # NOTE: this package has only been tested with keras 2.2.2
+keras==2.2.4 # NOTE: this package has only been tested with keras 2.2.4
 nose>=1.3.7 # for testing
 parameterized>=0.6.1 # for testing
 pillow>=4.1.1,<4.2
 pygments>=2.2.0
 tensorflow==1.10.0 # NOTE: this package has only been tested with tensorflow 1.10.0
 pandas>=0.19.1
 six>=1.10.0
+paramiko>=2.4.0
+PyNaCl==1.2.1 # NOTE: this is a transitive dependency of paramiko; v1.3.0 fails
+cloudpickle>=0.5.2
+horovod==0.15.0
+wrapt==1.10.11

python/tests/transformers/named_image_InceptionV3_test.py

Lines changed: 3 additions & 1 deletion
@@ -13,11 +13,13 @@
 # limitations under the License.
 #

+import os
 from .named_image_test import NamedImageTransformerBaseTestCase


 class NamedImageTransformerInceptionV3Test(NamedImageTransformerBaseTestCase):

-    __test__ = True
+    # TODO(ML-5165) Enable these tests in a separate target
+    __test__ = os.getenv('RUN_ONLY_LIGHT_TESTS', False) != "True"
     name = "InceptionV3"
     featurizerCompareDigitsExact = 4
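Because the Dockerfile sets RUN_ONLY_LIGHT_TESTS=True, the heavy InceptionV3 tests are now skipped by default on CI. A sketch of how they could still be run inside the master container by overriding the variable for a single invocation (this assumes dev/run.py passes the environment through to the test runner):

    docker-compose exec master bash -c "cd /mnt/sparkdl && RUN_ONLY_LIGHT_TESTS=False dev/run.py python-tests"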
