# Copyright 2019 Bytedance Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

# Builds a CUDA 9.0 / Ubuntu 16.04 image containing NCCL (built from source),
# MXNet (mxnet-cu90) and BytePS compiled without PyTorch/TensorFlow support.
#
# Build arguments:
#   REGION             - set to "china" to install a pip.conf that points pip
#                        at the douban PyPI mirror.
#   CUDNN_VERSION      - apt version string of libcudnn7 to install.
#   FRAMEWORK_VERSION  - mxnet-cu90 version to install.
#   BYTEPS_NCCL_LINK   - exposed to the BytePS build step; presumably read by
#                        BytePS' setup.py (TODO confirm).
#
# The build context must contain sync_ssh_env.sh.

FROM nvidia/cuda:9.0-devel-ubuntu16.04

ENV CUDA_VERSION=9.0

ARG REGION

# Write the mirror configuration with printf: RUN uses /bin/sh (dash on
# Ubuntu), whose `echo` does not understand -e and would emit a literal "-e "
# at the start of the file.
RUN printf '[global]\nindex-url = https://pypi.douban.com/simple\n' > /tmp/pip.conf
RUN if [ "x$REGION" = "xchina" ]; then mkdir -p ~/.pip && mv /tmp/pip.conf ~/.pip/; fi

ENV USE_CUDA_PATH=/usr/local/cuda:/usr/local/cudnn/lib64
ENV PATH=/usr/local/cuda/bin:/usr/local/nvidia/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cudnn/lib64:/usr/local/cuda/lib64:/usr/local/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/nccl/lib:$LD_LIBRARY_PATH
ENV LIBRARY_PATH=/usr/local/cudnn/lib64:/usr/local/cuda/lib64:$LIBRARY_PATH

ENV BYTEPS_BASE_PATH=/usr/local
ENV BYTEPS_PATH=$BYTEPS_BASE_PATH/byteps
ENV BYTEPS_GIT_LINK=https://github.com/bytedance/byteps

ARG CUDNN_VERSION=7.4.1.5-1+cuda$CUDA_VERSION

RUN apt-get update && \
    apt-get install -y --allow-unauthenticated --allow-downgrades --allow-change-held-packages --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        curl \
        wget \
        vim \
        libopenblas-dev \
        liblapack-dev \
        libopencv-dev \
        python \
        python-dev \
        python-setuptools \
        libjemalloc-dev \
        graphviz \
        cmake \
        libjpeg-dev \
        libpng-dev \
        iftop \
        lsb-release \
        libcudnn7=${CUDNN_VERSION} \
        libnuma-dev \
        gcc-4.9 \
        g++-4.9 \
        gcc-4.9-base

# pip 21.0 dropped Python 2.7 support and the distro pip does not honour
# Requires-Python, so cap the self-upgrade at the last 2.7-compatible series.
RUN apt-get update && \
    apt-get -y install python-pip && \
    pip install --upgrade "pip<21"

# "sklearn" on PyPI is only a deprecated alias; install scikit-learn directly.
RUN pip --no-cache-dir install \
        matplotlib \
        numpy==1.15.2 \
        scipy \
        scikit-learn \
        pandas \
        graphviz==0.9.0 \
        mxboard \
        tensorboard==1.0.0a6

# Install NCCL
ENV NCCL_VERSION=d7a58cfa5865c4f627a128c3238cc72502649881

RUN cd / && \
    wget -q -O - "https://github.com/NVIDIA/nccl/archive/$NCCL_VERSION.tar.gz" | tar -xzf - && \
    cd "nccl-$NCCL_VERSION" && make -j src.build && make pkg.txz.build && \
    mkdir -p /usr/local/nccl && \
    tar -Jxf "/nccl-$NCCL_VERSION"/build/pkg/txz/nccl*.txz -C /usr/local/nccl/ --strip-components 1 && \
    echo "/usr/local/nccl/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
    ldconfig && rm -rf "/nccl-$NCCL_VERSION"

WORKDIR /root/

RUN echo "/usr/local/cuda/lib64" >> /etc/ld.so.conf.d/cuda.conf && \
    echo "/usr/local/cudnn/lib64" >> /etc/ld.so.conf.d/cuda.conf && \
    echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf && \
    ldconfig

# Link the CUDA stub libcuda.so into lib64 for the duration of the build; the
# links are removed again after BytePS has been installed.
RUN ln -sf /usr/local/cudnn/include/cudnn.h /usr/local/cuda/include/ && \
    ln -sf /usr/local/cudnn/lib64/libcudnn* /usr/local/cuda/lib64 && \
    ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/libcuda.so && \
    ln -sf /usr/local/cuda/lib64/libcuda.so /usr/local/cuda/lib64/libcuda.so.1

################################ install your framework ################################
# install mxnet
ARG FRAMEWORK_VERSION=1.4.1
RUN pip --no-cache-dir install mxnet-cu90==$FRAMEWORK_VERSION

################################ install your framework ################################

# fix git source code
# All steps share one RUN: every RUN starts a fresh shell in WORKDIR, so a
# `cd` issued in its own RUN would not affect the git commands that follow.
RUN cd "$BYTEPS_BASE_PATH" && \
    git clone --recurse-submodules "$BYTEPS_GIT_LINK" && \
    cd "$BYTEPS_PATH/3rdparty/ps-lite" && \
    git checkout byteps && \
    git pull origin byteps && \
    git reset --hard 62bbf9a1a7c489c45b65379e2b02e8a620f8a4c1

# Pin GCC to 4.9 (priority 200) to compile correctly against TensorFlow, PyTorch, and MXNet.
# The currently installed compilers are registered first (priority 100) so
# they become the selection again once the 4.9 alternatives are removed.
RUN update-alternatives --install /usr/bin/gcc gcc "$(readlink -f "$(command -v gcc)")" 100 && \
    update-alternatives --install /usr/bin/x86_64-linux-gnu-gcc x86_64-linux-gnu-gcc "$(readlink -f "$(command -v gcc)")" 100 && \
    update-alternatives --install /usr/bin/g++ g++ "$(readlink -f "$(command -v g++)")" 100 && \
    update-alternatives --install /usr/bin/x86_64-linux-gnu-g++ x86_64-linux-gnu-g++ "$(readlink -f "$(command -v g++)")" 100
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.9 200 && \
    update-alternatives --install /usr/bin/x86_64-linux-gnu-gcc x86_64-linux-gnu-gcc /usr/bin/gcc-4.9 200 && \
    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.9 200 && \
    update-alternatives --install /usr/bin/x86_64-linux-gnu-g++ x86_64-linux-gnu-g++ /usr/bin/g++-4.9 200

# Install BytePS
ARG BYTEPS_NCCL_LINK=shared
RUN cd "$BYTEPS_PATH" && \
    BYTEPS_WITHOUT_PYTORCH=1 BYTEPS_WITHOUT_TENSORFLOW=1 python setup.py install && \
    BYTEPS_WITHOUT_PYTORCH=1 BYTEPS_WITHOUT_TENSORFLOW=1 python setup.py bdist_wheel

# Remove GCC pinning
RUN update-alternatives --remove gcc /usr/bin/gcc-4.9 && \
    update-alternatives --remove x86_64-linux-gnu-gcc /usr/bin/gcc-4.9 && \
    update-alternatives --remove g++ /usr/bin/g++-4.9 && \
    update-alternatives --remove x86_64-linux-gnu-g++ /usr/bin/g++-4.9

# Drop the build-time libcuda stub links created above.
RUN rm -rf /usr/local/cuda/lib64/libcuda.so && \
    rm -rf /usr/local/cuda/lib64/libcuda.so.1

# Install OpenSSH for MPI to communicate between containers
# apt-get update runs in the same layer so the install never relies on a
# stale package index from a cached earlier layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends openssh-client openssh-server && \
    mkdir -p /var/run/sshd

# Allow OpenSSH to talk to containers without asking for confirmation
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

# other
RUN pip install easydict
RUN apt-get update && \
    apt-get install -y python-opencv

# ssh env
COPY sync_ssh_env.sh .
RUN bash ./sync_ssh_env.sh && \
    rm -rf ./sync_ssh_env.sh