diff --git a/.gitignore b/.gitignore
index bc1953a5c..a5c67e360 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,9 +28,9 @@ src/pylint.d/*
 src/interface_py/py3nvml
 src/interface_py/xgboost
 deps/
-data/
-open_data/
-smalldata/
+./data/
+./open_data/
+./smalldata/
 tests/smalldata
 tests/data
 *.idea/
@@ -72,7 +72,7 @@ tests/results
 examples/py/sourced.png
 
 # temp data generated
-testsxgboost/fifa_data.pk
+tests/python/xgboost/fifa_data.pk
 examples/py/ipums_feather
 
 # Visual Studio Code
@@ -157,7 +157,7 @@ src/interface_py/h2o4gpu/utils/
 
 ## add submodules so user has to force to add/commit
 cub
-xgboost
+./xgboost
 py3nvml
 scikit-learn
 tests/googletest
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 76b29c218..c88dd0502 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,14 +51,13 @@ FILE(GLOB_RECURSE COMMON_SOURCES
 INCLUDE_DIRECTORIES(
     src/include
     src/cpu/include
+    # Here and not in target_include_directories b/c cmake < 3.7, which we use in Dockerfiles, does not support it
+    src/gpu/include
     ${PYTHON_INCLUDE_PATH}
     ${PYTHON_INCLUDE_PATH_CUST}
 )
 
 ADD_LIBRARY(commonh2o4gpu OBJECT ${COMMON_SOURCES})
-TARGET_INCLUDE_DIRECTORIES (commonh2o4gpu PUBLIC
-    src/include
-    )
 
 #============= BUILD COMMON CPU/GPU CODE
 
 #============= BUILD CPU LIBRARY
@@ -140,9 +139,6 @@ if(USE_CUDA)
         ${BLAS_LIBRARIES}
         ${NVTX_LIBRARY}
         ${NVML_LIBRARY})
-    TARGET_INCLUDE_DIRECTORIES (gpuh2o4gpu PUBLIC
-        src/gpu/include
-        )
 
     #============= BUILD GPU LIBRARY
 
     #============= GPU SWIG
diff --git a/DEVEL.md b/DEVEL.md
index 2e19d9a85..19d93766d 100644
--- a/DEVEL.md
+++ b/DEVEL.md
@@ -102,7 +102,7 @@ git clone https://github.com/h2oai/xgboost
 cd xgboost
 git checkout h2oai
 make -f Makefile2
-pip install python-package/dist/xgboost-0.7-py3-none-any.whl --upgrade
+pip install python-package/dist/xgboost-0.71-py3-none-any.whl --upgrade
 ```
 
 Note: By default the GPU NCCL version is installed using your local cuda version.
@@ -111,6 +111,8 @@ If fully understand build, can do jump to latter steps of
 
 ## Build flags and options:
 
+A full list of the build flags and options can be found in `make/config.mk`. Here are the most useful ones:
+
 ##### Debug mode
 
 To build the code in debug mode set `CMAKE_BUILD_TYPE=Debug` when building e.g. `make fullinstall CMAKE_BUILD_TYPE=Debug`.
@@ -123,6 +125,10 @@ To enable `nvToolsExt` set the `USENVTX` variable e.g. `make fullinstall USENVTX
 
 To expedite the building process in dev setup you can set `DEV_BUILD=ON` e.g. `make fullinstall DEV_BUILD=ON`. This will build the binary with only single CUDA compute capability (currently 6.1).
 
+##### NCCL
+
+Currently only the XGBoost part of the project uses NCCL. By default, NCCL support is ON during builds. To turn it off, run with `USENCCL=0`, e.g. `make fullinstall USENCCL=0`.
+
 ## Testing
 
 - test python package
diff --git a/Dockerfile-build b/Dockerfile-build
index 125a11d09..4dfbdc656 100644
--- a/Dockerfile-build
+++ b/Dockerfile-build
@@ -1,100 +1,174 @@
-# Copy this file and Run from one level higher than the git pull directory
-# To build: docker build -t opsh2oai/h2oai-nv -f Dockerfile-nvdocker .
-# To run with docker run -it -p 12345:12345 opsh2oai/h2oai-nv
+ARG docker_name
 
-ARG layer
-ARG version
-
-FROM $layer:$version
-# e.g. FROM ubuntu:latest
-
-RUN echo $layer
+FROM $docker_name
 
 MAINTAINER H2o.ai
 
-ENV DEBIAN_FRONTEND noninteractive
+#
+# Env variables for CUDA. Necessary because certain systems don't support nvidia-docker, so we should use plain docker as much as possible.
+# ENV HOME=/root -ENV PYENV_ROOT=$HOME/.pyenv -ENV PATH=$PYENV_ROOT/bin:$PATH - -RUN if $layer -ne "ubuntu"; then export CUDA_HOME=/usr/local/cuda && \ - export PATH=/usr/local/cuda/bin:$PATH && \ - export LD_LIBRARY_PATH_MORE=/home/$USER/lib/:$CUDA_HOME/lib64/:$CUDA_HOME/lib/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 && \ - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LD_LIBRARY_PATH_MORE && \ - export CUDADIR=/usr/local/cuda/include/ && \ - export OMP_NUM_THREADS=32 && \ - export MKL_NUM_THREADS=32 && \ - export VECLIB_MAXIMUM_THREADS=32; fi +ENV CUDA_HOME=/usr/local/cuda +ENV CUDADIR=/usr/local/cuda/include/ +ENV PATH=/usr/local/cuda/bin:$PATH +ENV LD_LIBRARY_PATH_CUDA=$CUDA_HOME/lib64/:$CUDA_HOME/lib/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 +ENV LD_LIBRARY_PATH_BUILD=/lib64:/usr/local/lib64:/home/$USER/lib/ +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH_CUDA:$LD_LIBRARY_PATH_BUILD:$LD_LIBRARY_PATH -# Symlinks for NVML -RUN \ - export NVIDIA_DRIVER=$(grep "Module" /proc/driver/nvidia/version | cut -f 9 -d' ' | cut -f 1 -d '.') && \ - mkdir -p /usr/lib/nvidia-$NVIDIA_DRIVER && \ - ln -s /usr/local/nvidia/lib64/libnvidia-ml.so.1 /usr/lib/nvidia-$NVIDIA_DRIVER/libnvidia-ml.so +# +# Env variables used by the codebase. +# +ENV OMP_NUM_THREADS=32 +ENV MKL_NUM_THREADS=32 +ENV VECLIB_MAXIMUM_THREADS=32 -# add-apt-repository ppa:fkrull/deadsnakes && \ +# +# Library versions +# +ENV MINICONDA_VERSION=4.4.10 +ENV SWIG_VERSION=3.0.12 +ENV PILLOW_VERSION=4.2.1 +ENV GIT_VERSION=2.17.0 -# Setup Repos -RUN \ - apt-get update -y && \ - apt-get -y install curl apt-utils python-software-properties \ - software-properties-common iputils-ping wget cpio net-tools build-essential \ - git zip dirmngr && \ - apt-get -y --no-install-recommends install \ - python3-dateutil python3-magic s3cmd && \ - wget http://launchpadlibrarian.net/326935544/s3cmd_2.0.0-1_all.deb && \ - dpkg -i s3cmd_2.0.0-1_all.deb && \ - add-apt-repository ppa:jonathonf/python-3.6 && \ - apt-get update -yqq && \ - curl -sL https://deb.nodesource.com/setup_7.x | bash - && \ - apt-get -y --no-install-recommends install \ - python3.6 \ - python3.6-dev \ - python3-pip \ - python3-setuptools \ - python3-wheel && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.6 100 && \ - python -m pip install --upgrade pip && \ - apt-get clean && \ - rm -rf /var/cache/apt/* && \ - apt-get install -y libopenblas-dev axel && \ - apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev swig +# +# Install necessary libraries and dependencies +# +RUN yum install -y epel-release + +# Setup gcc etc. 
+RUN yum install -y gcc gcc-c++ libgcc libstdc++ libgomp glibc + +# Git requirements +RUN yum install -y libcurl-devel zlib-devel asciidoc xmlto wget make autoconf gettext +# Compile from source because yum's latest version is 1.8.3 +# --depth for submodule update which we use was added in 1.8.4 RUN \ - mkdir -p .pylint.d && \ - rm -rf ~/.pyenv && \ - git clone https://github.com/pyenv/pyenv.git ~/.pyenv && \ - eval "$(/root/.pyenv/bin/pyenv init -)" && \ - CONFIGURE_OPTS=--enable-shared /root/.pyenv/bin/pyenv install 3.6.1 && \ - CONFIGURE_OPTS=--enable-shared /root/.pyenv/bin/pyenv global 3.6.1 && \ - pip install setuptools --no-cache-dir - -# Install Daal library -COPY scripts/daal/install_daal.sh scripts/daal/install_daal.sh + wget https://www.kernel.org/pub/software/scm/git/git-${GIT_VERSION}.tar.xz && \ + tar xf git-${GIT_VERSION}.tar.xz && \ + cd git-${GIT_VERSION} && \ + make configure && \ + ./configure --prefix=/usr && \ + make all && \ + make install; + +# H2O4GPU requirements + util programs +RUN yum install -y \ + ncurses-devel \ + bzip2 \ + which \ + axel \ + cmake3 \ + openssl-devel \ + libpng-devel \ + freetype-devel \ + blas-devel \ + openblas-devel && \ + wget https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-`arch`.sh && \ + bash Miniconda3-${MINICONDA_VERSION}-Linux-`arch`.sh -b -p /opt/h2oai/h2o4gpu/python && \ + wget https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai-thirdparty-deps-llvm/1.0-master-21/`arch`-centos7/llvm.tar.bz2 && \ + tar xvf llvm.tar.bz2 && \ + cp -r llvm/* /opt/h2oai/h2o4gpu/ && \ + rm -rf llvm* + +ENV LLVM4=/opt/h2oai/h2o4gpu +ENV PATH=/opt/h2oai/h2o4gpu/python/bin:$PATH +ENV PATH=/usr/local/bin:$PATH +ENV PATH=$LLVM4/bin:$PATH +ENV LD_LIBRARY_PATH=$LLVM4/lib:$LD_LIBRARY_PATH + +# +# Symlinks +# + +# AR for conda +RUN ln /usr/bin/ar $LLVM4/bin/`arch`-conda_cos6-linux-gnu-ar + +# CentOS' yum install cmake has only 2.X so need to install cmake3 and make a symlink +RUN ln -s /usr/bin/cmake3 /usr/bin/cmake + +# Symlinks for Python libs used by SWIG in CMake - it does not recognize Miniconda paths otherwise RUN \ - chmod +x scripts/daal/install_daal.sh && \ - scripts/daal/install_daal.sh + mkdir -p /usr/lib64/ && \ + ln -s /opt/h2oai/h2o4gpu/python/lib/*python* /usr/lib64/ && \ + mkdir -p /usr/include/python3.6m && \ + ln -s /opt/h2oai/h2o4gpu/python/include/python3.6m/* /usr/include/python3.6m + +# Yumming openblas puts some files in a not-so-standard locations +RUN ln -s /usr/include/openblas/* /usr/local/include/ -COPY requirements_buildonly.txt requirements_buildonly.txt -COPY requirements_runtime.txt requirements_runtime.txt -COPY requirements_runtime_demos.txt requirements_runtime_demos.txt +# Symlinks for NVML RUN \ - chmod a+rwx / && \ - chmod -R a+rwx /root && \ - chmod ugo+s /root/ && \ - mkdir -p /root/.cache/ && \ - eval "$(/root/.pyenv/bin/pyenv init -)" && \ - /root/.pyenv/bin/pyenv global 3.6.1 && \ - pip install setuptools && \ - pip install -r requirements_buildonly.txt && \ - pip install -r requirements_runtime.txt && \ - pip install -r requirements_runtime_demos.txt - -# Install R dependencies and h2o4gpu R package when appropriate -COPY scripts/install_r.sh scripts/install_r.sh -COPY scripts/test_r_pkg.sh scripts/test_r_pkg.sh -COPY scripts/install_r_deps.sh scripts/install_r_deps.sh + mkdir -p /usr/lib64/nvidia/ && \ + ln -s /usr/local/cuda-`nvcc --version | tail -n 1 | cut -f 5 -d' ' | cut -f 1 -d ','`/targets/`arch`-linux/lib/stubs/libnvidia-ml.so /usr/lib64/nvidia/libnvidia-ml.so + +# +# Builds 
from source due to too old versions in yum +# +WORKDIR $HOME + +# SWIG RUN \ - apt-get update -y && \ - apt-get -y install libcurl4-openssl-dev libssl-dev libxml2-dev && \ - scripts/install_r_deps.sh + wget https://sourceforge.net/projects/swig/files/swig/swig-${SWIG_VERSION}/swig-${SWIG_VERSION}.tar.gz && \ + tar -zxvf swig-${SWIG_VERSION}.tar.gz && \ + cd swig-${SWIG_VERSION} && \ + ./configure --prefix=/usr && \ + make -j $(nproc) && \ + make install && \ + cd $HOME && \ + rm -rf swig-3* + +# TODO Install DAAL + +# +# PPC64 specific - certain libs/whl don't support PPC64LE +# + +# Arrow +RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ + git clone https://github.com/apache/arrow.git && \ + cd $HOME/arrow/cpp && \ + git checkout tags/apache-arrow-0.8.0 && \ + yum install -y boost-devel && \ + pip install numpy cython && \ + cmake -DARROW_CXXFLAGS="-lutil" -DARROW_PYTHON=on && make -j && make install && \ + cd $HOME/arrow/python && \ + ARROW_HOME=/usr/local python setup.py install && \ + yum install -y libjpeg-devel;\ + fi' + +# Pillow +RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ + wget https://files.pythonhosted.org/packages/55/aa/f7f983fb72710a9daa4b3374b7c160091d3f94f5c09221f9336ade9027f3/Pillow-${PILLOW_VERSION}.tar.gz && \ + tar xvf Pillow-${PILLOW_VERSION}.tar.gz && \ + cd $HOME/Pillow-${PILLOW_VERSION} && \ + sed -i "s/'ppc64'/'ppc64le'/g" setup.py && \ + python setup.py install && \ + cd $HOME && \ + rm -rf Pillow-${PILLOW_VERSION}*; \ + fi' + + +# +# Install Python requirements +# +RUN pip install numpy setuptools + +COPY src/interface_py/requirements_buildonly.txt requirements_buildonly.txt +COPY src/interface_py/requirements_runtime.txt requirements_runtime.txt +COPY src/interface_py/requirements_runtime_demos.txt requirements_runtime_demos.txt + +RUN pip install -r requirements_buildonly.txt +RUN pip install -r requirements_runtime.txt +RUN pip install -r requirements_runtime_demos.txt + +RUN chmod -R o+rwx /opt/h2oai/h2o4gpu/python +RUN chmod -R o+rwx /root + +WORKDIR $HOME + +ENV GIT_AUTHOR_NAME="anonymous" +ENV GIT_AUTHOR_EMAIL="anonymous@h2o.ai" +ENV GIT_COMMITTER_NAME="anonymous" +ENV GIT_COMMITTER_EMAIL="anonymous@h2o.ai" +ENV EMAIL="anonymous@h2o.ai" \ No newline at end of file diff --git a/Dockerfile-build-centos7.in b/Dockerfile-build-centos7.in deleted file mode 100644 index f3ffcbea1..000000000 --- a/Dockerfile-build-centos7.in +++ /dev/null @@ -1,138 +0,0 @@ -FROM FROM_SUBST - -MAINTAINER H2o.ai - -ENV CUDA_HOME=/usr/local/cuda -ENV PATH=/usr/local/cuda/bin:$PATH -ENV LD_LIBRARY_PATH_MORE=/usr/lib/gcc/ppc64le-redhat-linux/4.8.2/:/home/$USER/lib/:$CUDA_HOME/lib64/:$CUDA_HOME/lib/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 -ENV LD_LIBRARY_PATH=/lib64:$LD_LIBRARY_PATH:$LD_LIBRARY_PATH_MORE -ENV CUDADIR=/usr/local/cuda/include/ -ENV OMP_NUM_THREADS=32 -ENV MKL_NUM_THREADS=32 -ENV HOME=/root -ENV VECLIB_MAXIMUM_THREADS=32 -RUN \ - yum groupinstall -y "Development Tools" - -RUN \ - yum install -y \ - ncurses-devel \ - zlib-devel \ - wget \ - bzip2 \ - openssl-devel \ - libcurl-devel && \ - wget https://repo.continuum.io/miniconda/Miniconda3-4.3.27-Linux-ARCH_SUBST.sh && \ - bash Miniconda3-4.3.27-Linux-ARCH_SUBST.sh -b -p /opt/h2oai/dai/python && \ - wget https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai-thirdparty-deps-llvm/1.0-master-21/ARCH_SUBST-centos7/llvm.tar.bz2 && \ - tar xvf llvm.tar.bz2 && \ - cp -r llvm/* /opt/h2oai/dai/ && \ - rm -rf llvm* - -ENV LLVM4=/opt/h2oai/dai -ENV 
PATH=/opt/h2oai/dai/python/bin:$PATH -ENV PATH=/usr/local/bin:$PATH -ENV PATH=$LLVM4/bin:$PATH -ENV LD_LIBRARY_PATH=$LLVM4/lib -COPY scripts/gcc_wrapper.sh /opt/h2oai/gcc_wrapper/gcc -COPY scripts/g++_wrapper.sh /opt/h2oai/gcc_wrapper/g++ -COPY scripts/gcc_wrapper.sh /opt/h2oai/gcc_wrapper/ARCH_SUBST-conda_cos6-linux-gnu-gcc -ENV PATH=/opt/h2oai/gcc_wrapper:$PATH -RUN ln /usr/bin/ar $LLVM4/bin/ARCH_SUBST-conda_cos6-linux-gnu-ar - -RUN yum install -y atlas-devel blas-devel && \ - ln /usr/lib64/libgfortran.so.3 /usr/lib64/libgfortran.so && \ - wget http://github.com/xianyi/OpenBLAS/archive/v0.2.20.tar.gz && \ - tar xvf v0.2.20.tar.gz && \ - rm v0.2.20.tar.gz && \ - cd OpenBLAS-0.2.20 && make CBLAS_ONLY=1 && make PREFIX=/usr/local install - -ENV OPENBLAS_PREFIX=open -RUN yum install -y libstdc++ libc libgomp -ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH -COPY scripts/g++_wrapper.sh /opt/h2oai/gcc_wrapper/ARCH_SUBST-conda_cos6-linux-gnu-c++ -RUN yum install -y which -WORKDIR $HOME -COPY requirements_buildonly.txt requirements_buildonly.txt -COPY requirements_runtime.txt requirements_runtime.txt -COPY requirements_runtime_demos.txt requirements_runtime_demos.txt - -RUN yum install -y \ - libpng-devel \ - freetype-devel - -ENV PATH=/usr/local/bin:$PATH -RUN \ - wget https://cmake.org/files/v3.10/cmake-3.10.1.tar.gz && \ - tar xvf cmake-3.10.1.tar.gz && \ - cd $HOME/cmake-3.10.1 && \ - ./configure && \ - make -j6 install && \ - cd $HOME && \ - rm -rf cmake-3* - -# Install SWIG b/c yum has old versions -RUN \ - wget https://sourceforge.net/projects/swig/files/swig/swig-3.0.12/swig-3.0.12.tar.gz && \ - tar -zxvf swig-3.0.12.tar.gz && \ - cd swig-3.0.12 && \ - ./configure --prefix=/usr && \ - make -j 4 && \ - make install && \ - cd $HOME && \ - rm -rf swig-3* - -# Symlinks for CMake/SWIG - it does not recognize Miniconda paths otherwise -RUN \ - mkdir -p /usr/lib64/ && \ - ln -s /opt/h2oai/dai/python/lib/*python* /usr/lib64/ && \ - mkdir -p /usr/include/python3.6m && \ - ln -s /opt/h2oai/dai/python/include/python3.6m/* /usr/include/python3.6m - -RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ - git clone https://github.com/apache/arrow.git && \ - cd $HOME/arrow/cpp && \ - git checkout tags/apache-arrow-0.8.0 && \ - yum install -y boost-devel && \ - pip install numpy cython && \ - cmake -DARROW_CXXFLAGS="-lutil" -DARROW_PYTHON=on && make -j && make install && \ - cd $HOME/arrow/python && \ - ARROW_HOME=/usr/local python setup.py install && \ - yum install -y libjpeg-devel; \ - fi' - -ENV PILLOW_VERSION=4.2.1 -RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ - wget https://files.pythonhosted.org/packages/55/aa/f7f983fb72710a9daa4b3374b7c160091d3f94f5c09221f9336ade9027f3/Pillow-${PILLOW_VERSION}.tar.gz && \ - tar xvf Pillow-${PILLOW_VERSION}.tar.gz && \ - cd $HOME/Pillow-${PILLOW_VERSION} && \ - sed -i "s/'ppc64'/'ppc64le'/g" setup.py && \ - python3.6 setup.py install && \ - cd $HOME && \ - rm -rf Pillow-${PILLOW_VERSION}*; \ - fi' - -# Symlinks for NVML -RUN \ - mkdir -p /usr/lib64/nvidia/ && \ - ln -s /usr/local/cuda-MY_CUDA_VERSION_SUBST/targets/ARCH_SUBST-linux/lib/stubs/libnvidia-ml.so /usr/lib64/nvidia/libnvidia-ml.so - -RUN pip install numpy -RUN pip install setuptools llvmlite==0.20.0 scikit-build scipy -RUN sed -i 's/cmake/# cmake/' requirements_buildonly.txt -RUN pip install -r requirements_buildonly.txt -RUN pip install -r requirements_runtime.txt -RUN pip install -r requirements_runtime_demos.txt - -RUN yum install -y which - -RUN chmod -R o+rwx /opt/h2oai/dai/python -RUN chmod -R o+rwx 
/root - -WORKDIR $HOME - -ENV GIT_AUTHOR_NAME="anonymous" -ENV GIT_AUTHOR_EMAIL="anonymous@h2o.ai" -ENV GIT_COMMITTER_NAME="anonymous" -ENV GIT_COMMITTER_EMAIL="anonymous@h2o.ai" -ENV EMAIL="anonymous@h2o.ai" diff --git a/Dockerfile-runtime b/Dockerfile-runtime index 8f73f8966..7363b9607 100644 --- a/Dockerfile-runtime +++ b/Dockerfile-runtime @@ -1,76 +1,74 @@ -#How to run: -#To build: docker build -t opsh2o4gpu/h2o4gpu-runtime -f Dockerfile-runtime . -#To run: nvidia-docker run -p 8888:8888 -v /some/local/log:/log opsh2o4gpu/h2o4gpu-runtime & -# or without nvidia: docker run -p 8888:8888 -v /some/local/log:/log opsh2o4gpu/h2o4gpu-runtime & - -ARG layer -ARG version - -FROM $layer:$version -# e.g. FROM ubuntu:latest - -ARG wheel -ENV wheel=${wheel} -ARG buckettype -ENV buckettype=${buckettype} +ARG docker_name +FROM $docker_name MAINTAINER H2o.ai -ENV DEBIAN_FRONTEND noninteractive - -RUN if $layer -ne "ubuntu"; then export CUDA_HOME=/usr/local/cuda && \ - export PATH=/usr/local/cuda/bin:$PATH && \ - export LD_LIBRARY_PATH_MORE=/home/$USER/lib/:$CUDA_HOME/lib64/:$CUDA_HOME/lib/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 && \ - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LD_LIBRARY_PATH_MORE && \ - export CUDADIR=/usr/local/cuda/include/ && \ - export OMP_NUM_THREADS=32 && \ - export MKL_NUM_THREADS=32 && \ - export VECLIB_MAXIMUM_THREADS=32; fi - -# add-apt-repository ppa:fkrull/deadsnakes && \ - -RUN \ - # Setup Repos - apt-get update -y && \ - apt-get -y install curl apt-utils python-software-properties \ - software-properties-common iputils-ping wget cpio net-tools build-essential \ - git zip dirmngr && \ - apt-get -y --no-install-recommends install \ - python3-dateutil \ - python3-magic && \ - wget http://launchpadlibrarian.net/326935544/s3cmd_2.0.0-1_all.deb && \ - dpkg -i s3cmd_2.0.0-1_all.deb && \ - add-apt-repository ppa:jonathonf/python-3.6 && \ - apt-get update -yqq && \ - curl -sL https://deb.nodesource.com/setup_7.x | bash - && \ - # Install H2o dependencies - apt-get -y --no-install-recommends install \ - python3.6 \ - python3.6-dev \ - virtualenv \ - python3-pip && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.6 100 && \ - python -m pip install --upgrade pip && \ - apt-get clean && \ - rm -rf /var/cache/apt/* && \ - apt-get install -y libopenblas-dev pbzip2 - -RUN \ - mkdir h2o4gpu_env && \ - virtualenv --python=/usr/bin/python3.6 h2o4gpu_env && \ - chmod -R o+w h2o4gpu_env && \ - . h2o4gpu_env/bin/activate && \ - pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir --upgrade setuptools && \ - pip install --no-cache-dir --upgrade numpy && \ - pip install --no-cache-dir --upgrade jupyter +ENV HOME=/root +ENV CUDA_HOME=/usr/local/cuda +ENV PATH=/usr/local/cuda/bin:$PATH +ENV CUDADIR=/usr/local/cuda/include/ +ENV LD_LIBRARY_PATH=/usr/lib64:/usr/local/lib:$LD_LIBRARY_PATH + +ENV MINICONDA_VERSION=4.4.10 + +# Setup gcc etc. 
+RUN yum install -y epel-release + +RUN yum install -y gcc gcc-c++ libgcc libstdc++ libgomp glibc + +RUN yum install -y \ + make \ + ncurses-devel \ + zlib-devel \ + wget \ + blas-devel \ + openblas-devel \ + libpng-devel \ + freetype-devel \ + bzip2 && \ + wget https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-`arch`.sh && \ + bash Miniconda3-${MINICONDA_VERSION}-Linux-`arch`.sh -b -p /opt/h2oai/h2o4gpu/python +ENV PATH=/opt/h2oai/h2o4gpu/python/bin:$PATH + +# +# PPC64 specific - certain libs/whl don't support PPC64LE +# + +WORKDIR $HOME + +# Arrow +RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ + yum install -y git boost-devel cmake3 && \ + ln -s /usr/bin/cmake3 /usr/bin/cmake && \ + git clone https://github.com/apache/arrow.git && \ + cd $HOME/arrow/cpp && \ + git checkout tags/apache-arrow-0.8.0 && \ + pip install numpy cython && \ + cmake -DARROW_CXXFLAGS="-lutil" -DARROW_PYTHON=on && make -j && make install && \ + cd $HOME/arrow/python && \ + ARROW_HOME=/usr/local python setup.py install && \ + yum install -y libjpeg-devel; \ + fi' + +# Pillow +ENV PILLOW_VERSION=4.2.1 +RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ + wget https://files.pythonhosted.org/packages/55/aa/f7f983fb72710a9daa4b3374b7c160091d3f94f5c09221f9336ade9027f3/Pillow-${PILLOW_VERSION}.tar.gz && \ + tar xvf Pillow-${PILLOW_VERSION}.tar.gz && \ + cd $HOME/Pillow-${PILLOW_VERSION} && \ + sed -i "s/'ppc64'/'ppc64le'/g" setup.py && \ + python setup.py install && \ + cd $HOME && \ + rm -rf Pillow-${PILLOW_VERSION}*; \ + fi' + +WORKDIR / # Add requirements -COPY requirements_runtime.txt requirements.txt -COPY requirements_runtime_demos.txt requirements_runtime_demos.txt +COPY src/interface_py/requirements_runtime.txt requirements.txt +COPY src/interface_py/requirements_runtime_demos.txt requirements_runtime_demos.txt RUN \ - . h2o4gpu_env/bin/activate && \ chmod a+rwx requirements*.txt && \ pip install --no-cache-dir -r requirements.txt && \ pip install --no-cache-dir -r requirements_runtime_demos.txt @@ -91,7 +89,6 @@ COPY examples/py/demos/H2O4GPU_PCA.ipynb /jupyter/demos/H2O4GPU_PCA.ipynb COPY examples/py/demos/H2O4GPU_Daal_LinearRegression.ipynb /jupyter/demos/H2O4GPU_Daal_LinearRegression.ipynb COPY examples/py/demos/figures /jupyter/demos/figures RUN \ - . h2o4gpu_env/bin/activate && \ cd /jupyter/demos && \ chmod -R a+rwx /jupyter && \ mkdir /scikit_learn_data && \ @@ -103,14 +100,12 @@ RUN \ HOME=/jupyter jupyter notebook --generate-config && \ sed -i "s/#c.NotebookApp.token = ''/c.NotebookApp.token = 'h2o'/" /jupyter/.jupyter/jupyter_notebook_config.py && \ chmod -R a+rwx /jupyter/.jupyter -# Add shell wrapper -COPY run.sh /run.sh +# Add shell wrapper +COPY scripts/run.sh /run.sh RUN \ - . h2o4gpu_env/bin/activate && \ chmod a+rwx run.sh - ARG h2o4gpu_VERSION ARG h2o4gpu_COMMIT ARG DOCKER_VERSION_TAG @@ -118,5 +113,5 @@ LABEL \ h2o4gpu_commit="$h2o4gpu_COMMIT" \ docker_version_tag="$DOCKER_VERSION_TAG" -ENTRYPOINT ["./run.sh"] +ENTRYPOINT ["./run.sh"] EXPOSE 8888 diff --git a/EXAMPLE_SOLVER.md b/EXAMPLE_SOLVER.md index 276ab17d9..7af543ef4 100644 --- a/EXAMPLE_SOLVER.md +++ b/EXAMPLE_SOLVER.md @@ -356,4 +356,4 @@ Add Java wrapper files - *coming soon*. ### Tests -Add tests! Currently adding Python tests in `tests_open` is the easiest way. C/C++/CUDA tests coming soon. \ No newline at end of file +Add tests! Currently adding Python tests in `tests/python/open_data` is the easiest way. C/C++/CUDA tests coming soon. 
\ No newline at end of file diff --git a/Jenkinsfile-cpu.base b/Jenkinsfile-cpu.base deleted file mode 100644 index 20ee763dc..000000000 --- a/Jenkinsfile-cpu.base +++ /dev/null @@ -1,23 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist8" -def BUILDTYPE = "cpu" -def cuda = "ubuntu:16.04" -def extratag = "-cpu" -def linuxwheel = "linux_whl2" -def testtype = "dotestfast_nonccl" -def labelbuild = "docker && linux" -def labeltest = "docker" -def labelruntime = "docker" -def doingbenchmark = "0" -def dobenchmark = "1" -def doruntime = "1" diff --git a/Jenkinsfile-dai b/Jenkinsfile-dai deleted file mode 100644 index 37cb8c728..000000000 --- a/Jenkinsfile-dai +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/groovy -//------------------------------------------------------------------------------ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. -//------------------------------------------------------------------------------ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -def utilsLib = new Utils() - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 120, unit: 'MINUTES') - buildDiscarder(logRotator(daysToKeepStr: '30')) - } - - stages { - stage('Build') { - parallel { - stage('Build on x86_64-centos7-cuda8.0') { - agent { - label "linux && docker && !micro" - } - steps { - dumpInfo 'x86_64-centos7-cuda8 Build Info' - script { - sh """ - make mrproper_in_docker - make BRANCH_NAME=${env.BRANCH_NAME} BUILD_NUM=${env.BUILD_ID} centos7_cuda8_in_docker - """ - } - stash includes: 'dist/**/*', name: 'x86_64-centos7-cuda8.0' - } - } - stage('Build on x86_64-centos7-cuda9.0') { - agent { - label "linux && docker && !micro" - } - steps { - dumpInfo 'x86_64-centos7-cuda9 Build Info' - script { - sh """ - make mrproper_in_docker - make BRANCH_NAME=${env.BRANCH_NAME} BUILD_NUM=${env.BUILD_ID} centos7_cuda9_in_docker - """ - } - stash includes: 'dist/**/*', name: 'x86_64-centos7-cuda9.0' - } - } - stage('Build on ppc64le-centos7-cuda8.0') { - agent { - label "ibm-power" - } - steps { - dumpInfo 'ppc64le-centos7-cuda8 Build Info' - script { - sh """ - make mrproper_in_docker - make BRANCH_NAME=${env.BRANCH_NAME} BUILD_NUM=${env.BUILD_ID} centos7_cuda8_in_docker - """ - } - stash includes: 'dist/**/*', name: 'ppc64le-centos7-cuda8.0' - } - } - stage('Build on ppc64le-centos7-cuda9.0') { - agent { - label "ibm-power" - } - steps { - dumpInfo 'ppc64le-centos7-cuda9 Build Info' - script { - sh """ - make mrproper_in_docker - make BRANCH_NAME=${env.BRANCH_NAME} BUILD_NUM=${env.BUILD_ID} centos7_cuda9_in_docker - """ - } - stash includes: 'dist/**/*', name: 'ppc64le-centos7-cuda9.0' - } - } - } - } - - stage('Publish centos7 snapshot to S3') { - when { - branch 'master' - } - agent { - label "linux && docker && !micro" - } - steps { - sh "rm -rf dist" - unstash 'x86_64-centos7-cuda8.0' - unstash 'x86_64-centos7-cuda9.0' - unstash 'ppc64le-centos7-cuda8.0' - unstash 'ppc64le-centos7-cuda9.0' - sh 'echo "Stashed files:" && find dist' - script { - docker.withRegistry("https://docker.h2o.ai", "docker.h2o.ai") { - docker.image('s3cmd').inside { - def versionText 
= utilsLib.getCommandOutput("cat dist/x86_64-centos7-cuda8.0/VERSION.txt") - s3up { - localArtifact = 'dist/*' - artifactId = "h2o4gpu" - version = versionText - keepPrivate = false - } - } - } - } - } - } - } -} - diff --git a/Jenkinsfile-nccl-cuda8.base b/Jenkinsfile-nccl-cuda8.base deleted file mode 100644 index 876992844..000000000 --- a/Jenkinsfile-nccl-cuda8.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist1" -def BUILDTYPE = "nccl-cuda8" -def cuda = "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" -def cudart = "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -def extratag = "-nccl-cuda8" -def linuxwheel = "linux_whl1" -def testtype = "dotestfast" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - diff --git a/Jenkinsfile-nccl-cuda9-aws1-benchmark b/Jenkinsfile-nccl-cuda9-aws1-benchmark deleted file mode 100644 index ef847b64d..000000000 --- a/Jenkinsfile-nccl-cuda9-aws1-benchmark +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/groovy - -//################ FILE IS AUTO-GENERATED from .base files -//################ DO NOT MODIFY -//################ See scripts/make_jenkinsfiles.sh - -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist7" -def BUILDTYPE = "nccl-cuda9-aws1-benchmark" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-aws1-benchmark" -def linuxwheel = "linux_whl7" -def testtype = "dotestperf" -def labelbuild = "ec2P32xlarge" -def labeltest = "ec2P32xlarge" -def labelruntime = "ec2P32xlarge" -def doingbenchmark = "1" -def dobenchmark = "0" -def doruntime = "0" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - -def benchmark_commit_trigger - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 300, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: '10')) - disableConcurrentBuilds() - skipDefaultCheckout() - } - - environment { - MAKE_OPTS = "-s CI=1" // -s: silent mode - BUILD_TYPE = "${BUILDTYPE}" - } - - stages { - 
///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda9-aws1-benchmark") { - - agent { - label "${labelbuild}" - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() - echo "Commit Message: ${commitMessage}" - benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" - } - } - stage("Build Wheel on Linux nccl-cuda9-aws1-benchmark") { - - agent { - label "${labelbuild}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() - } - } - steps { - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") - - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - - script { - // Load the version file content - buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) - utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) - utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} - |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} - |""".stripMargin("|")) - } - - } - } - } - - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda9-aws1-benchmark") { - agent { - label "${labeltest}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) - } - } - steps { - dumpInfo 'Linux Test Info' - // Get source code (should put tests into wheel, then wouldn't have to checkout) - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm - } - script { - unstash 'version_info' - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") - } - 
retryWithTimeout(500 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") - } - } - } - } - } - stage("Build/Publish Runtime Docker Linux nccl-cuda9-aws1-benchmark") { - agent { - label "${labelruntime}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() - } - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - unstash 'version_info' - sh 'echo "Stashed version file:" && ls -l build/' - } - script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") - } - retryWithTimeout(1000 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") - } - } - } - } - } - - stage("Benchmarking Linux nccl-cuda9-aws1-benchmark") { - agent { - label 'master' - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") - } - } - steps { - script { - utilsLib.appendBuildDescription("BENCH \u2713") - } - - echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") - build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 - } - } - - } // end over stages - post { - failure { - node('linux') { - script { - if(env.BRANCH_NAME == "master") { - emailext( - to: "mateusz@h2o.ai, jmckinney@h2o.ai", - subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", - body: '''${JELLY_SCRIPT, template="html_gmail"}''', - attachLog: true, - compressLog: true, - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - ] - ) - } - } - } - } - } -} - - diff --git a/Jenkinsfile-nccl-cuda9-aws1-benchmark.base b/Jenkinsfile-nccl-cuda9-aws1-benchmark.base deleted file mode 100644 index 42e4ef13f..000000000 --- a/Jenkinsfile-nccl-cuda9-aws1-benchmark.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist7" -def BUILDTYPE = 
"nccl-cuda9-aws1-benchmark" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-aws1-benchmark" -def linuxwheel = "linux_whl7" -def testtype = "dotestperf" -def labelbuild = "ec2P32xlarge" -def labeltest = "ec2P32xlarge" -def labelruntime = "ec2P32xlarge" -def doingbenchmark = "1" -def dobenchmark = "0" -def doruntime = "0" - diff --git a/Jenkinsfile-nccl-cuda9-aws1.base b/Jenkinsfile-nccl-cuda9-aws1.base deleted file mode 100644 index d7f972359..000000000 --- a/Jenkinsfile-nccl-cuda9-aws1.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist5" -def BUILDTYPE = "nccl-cuda9-aws1" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-aws1" -def linuxwheel = "linux_whl5" -def testtype = "dotest" -def labelbuild = "ec2P32xlarge" -def labeltest = "ec2P32xlarge" -def labelruntime = "ec2P32xlarge" -def doingbenchmark = "0" -def dobenchmark = "1" -def doruntime = "0" - diff --git a/Jenkinsfile-nccl-cuda9-benchmark b/Jenkinsfile-nccl-cuda9-benchmark deleted file mode 100644 index 3ec99505f..000000000 --- a/Jenkinsfile-nccl-cuda9-benchmark +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/groovy - -//################ FILE IS AUTO-GENERATED from .base files -//################ DO NOT MODIFY -//################ See scripts/make_jenkinsfiles.sh - -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist6" -def BUILDTYPE = "nccl-cuda9-benchmark" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-benchmark" -def linuxwheel = "linux_whl6" -def testtype = "dotestperf" -def labelbuild = "mr-dl3" -def labeltest = "mr-dl3" -def labelruntime = "mr-dl3" -def doingbenchmark = "1" -def dobenchmark = "0" -def doruntime = "0" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - -def benchmark_commit_trigger - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 300, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: '10')) - disableConcurrentBuilds() - skipDefaultCheckout() - } - - environment { - MAKE_OPTS = "-s CI=1" // 
-s: silent mode - BUILD_TYPE = "${BUILDTYPE}" - } - - stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda9-benchmark") { - - agent { - label "${labelbuild}" - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() - echo "Commit Message: ${commitMessage}" - benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" - } - } - stage("Build Wheel on Linux nccl-cuda9-benchmark") { - - agent { - label "${labelbuild}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() - } - } - steps { - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") - - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - - script { - // Load the version file content - buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) - utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) - utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} - |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} - |""".stripMargin("|")) - } - - } - } - } - - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda9-benchmark") { - agent { - label "${labeltest}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) - } - } - steps { - dumpInfo 'Linux Test Info' - // Get source code (should put tests into wheel, then wouldn't have to checkout) - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm - } - script { - unstash 'version_info' - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", 
"${extratag}", "${dist}", "${testtype}") - } - retryWithTimeout(500 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") - } - } - } - } - } - stage("Build/Publish Runtime Docker Linux nccl-cuda9-benchmark") { - agent { - label "${labelruntime}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() - } - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - unstash 'version_info' - sh 'echo "Stashed version file:" && ls -l build/' - } - script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") - } - retryWithTimeout(1000 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") - } - } - } - } - } - - stage("Benchmarking Linux nccl-cuda9-benchmark") { - agent { - label 'master' - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") - } - } - steps { - script { - utilsLib.appendBuildDescription("BENCH \u2713") - } - - echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") - build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 - } - } - - } // end over stages - post { - failure { - node('linux') { - script { - if(env.BRANCH_NAME == "master") { - emailext( - to: "mateusz@h2o.ai, jmckinney@h2o.ai", - subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", - body: '''${JELLY_SCRIPT, template="html_gmail"}''', - attachLog: true, - compressLog: true, - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - ] - ) - } - } - } - } - } -} - - diff --git a/Jenkinsfile-nccl-cuda9-benchmark.base b/Jenkinsfile-nccl-cuda9-benchmark.base deleted file mode 100644 index ffdb6debd..000000000 --- a/Jenkinsfile-nccl-cuda9-benchmark.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist6" 
-def BUILDTYPE = "nccl-cuda9-benchmark" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-benchmark" -def linuxwheel = "linux_whl6" -def testtype = "dotestperf" -def labelbuild = "mr-dl3" -def labeltest = "mr-dl3" -def labelruntime = "mr-dl3" -def doingbenchmark = "1" -def dobenchmark = "0" -def doruntime = "0" - diff --git a/Jenkinsfile-nccl-cuda9.base b/Jenkinsfile-nccl-cuda9.base deleted file mode 100644 index 10cf20cfd..000000000 --- a/Jenkinsfile-nccl-cuda9.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist4" -def BUILDTYPE = "nccl-cuda9" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9" -def linuxwheel = "linux_whl4" -def testtype = "dotest" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "1" -def doruntime = "1" - diff --git a/Jenkinsfile-nonccl-cuda8 b/Jenkinsfile-nonccl-cuda8 deleted file mode 100644 index 071e6c702..000000000 --- a/Jenkinsfile-nonccl-cuda8 +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/groovy - -//################ FILE IS AUTO-GENERATED from .base files -//################ DO NOT MODIFY -//################ See scripts/make_jenkinsfiles.sh - -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist2" -def BUILDTYPE = "nonccl-cuda8" -def cuda = "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" -def cudart = "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -def extratag = "-nonccl-cuda8" -def linuxwheel = "linux_whl2" -def testtype = "dotestfast_nonccl" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - -def benchmark_commit_trigger - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 300, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: 
'10')) - disableConcurrentBuilds() - skipDefaultCheckout() - } - - environment { - MAKE_OPTS = "-s CI=1" // -s: silent mode - BUILD_TYPE = "${BUILDTYPE}" - } - - stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nonccl-cuda8") { - - agent { - label "${labelbuild}" - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() - echo "Commit Message: ${commitMessage}" - benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" - } - } - stage("Build Wheel on Linux nonccl-cuda8") { - - agent { - label "${labelbuild}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() - } - } - steps { - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") - - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - - script { - // Load the version file content - buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) - utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) - utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} - |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} - |""".stripMargin("|")) - } - - } - } - } - - stage("Test Wheel & Pylint & S3up on Linux nonccl-cuda8") { - agent { - label "${labeltest}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) - } - } - steps { - dumpInfo 'Linux Test Info' - // Get source code (should put tests into wheel, then wouldn't have to checkout) - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm - } - script { - unstash 'version_info' - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l 
src/interface_py/${dist}/ || true - """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") - } - retryWithTimeout(500 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") - } - } - } - } - } - stage("Build/Publish Runtime Docker Linux nonccl-cuda8") { - agent { - label "${labelruntime}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() - } - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - unstash 'version_info' - sh 'echo "Stashed version file:" && ls -l build/' - } - script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") - } - retryWithTimeout(1000 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") - } - } - } - } - } - - stage("Benchmarking Linux nonccl-cuda8") { - agent { - label 'master' - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") - } - } - steps { - script { - utilsLib.appendBuildDescription("BENCH \u2713") - } - - echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") - build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 - } - } - - } // end over stages - post { - failure { - node('linux') { - script { - if(env.BRANCH_NAME == "master") { - emailext( - to: "mateusz@h2o.ai, jmckinney@h2o.ai", - subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", - body: '''${JELLY_SCRIPT, template="html_gmail"}''', - attachLog: true, - compressLog: true, - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - ] - ) - } - } - } - } - } -} - - diff --git a/Jenkinsfile-nonccl-cuda8.base b/Jenkinsfile-nonccl-cuda8.base deleted file mode 100644 index 979219e5f..000000000 --- a/Jenkinsfile-nonccl-cuda8.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage 
= '' -def h2o4gpuUtils = null - -def dist = "dist2" -def BUILDTYPE = "nonccl-cuda8" -def cuda = "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" -def cudart = "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -def extratag = "-nonccl-cuda8" -def linuxwheel = "linux_whl2" -def testtype = "dotestfast_nonccl" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - diff --git a/Jenkinsfile-nonccl-cuda9 b/Jenkinsfile-nonccl-cuda9 deleted file mode 100644 index 951f66878..000000000 --- a/Jenkinsfile-nonccl-cuda9 +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/groovy - -//################ FILE IS AUTO-GENERATED from .base files -//################ DO NOT MODIFY -//################ See scripts/make_jenkinsfiles.sh - -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist3" -def BUILDTYPE = "nonccl-cuda9" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nonccl-cuda9" -def linuxwheel = "linux_whl3" -def testtype = "dotestfast_nonccl" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - -def benchmark_commit_trigger - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 300, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: '10')) - disableConcurrentBuilds() - skipDefaultCheckout() - } - - environment { - MAKE_OPTS = "-s CI=1" // -s: silent mode - BUILD_TYPE = "${BUILDTYPE}" - } - - stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nonccl-cuda9") { - - agent { - label "${labelbuild}" - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, 
recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() - echo "Commit Message: ${commitMessage}" - benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" - } - } - stage("Build Wheel on Linux nonccl-cuda9") { - - agent { - label "${labelbuild}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() - } - } - steps { - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") - - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - - script { - // Load the version file content - buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) - utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) - utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} - |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} - |""".stripMargin("|")) - } - - } - } - } - - stage("Test Wheel & Pylint & S3up on Linux nonccl-cuda9") { - agent { - label "${labeltest}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) - } - } - steps { - dumpInfo 'Linux Test Info' - // Get source code (should put tests into wheel, then wouldn't have to checkout) - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm - } - script { - unstash 'version_info' - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") - } - retryWithTimeout(500 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") - } - } - } - } - } - stage("Build/Publish Runtime Docker Linux nonccl-cuda9") { - agent { - label "${labelruntime}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() - } - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 
'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - unstash 'version_info' - sh 'echo "Stashed version file:" && ls -l build/' - } - script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") - } - retryWithTimeout(1000 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") - } - } - } - } - } - - stage("Benchmarking Linux nonccl-cuda9") { - agent { - label 'master' - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") - } - } - steps { - script { - utilsLib.appendBuildDescription("BENCH \u2713") - } - - echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") - build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 - } - } - - } // end over stages - post { - failure { - node('linux') { - script { - if(env.BRANCH_NAME == "master") { - emailext( - to: "mateusz@h2o.ai, jmckinney@h2o.ai", - subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", - body: '''${JELLY_SCRIPT, template="html_gmail"}''', - attachLog: true, - compressLog: true, - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - ] - ) - } - } - } - } - } -} - - diff --git a/Jenkinsfile-nonccl-cuda9.base b/Jenkinsfile-nonccl-cuda9.base deleted file mode 100644 index 4679fdea5..000000000 --- a/Jenkinsfile-nonccl-cuda9.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist3" -def BUILDTYPE = "nonccl-cuda9" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nonccl-cuda9" -def linuxwheel = "linux_whl3" -def testtype = "dotestfast_nonccl" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - diff --git a/Makefile b/Makefile index 4421fe048..e29a056ea 100644 --- a/Makefile +++ b/Makefile @@ -1,85 +1,16 @@ -include Makefile_header.mk -location = $(CURDIR)/$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -WHERE := $(location) -$(info ** -> $(WHERE)) 
-$(info ** ------------------------------------------------------------------ **) -NVCC := $(shell command -v nvcc 2> /dev/null) - -SHELL := /bin/bash # force avoidance of dash as shell -# -# Build specific config -# -CONFIG=make/config.mk -include $(CONFIG) - -VERSION=make/version.mk -include $(VERSION) - -MAJOR_MINOR=$(shell echo $(BASE_VERSION) | sed 's/.*\(^[0-9][0-9]*\.[0-9][0-9]*\).*/\1/g' ) - -# System specific stuff -include src/config2.mk - -ifdef NVCC -ifeq ($(shell test $(CUDA_MAJOR) -ge 9; echo $$?),0) - $(warning Compiling with Cuda9 or higher) - XGB_CUDA ?= -DGPU_COMPUTE_VER="35;52;60;61;70" -else - $(warning Compiling with Cuda8 or lower) - # >=52 required for kmeans for larger data of size rows/32>2^16 - XGB_CUDA ?= -DGPU_COMPUTE_VER="35;52;60;61" -endif -endif - -# Location of local directory with dependencies -DEPS_DIR = deps - -# Detect OS -OS := $(shell uname) -## Python has crazy ideas about os names -ifeq ($(OS), Darwin) - PY_OS ?= "macosx" -else - PY_OS ?= $(OS) -endif - -# see if have ccache for faster compile times if no changes to file -theccache=$(shell echo `which ccache`) -ifeq ($(theccache),) - theccacheclean= -else - theccacheclean=$(theccache) -C -endif - -RANDOM := $(shell bash -c 'echo $$RANDOM') -LOGEXT=$(RANDOM)$(shell date +'_%Y.%m.%d-%H:%M:%S') -NUMPROCS := $(shell cat /proc/cpuinfo|grep processor|wc -l) - -# -# Docker image tagging -# -DOCKER_VERSION_TAG ?= "latest" - -# -# Setup S3 access credentials -# -S3_CMD_LINE := aws s3 - -# -# BUILD_INFO setup -# -H2O4GPU_COMMIT ?= $(shell git rev-parse HEAD) -H2O4GPU_BUILD_DATE := $(shell date) -H2O4GPU_BUILD ?= "LOCAL BUILD @ $(shell git rev-parse --short HEAD) build at $(H2O4GPU_BUILD_DATE)" -H2O4GPU_SUFFIX ?= "+local_$(shell git describe --always --dirty)" +include make/Makefile_header.mk +include make/version.mk +include make/config.mk help: + $(call inform, " -------- Test data sync ---------") + $(call inform, "make sync_open_data Downloads the test data.") + $(call inform, "make sync_small_data Downloads the small test data.") $(call inform, " -------- Build and Install ---------") $(call inform, "make clean Clean all build files.") $(call inform, "make fullinstall") $(call inform, "make fullinstall Clean everything then compile and install everything (for cuda9 with nccl in xgboost).") - $(call inform, "make cpu-fullinstall Clean everything then compile and isntall everything only with CPU") $(call inform, "make build Just Build the whole project.") $(call inform, " -------- Test ---------") $(call inform, "make test Run tests.") @@ -97,6 +28,12 @@ help: $(call inform, "Example Pycharm environment flags: PYTHONPATH=/home/jon/h2o4gpu/src/interface_py:/home/jon/h2o4gpu;PYTHONUNBUFFERED=1;LD_LIBRARY_PATH=/opt/clang+llvm-4.0.0-x86_64-linux-gnu-ubuntu-16.04//lib/:/home/jon/lib:/opt/rstudio-1.0.136/bin/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64::/home/jon/lib/:$LD_LIBRARY_PATH;LLVM4=/opt/clang+llvm-4.0.0-x86_64-linux-gnu-ubuntu-16.04/") $(call inform, "Example Pycharm working directory: /home/jon/h2o4gpu/") +default: fullinstall + +######################################### +# DATA TARGETS +######################################### + sync_small_data: @echo "---- Synchronizing test data ----" mkdir -p $(DATA_DIR) @@ -111,8 +48,31 @@ sync_open_data: mkdir -p $(OPEN_DATA_DIR) $(S3_CMD_LINE) sync --no-sign-request "$(OPEN_DATA_BUCKET)" "$(OPEN_DATA_DIR)" -default: fullinstall +######################################### +# DEPENDENCY MANAGEMENT TARGETS +######################################### + 
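[Editor's note: illustrative sketch only, not part of the patch. The data-sync targets and the relocated `default: fullinstall` above are typically driven as below; `S3_CMD_LINE`, `DATA_DIR`, `OPEN_DATA_BUCKET` and `OPEN_DATA_DIR` are assumed to be provided by the included `make/config.mk`.]
```
# Hypothetical invocation of the reorganized targets
make sync_open_data    # aws s3 sync --no-sign-request of the public test data into $(OPEN_DATA_DIR)
make sync_small_data   # same idea for the smaller private test data set
make fullinstall       # clean, fetch dependencies, build and install everything
```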
+alldeps-install: deps_install fullinstall-xgboost libsklearn + +alldeps: deps_fetch alldeps-install + +deps_fetch: + @echo "---- Fetch dependencies ---- " + bash scripts/gitshallow_submodules.sh + git submodule update +deps_install: + @echo "---- Install dependencies ----" + #-xargs -a requirements.txt -n 1 -P 1 $(PYTHON) -m pip install + easy_install pip + easy_install setuptools + cat src/interface_py/requirements_buildonly.txt src/interface_py/requirements_runtime.txt > requirements.txt + $(PYTHON) -m pip install -r requirements.txt + rm -rf requirements.txt + bash scripts/install_r_deps.sh + +######################################### +# SUBMODULE BUILD TARGETS ######################################### update_submodule: @@ -129,196 +89,146 @@ cpp: py: apply-sklearn_simple build/VERSION.txt $(MAKE) -j all -C src/interface_py -pylint: - $(MAKE) pylint -C src/interface_py +.PHONY: xgboost +xgboost: + @echo "----- Building XGboost target $(XGBOOST_TARGET) -----" + cd xgboost ; make -f Makefile2 $(XGBOOST_TARGET) -fullpy: apply-sklearn_simple pylint +fullinstall-xgboost: xgboost install_xgboost -pyinstall: - $(MAKE) -j install -C src/interface_py +######################################### +# SOURCE QUALITY CHECK TARGETS +######################################### +pylint: + $(MAKE) pylint -C src/interface_py -############################################## +######################################### +# PROJECT BUILD TARGETS +######################################### -alldeps-nccl-cuda8: deps_fetch alldeps_install-nccl-cuda8 -alldeps-nonccl-cuda8: deps_fetch alldeps_install-nonccl-cuda8 -alldeps-nccl-cuda9: deps_fetch alldeps_install-nccl-cuda9 -alldeps-nonccl-cuda9: deps_fetch alldeps_install-nonccl-cuda9 -alldeps-cpuonly: deps_fetch alldeps_install-cpuonly +build: update_submodule build_quick -clean: cleanbuild deps_clean xgboost_clean py3nvml_clean - -rm -rf ./build - -rm -rf ./results/ ./tmp/ +build_quick: cpp py -cleanbuild: cleanpy +build_py: update_submodule clean_py py # avoid cpp -cleanpy: - $(MAKE) -j clean -C src/interface_py +######################################### +# INSTALL TARGETS +######################################### -xgboost_clean: - -pip uninstall -y xgboost - rm -rf xgboost/build/ +install_xgboost: + @echo "----- pip install xgboost built locally -----" + cd xgboost/python-package/dist && $(PYTHON) -m pip install xgboost-0.71-py3-none-any.whl --target ../ -buildquick: cpp py -build: update_submodule buildquick -buildnocpp: update_submodule cleanpy py # avoid cpp +install_py: + $(MAKE) -j install -C src/interface_py -install: pyinstall +install: install_py -### for direct building of xgboost -# https://xgboost.readthedocs.io/en/latest/build.html -libxgboost-nccl-local: - cd xgboost ; make -f Makefile2 libxgboost -libxgboost-nonccl-local: - cd xgboost ; make -f Makefile2 libxgboost2 -libxgboost-cpu-local: - cd xgboost ; make -f Makefile2 libxgboost-cpu +######################################### +# CLEANING TARGETS +######################################### -apply-xgboost-nccl-local: libxgboost-nccl-local pipxgboost -apply-xgboost-nonccl-local: libxgboost-nonccl-local pipxgboost -apply-xgboost-cpu-local: libxgboost-cpu-local pipxgboost +clean: clean_py3nvml clean_xgboost clean_deps clean_py clean_cpp + -rm -rf ./build + -rm -rf ./results/ ./tmp/ -pipxgboost: - @echo "----- pip install xgboost built locally -----" - cd xgboost/python-package/dist && pip install *.whl --upgrade --target ../ +clean_cpp: + rm -rf src/interface_c/_ch2o4gpu_*pu.so + rm -rf 
src/interface_py/h2o4gpu/libs/ch2o4gpu_*pu.py -alldeps-nccl-local: deps_fetch alldeps-install-nccl-local -alldeps-nonccl-local: deps_fetch alldeps-install-nonccl-local -alldeps-cpu-local: deps_fetch alldeps-install-cpu-local +clean_py: + $(MAKE) -j clean -C src/interface_py + +clean_xgboost: + -$(PYTHON) -m pip uninstall -y xgboost + rm -rf xgboost/build/ -# lib for sklearn because don't want to fully apply yet -alldeps-install-nccl-local: deps_install apply-xgboost-nccl-local apply_py3nvml libsklearn -alldeps-install-nonccl-local: deps_install apply-xgboost-nonccl-local apply_py3nvml libsklearn -alldeps-install-cpu-local: deps_install apply-xgboost-cpu-local apply_py3nvml libsklearn -alldeps_install-cpuonly: deps_install apply-xgboost-cpu-local apply_py3nvml libsklearn install_daal_x86_64 +clean_py3nvml: + -$(PYTHON) -m pip uninstall -y py3nvml -##### dependencies -deps_clean: - @echo "----- Cleaning deps -----" +clean_deps: + @echo "----- Cleaning dependencies -----" rm -rf "$(DEPS_DIR)" # sometimes --upgrade leaves extra packages around - cat requirements_buildonly.txt requirements_runtime.txt requirements_runtime_demos.txt > requirements.txt + cat src/interface_py/requirements_buildonly.txt src/interface_py/requirements_runtime.txt src/interface_py/requirements_runtime_demos.txt > requirements.txt sed 's/==.*//g' requirements.txt|grep -v "#" > requirements_plain.txt - -xargs -a requirements_plain.txt -n 1 -P $(NUMPROCS) pip uninstall -y + -xargs -a requirements_plain.txt -n 1 -P $(NUMPROCS) $(PYTHON) -m pip uninstall -y rm -rf requirements_plain.txt requirements.txt -deps_fetch: - @echo "---- Fetch dependencies ---- " - bash scripts/gitshallow_submodules.sh - git submodule update - -deps_install: - @echo "---- Install dependencies ----" - #-xargs -a requirements.txt -n 1 -P 1 pip install --upgrade - easy_install pip - easy_install setuptools - cat requirements_buildonly.txt requirements_runtime.txt > requirements.txt - pip install -r requirements.txt --upgrade - rm -rf requirements.txt - bash scripts/install_r_deps.sh - # issue with their package, have to do this here (still fails sometimes, so remove) -# pip install sphinxcontrib-osexample - -# lib for sklearn because don't want to fully apply yet -alldeps_install-nccl-cuda8: deps_install apply-xgboost-nccl-cuda8 apply_py3nvml libsklearn -alldeps_install-nonccl-cuda8: deps_install apply-xgboost-nonccl-cuda8 apply_py3nvml libsklearn -alldeps_install-nccl-cuda9: deps_install apply-xgboost-nccl-cuda9 apply_py3nvml libsklearn -alldeps_install-nonccl-cuda9: deps_install apply-xgboost-nonccl-cuda9 apply_py3nvml libsklearn - -fullinstall: fullinstall-nccl-cuda9 -fullinstalllocal: fullinstall-nccl-local -cpu-fullinstall: fullinstall-cpuonly - -fullinstall-nccl-local: clean alldeps-nccl-local build install - mkdir -p src/interface_py/dist-nccl-local/ && mv src/interface_py/dist/*.whl src/interface_py/dist-nccl-local/ -fullinstall-nonccl-local: clean alldeps-nonccl-local build install - mkdir -p src/interface_py/dist-nonccl-local/ && mv src/interface_py/dist/*.whl src/interface_py/dist-nonccl-local/ - -fullinstall-nccl-cuda8: clean alldeps-nccl-cuda8 build install - mkdir -p src/interface_py/dist1/ && mv src/interface_py/dist/*.whl src/interface_py/dist1/ +######################################### +# FULL BUILD AND INSTALL TARGETS +######################################### -fullinstall-nonccl-cuda8: clean alldeps-nonccl-cuda8 build install - mkdir -p src/interface_py/dist2/ && mv src/interface_py/dist/*.whl src/interface_py/dist2/ +fullinstall: 
clean alldeps build install + mkdir -p src/interface_py/$(DIST_DIR)/$(PLATFORM)/ && mv src/interface_py/dist/*.whl src/interface_py/$(DIST_DIR)/$(PLATFORM)/ -fullinstall-nccl-cuda9: clean alldeps-nccl-cuda9 build install - mkdir -p src/interface_py/dist4/ && mv src/interface_py/dist/*.whl src/interface_py/dist4/ +buildinstall: alldeps build install + mkdir -p src/interface_py/$(DIST_DIR)/$(PLATFORM)/ && mv src/interface_py/dist/*.whl src/interface_py/$(DIST_DIR)/$(PLATFORM)/ -fullinstall-nonccl-cuda9: clean alldeps-nonccl-cuda9 build install - mkdir -p src/interface_py/dist3/ && mv src/interface_py/dist/*.whl src/interface_py/dist3/ +######################################### +# DOCKER TARGETS +######################################### -fullinstall-cpuonly: clean alldeps-cpuonly build install - mkdir -p src/interface_py/dist-cpuonly-local/ && mv src/interface_py/dist/*.whl src/interface_py/dist-cpuonly-local/ +DOCKER_CUDA_VERSION?=9.0 -#################################################### -# Docker stuff +ifeq (${DOCKER_CUDA_VERSION},8.0) + DOCKER_CUDNN_VERSION?=5 +else + DOCKER_CUDNN_VERSION?=7 +endif -# default for docker is nccl-cuda9 -docker-build: docker-build-nccl-cuda9 -docker-runtime: docker-runtime-nccl-cuda9 -docker-runtests: docker-runtests-nccl-cuda9 -get_docker: get_docker-nccl-cuda9 -load_docker: docker-runtime-nccl-cuda9-load -run_in_docker: run_in_docker-nccl-cuda9 +centos7_cuda80_in_docker: + $(MAKE) DOCKER_CUDA_VERSION=8.0 docker-build +centos7_cuda90_in_docker: + $(MAKE) DOCKER_CUDA_VERSION=9.0 docker-build -############### CUDA9 +centos7_cuda92_in_docker: + $(MAKE) DOCKER_CUDA_VERSION=9.2 docker-build -docker-build-nccl-cuda9: - @echo "+-- Building Wheel in Docker (-nccl-cuda9) --+" - rm -rf src/interface_py/dist/*.whl ; rm -rf src/interface_py/dist4/*.whl - export CONTAINER_NAME="localmake-build" ;\ +docker-build: + @echo "+-- Building Wheel in Docker --+" + export CONTAINER_NAME="local-make-build-cuda$(DOCKER_CUDA_VERSION)" ;\ export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda9" ;\ - export dockerimage="nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" ;\ - export H2O4GPU_BUILD="" ;\ - export H2O4GPU_SUFFIX="" ;\ - export makeopts="" ;\ - export dist="dist4" ;\ + export extratag="-cuda$(DOCKER_CUDA_VERSION)" ;\ + export dockerimage="nvidia/cuda${DOCKER_ARCH}:$(DOCKER_CUDA_VERSION)-cudnn$(DOCKER_CUDNN_VERSION)-devel-centos7" ;\ bash scripts/make-docker-devel.sh -docker-runtime-nccl-cuda9: +docker-runtime: @echo "+--Building Runtime Docker Image Part 2 (-nccl-cuda9) --+" - export CONTAINER_NAME="localmake-runtime" ;\ + export CONTAINER_NAME="local-make-runtime-cuda$(DOCKER_CUDA_VERSION)" ;\ export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda9" ;\ - export encodedFullVersionTag=$(BASE_VERSION) ;\ + export extratag="-cuda$(DOCKER_CUDA_VERSION)" ;\ export fullVersionTag=$(BASE_VERSION) ;\ - export buckettype="releases/bleeding-edge" ;\ - export dockerimage="nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" ;\ + export dockerimage="nvidia/cuda${DOCKER_ARCH}:$(DOCKER_CUDA_VERSION)-cudnn$(DOCKER_CUDNN_VERSION)-runtime-centos7" ;\ bash scripts/make-docker-runtime.sh -.PHONY: docker-runtime-nccl-cuda9-run - -docker-runtime-nccl-cuda9-run: - @echo "+-Running Docker Runtime Image (-nccl-cuda9) --+" - export CONTAINER_NAME="localmake-runtime-run" ;\ - export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda9" ;\ - export encodedFullVersionTag=$(BASE_VERSION) ;\ +docker-runtime-run: + @echo "+-Running Docker Runtime Image (-cuda9) --+" export 
fullVersionTag=$(BASE_VERSION) ;\ - export buckettype="releases/bleeding-edge" ;\ - export dockerimage="nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" ;\ - nvidia-docker run --init --rm --name $${CONTAINER_NAME} -d -t -u `id -u`:`id -g` --entrypoint=bash opsh2oai/h2o4gpu-$${versionTag}$${extratag}-runtime:latest + nvidia-docker run --init --rm --name "localmake-runtime-run" -d -t -u `id -u`:`id -g` --entrypoint=bash opsh2oai/h2o4gpu-$(BASE_VERSION)-cuda$(DOCKER_CUDA_VERSION)-runtime:latest -docker-runtests-nccl-cuda9: +docker-runtests: @echo "+-- Run tests in docker (-nccl-cuda9) --+" export CONTAINER_NAME="localmake-runtests" ;\ - export extratag="-nccl-cuda9" ;\ - export dockerimage="nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" ;\ - export dist="dist4" ;\ + export extratag="-cuda$(DOCKER_CUDA_VERSION)" ;\ + export dockerimage="nvidia/cuda${DOCKER_ARCH}:$(DOCKER_CUDA_VERSION)-cudnn$(DOCKER_CUDNN_VERSION)-devel-centos7" ;\ export target="dotest" ;\ bash scripts/make-docker-runtests.sh -get_docker-nccl-cuda9: - wget https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/$(MAJOR_MINOR)-nccl-cuda9/h2o4gpu-$(BASE_VERSION)-nccl-cuda9-runtime.tar.bz2 +get_docker: + wget https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/$(MAJOR_MINOR)-cuda$(DOCKER_CUDA_VERSION)/h2o4gpu-$(BASE_VERSION)-cuda$(DOCKER_CUDA_VERSION)-runtime.tar.bz2 -docker-runtime-nccl-cuda9-load: - pbzip2 -dc h2o4gpu-$(BASE_VERSION)-nccl-cuda9-runtime.tar.bz2 | nvidia-docker load +docker-runtime-load: + pbzip2 -dc h2o4gpu-$(BASE_VERSION)-cuda$(DOCKER_CUDA_VERSION)-runtime.tar.bz2 | nvidia-docker load -run_in_docker-nccl-cuda9: - -mkdir -p log ; nvidia-docker run --name localhost --rm -p 8888:8888 -u `id -u`:`id -g` -v `pwd`/log:/log --entrypoint=./run.sh opsh2oai/h2o4gpu-$(BASE_VERSION)-nccl-cuda9-runtime & +run_in_docker: + -mkdir -p log ; nvidia-docker run --name localhost --rm -p 8888:8888 -u `id -u`:`id -g` -v `pwd`/log:/log --entrypoint=./run.sh opsh2oai/h2o4gpu-$(BASE_VERSION)-cuda$(DOCKER_CUDA_VERSION)-runtime & -find log -name jupyter* -type f -printf '%T@ %p\n' | sort -k1 -n | awk '{print $2}' | tail -1 | xargs cat | grep token | grep http | grep -v NotebookApp +.PHONY: docker-build docker-runtime docker-runtime-run docker-runtests get_docker docker-runtime-load run_in_docker ############### CPU docker-build-cpu: @@ -327,7 +237,7 @@ docker-build-cpu: export CONTAINER_NAME="localmake-build" ;\ export versionTag=$(BASE_VERSION) ;\ export extratag="-cpu" ;\ - export dockerimage="ubuntu:16.04" ;\ + export dockerimage="centos:6" ;\ export H2O4GPU_BUILD="" ;\ export H2O4GPU_SUFFIX="" ;\ export makeopts="" ;\ @@ -342,7 +252,7 @@ docker-runtime-cpu: export encodedFullVersionTag=$(BASE_VERSION) ;\ export fullVersionTag=$(BASE_VERSION) ;\ export buckettype="releases/bleeding-edge" ;\ - export dockerimage="ubuntu:16.04" ;\ + export dockerimage="centos:6" ;\ bash scripts/make-docker-runtime.sh docker-runtime-cpu-run: @@ -353,14 +263,14 @@ docker-runtime-cpu-run: export encodedFullVersionTag=$(BASE_VERSION) ;\ export fullVersionTag=$(BASE_VERSION) ;\ export buckettype="releases/bleeding-edge" ;\ - export dockerimage="ubuntu:16.04" ;\ + export dockerimage="centos:6" ;\ docker run --init --rm --name $${CONTAINER_NAME} -d -t -u `id -u`:`id -g` --entrypoint=bash opsh2oai/h2o4gpu-$${versionTag}$${extratag}-runtime:latest docker-runtests-cpu: @echo "+-- Run tests in docker (-nccl-cuda9) --+" export CONTAINER_NAME="localmake-runtests" ;\ export extratag="-cpu" ;\ - export
dockerimage="ubuntu:16.04" ;\ + export dockerimage="centos:6" ;\ export dist="dist4" ;\ export target="dotest" ;\ bash scripts/make-docker-runtests.sh @@ -376,76 +286,9 @@ run_in_docker-cpu: -find log -name jupyter* -type f -printf '%T@ %p\n' | sort -k1 -n | awk '{print $2}' | tail -1 | xargs cat | grep token | grep http | grep -v NotebookApp -######### CUDA8 (copy/paste above, and then replace cuda9 -> cuda8 and cuda:9.0-cudnn7 -> cuda:8.0-cudnn5 and dist4->dist1) - -docker-build-nccl-cuda8: - @echo "+-- Building Wheel in Docker (-nccl-cuda8) --+" - rm -rf src/interface_py/dist/*.whl - export CONTAINER_NAME="localmake-build" ;\ - export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda8" ;\ - export dockerimage="nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" ;\ - export H2O4GPU_BUILD="" ;\ - export H2O4GPU_SUFFIX="" ;\ - export makeopts="" ;\ - export dist="dist1" ;\ - bash scripts/make-docker-devel.sh - -docker-runtime-nccl-cuda8: - @echo "+--Building Runtime Docker Image Part 2 (-nccl-cuda8) --+" - export CONTAINER_NAME="localmake-runtime" ;\ - export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda8" ;\ - export encodedFullVersionTag=$(BASE_VERSION) ;\ - export fullVersionTag=$(BASE_VERSION) ;\ - export buckettype="releases/bleeding-edge" ;\ - export dockerimage="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" ;\ - bash scripts/make-docker-runtime.sh - -docker-runtime-nccl-cuda8-load: - pbzip2 -dc h2o4gpu-$(BASE_VERSION)-nccl-cuda8-runtime.tar.bz2 | nvidia-docker load - -.PHONY: docker-runtime-nccl-cuda8-run - -docker-runtime-nccl-cuda8-run: - @echo "+-Running Docker Runtime Image (-nccl-cuda8) --+" - export CONTAINER_NAME="localmake-runtime-run" ;\ - export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda8" ;\ - export encodedFullVersionTag=$(BASE_VERSION) ;\ - export fullVersionTag=$(BASE_VERSION) ;\ - export buckettype="releases/bleeding-edge" ;\ - export dockerimage="nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" ;\ - nvidia-docker run --init --rm --name $${CONTAINER_NAME} -d -t -u `id -u`:`id -g` --entrypoint=bash opsh2oai/h2o4gpu-$${versionTag}$${extratag}-runtime:latest - -docker-runtests-nccl-cuda8: - @echo "+-- Run tests in docker (-nccl-cuda8) --+" - export CONTAINER_NAME="localmake-runtests" ;\ - export extratag="-nccl-cuda8" ;\ - export dockerimage="nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" ;\ - export dist="dist1" ;\ - export target="dotest" ;\ - bash scripts/make-docker-runtests.sh - -get_docker-nccl-cuda8: - wget https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/$(MAJOR_MINOR)-nccl-cuda8/h2o4gpu-$(BASE_VERSION)-nccl-cuda8-runtime.tar.bz2 - -run_in_docker-nccl-cuda8: - mkdir -p log ; nvidia-docker run --name localhost --rm -p 8888:8888 -u `id -u`:`id -g` -v `pwd`/log:/log --entrypoint=./run.sh opsh2oai/h2o4gpu-$(BASE_VERSION)-nccl-cuda8-runtime & - find log -name jupyter* | xargs cat | grep token | grep http | grep -v NotebookApp - -# uses https://github.com/Azure/fast_retraining -testxgboost: # liblightgbm (assumes one installs lightgdm yourself or run make liblightgbm) - bash testsxgboost/runtestxgboost.sh - bash testsxgboost/extracttestxgboost.sh - bash tests_open/showresults.sh # same for all tests - -# install daal -install_daal_x86_64: - @echo "----- Install Daal Python library -----" - bash scripts/daal/install_daal_locally.sh - -################ +######################################### +# TARGETS INSTALLING LIBRARIES +######################################### # 
http://developer2.download.nvidia.com/compute/cuda/9.0/secure/rc/docs/sidebar/CUDA_Quick_Start_Guide.pdf?_ZyOB0PlGZzBUluXp3FtoWC-LMsTsc5H6SxIaU0i9pGNyWzZCgE-mhnAg2m66Nc3WMDvxWvvQWsXGMqr1hUliGOZvoothMTVnDe12dQQgxwS4Asjoz8XiOvPYOjV6yVQtkFhvDztUlJbNSD4srPWUU2-XegCRFII8_FIpxXERaWV libcuda9: @@ -467,42 +310,16 @@ libnccl2: sudo apt-key add /var/nccl-repo-2.0.5-ga-cuda9.0/7fa2af80.pub sudo apt install libnccl2 libnccl-dev -apply-xgboost-nccl-cuda8: apply-xgboost-nccl-local #pipxgboost-nccl-cuda8 -apply-xgboost-nonccl-cuda8: apply-xgboost-nonccl-local #pipxgboost-nonccl-cuda8 -apply-xgboost-nccl-cuda9: apply-xgboost-nccl-local #pipxgboost-nccl-cuda9 -apply-xgboost-nonccl-cuda9: apply-xgboost-nonccl-local #pipxgboost-nonccl-cuda9 - -pipxgboost-nccl-cuda8: pipxgboost - @echo "----- pip install xgboost-nccl-cuda8 from S3 -----" - mkdir -p xgboost/python-package/dist ; cd xgboost/python-package/dist && pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/bleeding-edge/ai/h2o/xgboost/0.7-nccl-cuda8/xgboost-0.71-py3-none-any.whl --upgrade --target ../ -pipxgboost-nonccl-cuda8: pipxgboost - @echo "----- pip install xgboost-nonccl-cuda8 from S3 -----" - mkdir -p xgboost/python-package/dist ; cd xgboost/python-package/dist && pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/bleeding-edge/ai/h2o/xgboost/0.7-nonccl-cuda8/xgboost-0.71-py3-none-any.whl --upgrade --target ../ -pipxgboost-nccl-cuda9: pipxgboost - @echo "----- pip install xgboost-nccl-cuda9 from S3 -----" - mkdir -p xgboost/python-package/dist ; cd xgboost/python-package/dist && pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/bleeding-edge/ai/h2o/xgboost/0.7-nccl-cuda9/xgboost-0.71-py3-none-any.whl --upgrade --target ../ -pipxgboost-nonccl-cuda9: pipxgboost - @echo "----- pip install xgboost-nonccl-cuda9 from S3 -----" - mkdir -p xgboost/python-package/dist ; cd xgboost/python-package/dist && pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/bleeding-edge/ai/h2o/xgboost/0.7-nonccl-cuda9/xgboost-0.71-py3-none-any.whl --upgrade --target ../ - - -py3nvml_clean: - -pip uninstall -y py3nvml - -apply_py3nvml: - mkdir -p py3nvml ; cd py3nvml # ; pip install -e git+https://github.com/fbcotter/py3nvml#egg=py3nvml --upgrade --root=. - - liblightgbm: # only done if user directly requests, never an explicit dependency echo "See https://github.com/Microsoft/LightGBM/wiki/Installation-Guide#with-gpu-support for details" echo "sudo apt-get install libboost-dev libboost-system-dev libboost-filesystem-dev cmake" rm -rf LightGBM ; result=`git clone --recursive https://github.com/Microsoft/LightGBM` - cd LightGBM && mkdir build ; cd build && cmake .. -DUSE_GPU=1 -DOpenCL_LIBRARY=$(CUDA_HOME)/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=$(CUDA_HOME)/include/ && make -j && cd ../python-package ; python setup.py install --precompile --gpu && cd ../ && pip install arff tqdm keras runipy h5py --upgrade + cd LightGBM && mkdir build ; cd build && cmake .. 
-DUSE_GPU=1 -DOpenCL_LIBRARY=$(CUDA_HOME)/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=$(CUDA_HOME)/include/ && make -j && cd ../python-package ; $(PYTHON) setup.py install --precompile --gpu && cd ../ && $(PYTHON) -m pip install arff tqdm keras runipy h5py libsklearn: # assume already submodule gets sklearn @echo "----- Make sklearn wheel -----" bash scripts/prepare_sklearn.sh # repeated calls don't hurt - rm -rf sklearn && mkdir -p sklearn && cd scikit-learn && python setup.py sdist bdist_wheel + rm -rf sklearn && mkdir -p sklearn && cd scikit-learn && $(PYTHON) setup.py sdist bdist_wheel apply-sklearn: libsklearn apply-sklearn_simple @@ -524,36 +341,14 @@ apply-sklearn_link: apply-sklearn_initmerge: bash ./scripts/apply_sklearn_initmerge.sh -#################### Jenkins specific - -######### h2o.ai systems -# for nccl cuda8 build -fullinstalljenkins-nccl-cuda8: mrproper fullinstall-nccl-cuda8 -fullinstalljenkins-nonccl-cuda8: mrproper fullinstall-nonccl-cuda8 -fullinstalljenkins-nccl-cuda9: mrproper fullinstall-nccl-cuda9 -fullinstalljenkins-nonccl-cuda9: mrproper fullinstall-nonccl-cuda9 -fullinstalljenkins-cpu: mrproper fullinstall-cpu-local - -# for nccl cuda9 build benchmark -fullinstalljenkins-nccl-cuda9-benchmark: mrproper clean alldeps-nccl-cuda9 build install - mkdir -p src/interface_py/dist6/ && mv src/interface_py/dist/*.whl src/interface_py/dist6/ - -########## AWS -# for nccl cuda9 build aws build/test -fullinstalljenkins-nccl-cuda9-aws1: mrproper clean alldeps-nccl-cuda9 build install - mkdir -p src/interface_py/dist5/ && mv src/interface_py/dist/*.whl src/interface_py/dist5/ - -# for nccl cuda9 build benchmark on aws1 -fullinstalljenkins-nccl-cuda9-aws1-benchmark: mrproper clean alldeps-nccl-cuda9 build install - mkdir -p src/interface_py/dist7/ && mv src/interface_py/dist/*.whl src/interface_py/dist7/ - .PHONY: mrproper mrproper: clean @echo "----- Cleaning properly -----" git clean -f -d -x - -################## +######################################### +# TEST TARGETS +######################################### #WIP dotestdemos: @@ -562,7 +357,7 @@ dotestdemos: bash scripts/convert_ipynb2py.sh # can't do -n auto due to limits on GPU memory #pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml examples/py 2> ./tmp/h2o4gpu-examplespy.$(LOGEXT).log - -pip install pytest-ipynb # can't put in requirements since problem with jenkins and runipy + -$(PYTHON) -m pip install pytest-ipynb # can't put in requirements since problem with jenkins and runipy py.test -v -s examples/py 2> ./tmp/h2o4gpu-examplespy.$(LOGEXT).log @@ -570,7 +365,7 @@ dotest: rm -rf ./tmp/ mkdir -p ./tmp/ # can't do -n auto due to limits on GPU memory - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml tests_open 2> ./tmp/h2o4gpu-test.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml tests/python/open_data 2> ./tmp/h2o4gpu-test.$(LOGEXT).log # Test R package when appropriate bash scripts/test_r_pkg.sh @@ -578,86 +373,88 @@ dotestfast: rm -rf ./tmp/ mkdir -p ./tmp/ # can't do -n auto due to limits on GPU memory - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast1.xml tests_open/glm/test_glm_simple.py 2> ./tmp/h2o4gpu-testfast1.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace 
--junit-xml=build/test-reports/h2o4gpu-testfast2.xml tests_open/gbm/test_xgb_sklearn_wrapper.py 2> ./tmp/h2o4gpu-testfast2.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast3.xml tests_open/svd/test_tsvd.py 2> ./tmp/h2o4gpu-testfast3.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast4.xml tests_open/kmeans/test_kmeans.py 2> ./tmp/h2o4gpu-testfast4.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast1.xml tests/python/open_data/glm/test_glm_simple.py 2> ./tmp/h2o4gpu-testfast1.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast2.xml tests/python/open_data/gbm/test_xgb_sklearn_wrapper.py 2> ./tmp/h2o4gpu-testfast2.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast3.xml tests/python/open_data/svd/test_tsvd.py 2> ./tmp/h2o4gpu-testfast3.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast4.xml tests/python/open_data/kmeans/test_kmeans.py 2> ./tmp/h2o4gpu-testfast4.$(LOGEXT).log dotestfast_nonccl: rm -rf ./tmp/ mkdir -p ./tmp/ # can't do -n auto due to limits on GPU memory - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast1.xml tests_open/glm/test_glm_simple.py 2> ./tmp/h2o4gpu-testfast1.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast3.xml tests_open/svd/test_tsvd.py 2> ./tmp/h2o4gpu-testfast3.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast4.xml tests_open/kmeans/test_kmeans.py 2> ./tmp/h2o4gpu-testfast4.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast1.xml tests/python/open_data/glm/test_glm_simple.py 2> ./tmp/h2o4gpu-testfast1.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast3.xml tests/python/open_data/svd/test_tsvd.py 2> ./tmp/h2o4gpu-testfast3.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast4.xml tests/python/open_data/kmeans/test_kmeans.py 2> ./tmp/h2o4gpu-testfast4.$(LOGEXT).log dotestsmall: rm -rf ./tmp/ rm -rf build/test-reports 2>/dev/null mkdir -p ./tmp/ # can't do -n auto due to limits on GPU memory - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testsmall.xml tests_small 2> ./tmp/h2o4gpu-testsmall.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testsmall.xml tests/python/small 2> ./tmp/h2o4gpu-testsmall.$(LOGEXT).log dotestbig: mkdir -p ./tmp/ - pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testbig.xml tests_big 2> ./tmp/h2o4gpu-testbig.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testbig.xml tests/python/big 2> ./tmp/h2o4gpu-testbig.$(LOGEXT).log -##################### +######################################### 
+# BENCHMARKING TARGETS +######################################### dotestperf: mkdir -p ./tmp/ - -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml tests_open 2> ./tmp/h2o4gpu-testperf.$(LOGEXT).log - bash tests_open/showresults.sh &> ./tmp/h2o4gpu-testperf-results.$(LOGEXT).log + -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml tests/python/open_data 2> ./tmp/h2o4gpu-testperf.$(LOGEXT).log + bash tests/python/open_data/showresults.sh &> ./tmp/h2o4gpu-testperf-results.$(LOGEXT).log dotestsmallperf: mkdir -p ./tmp/ - -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testsmallperf.xml tests_small 2> ./tmp/h2o4gpu-testsmallperf.$(LOGEXT).log - bash tests_open/showresults.sh &> ./tmp/h2o4gpu-testsmallperf-results.$(LOGEXT).log + -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testsmallperf.xml tests/python/small 2> ./tmp/h2o4gpu-testsmallperf.$(LOGEXT).log + bash tests/python/open_data/showresults.sh &> ./tmp/h2o4gpu-testsmallperf-results.$(LOGEXT).log dotestbigperf: mkdir -p ./tmp/ - -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testbigperf.xml tests_big 2> ./tmp/h2o4gpu-testbigperf.$(LOGEXT).log - bash tests_open/showresults.sh &> ./tmp/h2o4gpu-testbigperf-results.$(LOGEXT).log # still just references results directory in base path + -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testbigperf.xml tests/python/big 2> ./tmp/h2o4gpu-testbigperf.$(LOGEXT).log + bash tests/python/open_data/showresults.sh &> ./tmp/h2o4gpu-testbigperf-results.$(LOGEXT).log # still just references results directory in base path ######################### use python instead of pytest (required in some cases if pytest leads to hang) dotestperfpython: mkdir -p ./tmp/ - -bash tests_open/getresults.sh $(LOGEXT) - bash tests_open/showresults.sh + -bash tests/python/open_data/getresults.sh $(LOGEXT) + bash tests/python/open_data/showresults.sh dotestbigperfpython: mkdir -p ./tmp/ -bash testsbig/getresultsbig.sh $(LOGEXT) - bash tests_open/showresults.sh # still just references results directory in base path + bash tests/python/open_data/showresults.sh # still just references results directory in base path ################### H2O.ai public tests for pass/fail testdemos: dotestdemos -test: buildquick dotest +test: build_quick dotest testquick: dotest ################ H2O.ai public tests for performance -testperf: buildquick dotestperf # faster if also run sync_open_data before doing this test +testperf: build_quick dotestperf # faster if also run sync_open_data before doing this test ################### H2O.ai private tests for pass/fail -testsmall: buildquick sync_open_data sync_other_data dotestsmall +testsmall: build_quick sync_open_data sync_other_data dotestsmall testsmallquick: dotestsmall -testbig: buildquick sync_open_data sync_other_data dotestbig +testbig: build_quick sync_open_data sync_other_data dotestbig testbigquick: dotestbig ################ H2O.ai private tests for performance -testsmallperf: buildquick sync_open_data sync_other_data dotestsmallperf 
+testsmallperf: build_quick sync_open_data sync_other_data dotestsmallperf -testbigperf: buildquick sync_open_data sync_other_data dotestbigperf +testbigperf: build_quick sync_open_data sync_other_data dotestbigperf testsmallperfquick: dotestsmallperf @@ -671,7 +468,9 @@ test_cpp: clean_test_cpp: $(MAKE) -j clean_cpp_tests -C src/ -#################### Build info +######################################### +# BUILD INFO TARGETS +######################################### # Generate local build info src/interface_py/h2o4gpu/BUILD_INFO.txt: @@ -689,7 +488,7 @@ src/interface_py/h2o4gpu/BUILD_INFO.txt: build/VERSION.txt: src/interface_py/h2o4gpu/BUILD_INFO.txt @mkdir -p build - cd src/interface_py/; python setup.py --version > ../../build/VERSION.txt + cd src/interface_py/; $(PYTHON) setup.py --version > ../../build/VERSION.txt .PHONY: base_version base_version: @@ -703,137 +502,5 @@ endif Jenkinsfiles: bash scripts/make_jenkinsfiles.sh -#---------------------------------------------------------------------- -# CentOS 7 build API BEGIN -# -# Summary -# -# command: make centos7_cuda8_in_docker -# output: dist/x86_64-centos7-cuda8/h2o4gpu*.whl -# -# command: make centos7_cuda9_in_docker -# output: dist/x86_64-centos7-cuda9/h2o4gpu*.whl -# -# Details -# -# This is only supported in a docker environment. -# -# The 'centos7' make target does the actual work. -# -# The 'centos7_cudaN_in_docker' make target sets up the docker environment -# and then invokes the work inside that environment. -# -# The build output is put in the 'dist' directory in h2o4gpu level. -#---------------------------------------------------------------------- - -DIST_DIR = dist - -ARCH := $(shell arch) -PLATFORM = $(ARCH)-centos7-cuda$(MY_CUDA_VERSION) - -CONTAINER_NAME_SUFFIX ?= -$(USER) -CONTAINER_NAME ?= opsh2oai/dai-h2o4gpu$(CONTAINER_NAME_SUFFIX) - -PROJECT_VERSION := $(BASE_VERSION) -BRANCH_NAME ?= $(shell git rev-parse --abbrev-ref HEAD) -BRANCH_NAME_SUFFIX = +$(BRANCH_NAME) -BUILD_NUM ?= local -BUILD_NUM_SUFFIX = .$(BUILD_NUM) -VERSION = $(PROJECT_VERSION)$(BRANCH_NAME_SUFFIX)$(BUILD_NUM_SUFFIX) -CONTAINER_TAG := $(shell echo $(VERSION) | sed 's/+/-/g') - -CONTAINER_NAME_TAG = $(CONTAINER_NAME):$(CONTAINER_TAG) - -ARCH_SUBST = undefined -FROM_SUBST = undefined -ifeq ($(ARCH),x86_64) - FROM_SUBST = nvidia\/cuda:$(MY_CUDA_VERSION)-cudnn$(MY_CUDNN_VERSION)-devel-centos7 - ARCH_SUBST = $(ARCH) -endif -ifeq ($(ARCH),ppc64le) - FROM_SUBST = nvidia\/cuda-ppc64le:$(MY_CUDA_VERSION)-cudnn$(MY_CUDNN_VERSION)-devel-centos7 - ARCH_SUBST = $(ARCH) -endif - -fullinstalljenkins-nonccl-cuda8-centos: mrproper centos7_in_docker - -Dockerfile-build-centos7.$(PLATFORM): Dockerfile-build-centos7.in - cat $< | sed 's/FROM_SUBST/$(FROM_SUBST)/'g | sed 's/ARCH_SUBST/$(ARCH_SUBST)/g' | sed 's/MY_CUDA_VERSION_SUBST/$(MY_CUDA_VERSION)/g' > $@ - -centos7_cuda8_in_docker: MY_CUDA_VERSION=8.0 -centos7_cuda8_in_docker: MY_CUDNN_VERSION=5 -centos7_cuda8_in_docker: - $(MAKE) MY_CUDA_VERSION=$(MY_CUDA_VERSION) MY_CUDNN_VERSION=$(MY_CUDNN_VERSION) centos7_in_docker_impl - -centos7_cuda9_in_docker: MY_CUDA_VERSION=9.0 -centos7_cuda9_in_docker: MY_CUDNN_VERSION=7 -centos7_cuda9_in_docker: - $(MAKE) MY_CUDA_VERSION=$(MY_CUDA_VERSION) MY_CUDNN_VERSION=$(MY_CUDNN_VERSION) centos7_in_docker_impl - -centos7_cuda91_in_docker: MY_CUDA_VERSION=9.1 -centos7_cuda91_in_docker: MY_CUDNN_VERSION=7 -centos7_cuda91_in_docker: - $(MAKE) MY_CUDA_VERSION=$(MY_CUDA_VERSION) MY_CUDNN_VERSION=$(MY_CUDNN_VERSION) centos7_in_docker_impl - -centos7_in_docker_impl: 
Dockerfile-build-centos7.$(PLATFORM) - mkdir -p $(DIST_DIR)/$(PLATFORM) - docker build \ - -t $(CONTAINER_NAME_TAG) \ - -f Dockerfile-build-centos7.$(PLATFORM) \ - . - docker run \ - --rm \ - --init \ - -u `id -u`:`id -g` \ - -v `pwd`:/dot \ - -w /dot \ - --entrypoint /bin/bash \ - -e "MY_CUDA_VERSION=$(MY_CUDA_VERSION)" \ - -e "MY_CUDNN_VERSION=$(MY_CUDNN_VERSION)" \ - $(CONTAINER_NAME_TAG) \ - -c 'make centos7' - echo $(VERSION) > $(DIST_DIR)/$(PLATFORM)/VERSION.txt - -centos7_setup: - rm -fr /tmp/build - cp -a /dot/. /tmp/build - sed -i 's/cmake/# cmake/' /tmp/build/requirements_buildonly.txt - -centos7_build: - (cd /tmp/build && \ - IFLAGS="-I/usr/include/openblas" \ - OPENBLAS_PREFIX="open" \ - USEPARALLEL=0 \ - $(MAKE) \ - deps_fetch \ - apply-xgboost-nonccl-local \ - apply_py3nvml \ - libsklearn \ - build) - mkdir -p dist/$(PLATFORM) - cp /tmp/build/src/interface_py/dist/h2o4gpu*.whl dist/$(PLATFORM) - chmod -R o+rwx dist/$(PLATFORM) - -centos7: - $(MAKE) centos7_setup - $(MAKE) centos7_build - -# Note: We don't actually need to run mrproper in docker (as root) because -# the build step runs as the user. But keep the API for consistency. -mrproper_in_docker: - git clean -f -d -x - -printvars: MY_CUDA_VERSION=8.0 -printvars: MY_CUDNN_VERSION=5 -printvars: - @echo $(PLATFORM) - @echo $(PROJECT_VERSION) - @echo $(VERSION) - @echo $(CONTAINER_TAG) - -#---------------------------------------------------------------------- -# CentOS 7 build API END -#---------------------------------------------------------------------- - .PHONY: ALWAYS_REBUILD .ALWAYS_REBUILD: diff --git a/README.md b/README.md index ca54df121..b3a0265cb 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,12 @@ Daal library added for CPU, currently supported only x86_64 architecture. ## Requirements -* PC with Ubuntu 16.04+ w/ numpy 1.14.x +* PC running Linux with glibc 2.17+ * Install CUDA with bundled display drivers ( [CUDA 8](https://developer.nvidia.com/cuda-downloads) or - [CUDA 9.0](https://developer.nvidia.com/cuda-release-candidate-download) ) - -NOTE: CUDA9.1 is not currently compatible with H2O4GPU. + [CUDA 9](https://developer.nvidia.com/cuda-release-candidate-download) ) When installing, choose to link the cuda install to /usr/local/cuda . Ensure to reboot after installing the new nvidia drivers.
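[Editor's note: a quick sanity check for the CUDA prerequisite described above; not part of the diff, and the paths are assumptions based on the `/usr/local/cuda` symlink the text asks for.]
```
# Verify the toolkit symlink and the display driver after the reboot
ls -l /usr/local/cuda              # should point at the installed toolkit, e.g. cuda-9.0
/usr/local/cuda/bin/nvcc --version
nvidia-smi                         # driver loaded and GPUs visible
```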
@@ -53,24 +51,18 @@ sudo apt-get -y install libcurl4-openssl-dev libssl-dev libxml2-dev Download the Python wheel file (For Python 3.6 on linux_x86_64): * Stable: - * [CUDA8 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) - * [CUDA8 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nonccl-cuda8/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) - * [CUDA9 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nccl-cuda9/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) - * [CUDA9 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nonccl-cuda9/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) + * [CUDA8](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) + * [CUDA9](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nccl-cuda9/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) * Bleeding edge (changes with every successful master branch build): - * [CUDA8 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) - * [CUDA8 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nonccl-cuda8/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) - * [CUDA9 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda9/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) - * [CUDA9 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nonccl-cuda9/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) + * [CUDA8](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) + * [CUDA9.0](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda90/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) + * [CUDA9.2](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda92/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) * [For Conda (unsupported and untested by H2O.ai)] ``` pip install --extra-index-url https://pypi.anaconda.org/gpuopenanalytics/simple h2o4gpu ``` -The "nccl" (NCCL) versions give support to multi-GPU in xgboost and in other algorithms. The "nonccl" versions are provided -in case of system instability in production environments due to NCCL. - -Start a fresh pyenv or virtualenv session. + Start a fresh pyenv or virtualenv session. Install the Python wheel file. NOTE: If you don't use a fresh environment, this will overwrite your py3nvml and xgboost installations to use our validated @@ -131,9 +123,9 @@ predictions <- model %>% predict(x) ## Next Steps -For more examples using Python API, please check out our [Jupyter notebook demos](https://github.com/h2oai/h2o4gpu/tree/master/examples/py/demos). To run the demos using a local wheel run, at least download `requirements_runtime_demos.txt` from the Github repo and do: +For more examples using Python API, please check out our [Jupyter notebook demos](https://github.com/h2oai/h2o4gpu/tree/master/examples/py/demos). 
To run the demos using a local wheel run, at least download `src/interface_py/requirements_runtime_demos.txt` from the Github repo and do: ``` -pip install -r requirements_runtime_demos.txt +pip install -r src/interface_py/requirements_runtime_demos.txt ``` and then run the jupyter notebook demos. @@ -151,10 +143,8 @@ Download the Docker file (for linux_x86_64): * Bleeding edge (changes with every successful master branch build): * [CUDA8 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0.9999-nccl-cuda8-runtime.tar.bz2) - * [CUDA8 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nonccl-cuda8/h2o4gpu-nonccl-cuda8-0.2.0.9999-runtime.tar.bz2) * [CUDA9 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda9/h2o4gpu-0.2.0.9999-nccl-cuda9-runtime.tar.bz2) - * [CUDA9 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nonccl-cuda9/h2o4gpu-0.2.0.9999-nonccl-cuda9-runtime.tar.bz2) - + Load and run docker file (e.g. for bleeding-edge of nccl-cuda9): ``` pbzip2 -dc h2o4gpu-0.2.0.9999-nccl-cuda9-runtime.tar.bz2 | nvidia-docker load diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 81bf319f1..08aab0721 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -46,7 +46,7 @@ To see if things are missing. 3) Run `ldd --version`, we currently require version `2.23` or higher. If your system is running a lower version please update if possible or build the project yourself on your machine. -4) Make sure you are running CUDA 8.x or CUDA 9.0, we currently don't support CUDA 9.1. +4) Make sure you are running CUDA 8.x or CUDA 9.x. 5) If compiled with icc (default if present) and have conda, need to do: diff --git a/Jenkinsfile-nccl-cuda9-aws1 b/ci/Jenkinsfile-ppc64le-cuda8 similarity index 76% rename from Jenkinsfile-nccl-cuda9-aws1 rename to ci/Jenkinsfile-ppc64le-cuda8 index c17682df0..194bf1b3b 100644 --- a/Jenkinsfile-nccl-cuda9-aws1 +++ b/ci/Jenkinsfile-ppc64le-cuda8 @@ -15,34 +15,21 @@ import ai.h2o.ci.BuildInfo def commitMessage = '' def h2o4gpuUtils = null -def dist = "dist5" -def BUILDTYPE = "nccl-cuda9-aws1" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-aws1" -def linuxwheel = "linux_whl5" +def platform = "ppc64le-centos7-cuda8.0" +def BUILDTYPE = "cuda8" +def cuda = "nvidia/cuda-ppc64le:8.0-cudnn5-devel-centos7" +def cudart = "nvidia/cuda-ppc64le:8.0-cudnn5-runtime-centos7" +def extratag = "-cuda8" +def linuxwheel = "ppc64le-centos7-cuda8.whl" def testtype = "dotest" -def labelbuild = "ec2P32xlarge" -def labeltest = "ec2P32xlarge" -def labelruntime = "ec2P32xlarge" +def labelbuild = "ibm-power || ibm-power-gpu" +def labeltest = "ibm-power || ibm-power-gpu" +def labelruntime = "ibm-power || ibm-power-gpu" def doingbenchmark = "0" -def dobenchmark = "1" -def doruntime = "0" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", 
"nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/jenkins/smalldata:/smalldata -v /home/jenkins/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) def benchmark_commit_trigger pipeline { @@ -64,12 +51,7 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda9-aws1") { + stage("Git clone on Linux ppc64le-cuda8") { agent { label "${labelbuild}" @@ -88,25 +70,24 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux nccl-cuda9-aws1") { - + stage("Build on Centos7 ppc64le-cuda8") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -123,8 +104,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -141,14 +122,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda9-aws1") { + stage("Test | Lint | S3up on Centos7 ppc64le-cuda8") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -156,39 +137,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ 
|| true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") + //h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux nccl-cuda9-aws1") { + stage("Build/Publish Runtime Docker Centos7 ppc64le-cuda8") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -208,19 +196,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -232,14 +220,14 @@ pipeline { } } - stage("Benchmarking Linux nccl-cuda9-aws1") { + stage("Benchmarking Linux ppc64le-cuda8") { agent { label 'master' } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/ci/Jenkinsfile-ppc64le-cuda9 b/ci/Jenkinsfile-ppc64le-cuda9 new file mode 100644 index 000000000..ad7dc05b4 --- /dev/null +++ b/ci/Jenkinsfile-ppc64le-cuda9 @@ -0,0 +1,268 @@ +#!/usr/bin/groovy + +//################ FILE IS AUTO-GENERATED from .base files +//################ DO NOT MODIFY +//################ See scripts/make_jenkinsfiles.sh + +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "ppc64le-centos7-cuda9.0" +def BUILDTYPE = "cuda9" +def cuda = "nvidia/cuda-ppc64le:9.0-cudnn7-devel-centos7" +def cudart = "nvidia/cuda-ppc64le:9.0-cudnn7-runtime-centos7" +def extratag = "-cuda9" +def linuxwheel = "ppc64le-centos7-cuda9.whl" +def testtype = "dotest" +def labelbuild = "ibm-power || 
ibm-power-gpu" +def labeltest = "ibm-power || ibm-power-gpu" +def labelruntime = "ibm-power || ibm-power-gpu" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/jenkins/smalldata:/smalldata -v /home/jenkins/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) +def benchmark_commit_trigger + +pipeline { + agent none + + // Setup job options + options { + ansiColor('xterm') + timestamps() + timeout(time: 300, unit: 'MINUTES') + buildDiscarder(logRotator(numToKeepStr: '10')) + disableConcurrentBuilds() + skipDefaultCheckout() + } + + environment { + MAKE_OPTS = "-s CI=1" // -s: silent mode + BUILD_TYPE = "${BUILDTYPE}" + } + + stages { + stage("Git clone on Linux ppc64le-cuda9") { + + agent { + label "${labelbuild}" + } + steps { + dumpInfo 'Linux Build Info' + // Do checkout + retryWithTimeout(200 /* seconds */, 3 /* retries */) { + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) + } + script { + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) + commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() + echo "Commit Message: ${commitMessage}" + benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) + echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" + } + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" + } + } + stage("Build on Centos7 ppc64le-cuda9") { + agent { + label "${labelbuild}" + } + when { + expression { + unstash "jenkinsfiles" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() + } + } + steps { + // Do checkout + retryWithTimeout(200 /* seconds */, 3 /* retries */) { + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) + } + script { + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") + + buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) + + script { + // Load the version file content + buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) + utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) + utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} + |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} + |""".stripMargin("|")) + } + + } + } + } + + stage("Test | Lint | S3up on Centos7 ppc64le-cuda9") { + agent { + label "${labeltest}" + } + when { + expression { + unstash "jenkinsfiles" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) + } + } + steps { + dumpInfo 'Linux Test Info' + // Get source code (should put tests into wheel, then wouldn't have 
to checkout) + retryWithTimeout(200 /* seconds */, 3 /* retries */) { + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) + } + script { + unstash 'version_info' + sh """ + echo "Before Stashed wheel file:" + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true + """ + unstash "${linuxwheel}" + sh """ + echo "After Stashed wheel file:" + ls -l src/interface_py/dist/${platform}/ || true + """ + //h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") + } + retryWithTimeout(500 /* seconds */, 5 /* retries */) { + withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { + script { + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") + } + } + } + } + } + stage("Build/Publish Runtime Docker Centos7 ppc64le-cuda9") { + agent { + label "${labelruntime}" + } + when { + expression { + unstash "jenkinsfiles" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() + } + } + steps { + dumpInfo 'Linux Build Info' + // Do checkout + retryWithTimeout(200 /* seconds */, 3 /* retries */) { + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) + } + script { + sh """ + echo "Before Stashed wheel file:" + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true + """ + unstash "${linuxwheel}" + sh """ + echo "After Stashed wheel file:" + ls -l src/interface_py/dist/${platform} || true + """ + unstash 'version_info' + sh 'echo "Stashed version file:" && ls -l build/' + } + script { + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") + } + retryWithTimeout(1000 /* seconds */, 5 /* retries */) { + withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { + script { + h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") + } + } + } + } + } + + stage("Benchmarking Linux ppc64le-cuda9") { + agent { + label 'master' + } + when { + expression { + unstash "jenkinsfiles" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" + return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") + } + } + steps { + script { + utilsLib.appendBuildDescription("BENCH \u2713") + } + + echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") + build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 + } + } + + } // end over stages + post { + failure { + node('linux') { + 
script { + if(env.BRANCH_NAME == "master") { + emailext( + to: "mateusz@h2o.ai, jmckinney@h2o.ai", + subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", + body: '''${JELLY_SCRIPT, template="html_gmail"}''', + attachLog: true, + compressLog: true, + recipientProviders: [ + [$class: 'DevelopersRecipientProvider'], + ] + ) + } + } + } + } + } +} + + diff --git a/Jenkinsfile-cpu b/ci/Jenkinsfile-x86_64-cuda8 similarity index 78% rename from Jenkinsfile-cpu rename to ci/Jenkinsfile-x86_64-cuda8 index e467db57b..c2ab847d6 100644 --- a/Jenkinsfile-cpu +++ b/ci/Jenkinsfile-x86_64-cuda8 @@ -15,32 +15,21 @@ import ai.h2o.ci.BuildInfo def commitMessage = '' def h2o4gpuUtils = null -def dist = "dist8" -def BUILDTYPE = "cpu" -def cuda = "ubuntu:16.04" -def extratag = "-cpu" -def linuxwheel = "linux_whl2" -def testtype = "dotestfast_nonccl" -def labelbuild = "docker && linux" -def labeltest = "docker" -def labelruntime = "docker" +def platform = "x86_64-centos7-cuda8.0" +def BUILDTYPE = "cuda8" +def cuda = "nvidia/cuda:8.0-cudnn5-devel-centos7" +def cudart = "nvidia/cuda:8.0-cudnn5-runtime-centos7" +def extratag = "-cuda8" +def linuxwheel = "x86_64-centos7-cuda8.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" +def labelruntime = "nvidia-docker" def doingbenchmark = "0" -def dobenchmark = "1" +def dobenchmark = "0" def doruntime = "1" -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) def benchmark_commit_trigger pipeline { @@ -62,12 +51,7 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux cpu") { + stage("Git clone on Linux x86_64-cuda8") { agent { label "${labelbuild}" @@ -86,25 +70,24 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux cpu") { - + stage("Build on Centos7 
x86_64-cuda8") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -121,8 +104,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -139,14 +122,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux cpu") { + stage("Test | Lint | S3up on Centos7 x86_64-cuda8") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -154,39 +137,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") + h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux cpu") { + stage("Build/Publish Runtime Docker Centos7 x86_64-cuda8") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -206,19 +196,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - 
h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -230,14 +220,14 @@ pipeline { } } - stage("Benchmarking Linux cpu") { + stage("Benchmarking Linux x86_64-cuda8") { agent { label 'master' } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/Jenkinsfile-nccl-cuda9 b/ci/Jenkinsfile-x86_64-cuda9 similarity index 77% rename from Jenkinsfile-nccl-cuda9 rename to ci/Jenkinsfile-x86_64-cuda9 index 4d53ff2f3..ff00fa44f 100644 --- a/Jenkinsfile-nccl-cuda9 +++ b/ci/Jenkinsfile-x86_64-cuda9 @@ -15,34 +15,21 @@ import ai.h2o.ci.BuildInfo def commitMessage = '' def h2o4gpuUtils = null -def dist = "dist4" -def BUILDTYPE = "nccl-cuda9" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9" -def linuxwheel = "linux_whl4" +def platform = "x86_64-centos7-cuda9.0" +def BUILDTYPE = "cuda9" +def cuda = "nvidia/cuda:9.0-cudnn7-devel-centos7" +def cudart = "nvidia/cuda:9.0-cudnn7-runtime-centos7" +def extratag = "-cuda9" +def linuxwheel = "x86_64-centos7-cuda9.whl" def testtype = "dotest" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" def labelruntime = "nvidia-docker" def doingbenchmark = "0" -def dobenchmark = "1" +def dobenchmark = "0" def doruntime = "1" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) def benchmark_commit_trigger pipeline { @@ -64,12 +51,7 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda9") { + stage("Git clone on Linux x86_64-cuda9") { agent { label "${labelbuild}" @@ -88,25 +70,24 @@ pipeline { 
userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux nccl-cuda9") { - + stage("Build on Centos7 x86_64-cuda9") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -123,8 +104,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -141,14 +122,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda9") { + stage("Test | Lint | S3up on Centos7 x86_64-cuda9") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -156,39 +137,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") + h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux nccl-cuda9") { + stage("Build/Publish Runtime Docker Centos7 x86_64-cuda9") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - 
h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -208,19 +196,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -232,14 +220,14 @@ pipeline { } } - stage("Benchmarking Linux nccl-cuda9") { + stage("Benchmarking Linux x86_64-cuda9") { agent { label 'master' } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/Jenkinsfile-nccl-cuda8 b/ci/Jenkinsfile-x86_64-cuda92 similarity index 77% rename from Jenkinsfile-nccl-cuda8 rename to ci/Jenkinsfile-x86_64-cuda92 index 61db36def..4dab0d862 100644 --- a/Jenkinsfile-nccl-cuda8 +++ b/ci/Jenkinsfile-x86_64-cuda92 @@ -15,34 +15,21 @@ import ai.h2o.ci.BuildInfo def commitMessage = '' def h2o4gpuUtils = null -def dist = "dist1" -def BUILDTYPE = "nccl-cuda8" -def cuda = "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" -def cudart = "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -def extratag = "-nccl-cuda8" -def linuxwheel = "linux_whl1" -def testtype = "dotestfast" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" +def platform = "x86_64-centos7-cuda9.2" +def BUILDTYPE = "cuda92" +def cuda = "nvidia/cuda:9.2-cudnn7-devel-centos7" +def cudart = "nvidia/cuda:9.2-cudnn7-runtime-centos7" +def extratag = "-cuda92" +def linuxwheel = "x86_64-centos7-cuda92.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" def labelruntime = "nvidia-docker" def doingbenchmark = "0" def dobenchmark = "0" def doruntime = "1" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = 
["dist1","dist2","dist3","dist4"] - +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) def benchmark_commit_trigger pipeline { @@ -64,12 +51,7 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda8") { + stage("Git clone on Linux x86_64-cuda92") { agent { label "${labelbuild}" @@ -88,25 +70,24 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux nccl-cuda8") { - + stage("Build on Centos7 x86_64-cuda92") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -123,8 +104,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -141,14 +122,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda8") { + stage("Test | Lint | S3up on Centos7 x86_64-cuda92") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -156,39 +137,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", 
"${extratag}", "${dist}", "${testtype}") + h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux nccl-cuda8") { + stage("Build/Publish Runtime Docker Centos7 x86_64-cuda92") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -208,19 +196,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -232,14 +220,14 @@ pipeline { } } - stage("Benchmarking Linux nccl-cuda8") { + stage("Benchmarking Linux x86_64-cuda92") { agent { label 'master' } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/Jenkinsfile.utils2 b/ci/Jenkinsfile.template similarity index 80% rename from Jenkinsfile.utils2 rename to ci/Jenkinsfile.template index 3bce21ade..376d437d5 100644 --- a/Jenkinsfile.utils2 +++ b/ci/Jenkinsfile.template @@ -1,16 +1,3 @@ -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - def benchmark_commit_trigger pipeline { @@ -32,11 +19,6 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// 
stage("Git clone on Linux") { agent { @@ -56,25 +38,24 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux") { - + stage("Build on Centos7") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -91,8 +72,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -109,14 +90,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux") { + stage("Test | Lint | S3up on Centos7") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -124,39 +105,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") + h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux") { + stage("Build/Publish Runtime Docker Centos7") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - 
h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -176,19 +164,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -207,7 +195,7 @@ pipeline { when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/Jenkinsfile.utils b/ci/Jenkinsfile.utils similarity index 78% rename from Jenkinsfile.utils rename to ci/Jenkinsfile.utils index 8b6d7d1dd..e9f0044cf 100644 --- a/Jenkinsfile.utils +++ b/ci/Jenkinsfile.utils @@ -68,14 +68,16 @@ String changeId() { return "-master" } -void publishToS3(BuildInfo buildInfo, String extratag, String dist) { +void publishToS3(BuildInfo buildInfo, String extratag, String platform) { echo "Publishing artifact to S3" + def buildArch = platform.split('-')[0] + def versionTag = buildInfo.getVersion() def majorVersionTag = buildInfo.getMajorVersion() def artifactId = "h2o4gpu" - def artifact = "${artifactId}-${versionTag}-cp36-cp36m-linux_x86_64.whl" - def localArtifact = "src/interface_py/${dist}/${artifact}" + def artifact = "${artifactId}-${versionTag}-cp36-cp36m-linux_${buildArch}.whl" + def localArtifact = "src/interface_py/dist/${platform}/${artifact}" sh 'echo "S3 defs: $versionTag $artifactId $artifact $localArtifact" ' @@ -83,19 +85,34 @@ void publishToS3(BuildInfo buildInfo, String extratag, String dist) { def bucket = "s3://h2o-release/h2o4gpu/snapshots/ai/h2o/${artifactId}/${majorVersionTag}${extratag}/" sh "s3cmd put ${localArtifact} ${bucket}" sh "s3cmd setacl --acl-public ${bucket}${artifact}" + sh "s3cmd del ${bucket}${localArtifact}" if (isRelease()) { bucket = "s3://h2o-release/h2o4gpu/releases/stable/ai/h2o/${artifactId}/${majorVersionTag}${extratag}/" sh "s3cmd put ${localArtifact} ${bucket}" sh "s3cmd setacl --acl-public ${bucket}${artifact}" + + // Temporarily also push to a bucket containing build_id, in the long run only this upload should stay + // the above one should get deprecated + build_id_bucket = "s3://artifacts.h2o.ai/releases/ai/h2o/${artifactId}/${env.BRANCH_NAME}/${env.BUILD_ID}/${platform}${extratag}/" + sh "s3cmd put ${localArtifact} ${build_id_bucket}" + sh "s3cmd setacl --acl-public ${build_id_bucket}${artifact}" } if (isBleedingEdge()) { bucket = "s3://h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/${artifactId}/${majorVersionTag}${extratag}/" def nonLocalVersionTag = versionTag.split('\\+')[0] - def 
bleedingEdgeArtifact = "${artifactId}-${nonLocalVersionTag}-cp36-cp36m-linux_x86_64.whl" + def bleedingEdgeArtifact = "${artifactId}-${nonLocalVersionTag}-cp36-cp36m-linux_${buildArch}.whl" + sh "s3cmd put ${localArtifact} ${bucket}${bleedingEdgeArtifact}" sh "s3cmd setacl --acl-public ${bucket}${bleedingEdgeArtifact}" + + // Temporarily also push to a bucket containing build_id, in the long run only this upload should stay + // the above one should get deprecated + build_id_bucket = "s3://artifacts.h2o.ai/snapshots/ai/h2o/${artifactId}/${versionTag}/${platform}${extratag}/" + sh "s3cmd put ${localArtifact} ${build_id_bucket}${bleedingEdgeArtifact}" + sh "s3cmd setacl --acl-public ${build_id_bucket}${bleedingEdgeArtifact}" + } } @@ -130,50 +147,62 @@ void publishRuntimeToS3(BuildInfo buildInfo,String extratag) { } } -void runTests(BuildInfo buildInfo, String dockerimage, String extratag, String dist, String target) { +void runTests(BuildInfo buildInfo, String dockerimage, String extratag, String platform, String target, String data_dirs) { echo "Running tests" try { sh """ - CONTAINER_NAME=${CONTAINER_NAME} extratag=${extratag} dockerimage=${dockerimage} target=${target} dist=${dist} ./scripts/make-docker-runtests.sh + DATA_DIRS="${data_dirs}" \ + CONTAINER_NAME=${CONTAINER_NAME} \ + extratag=${extratag} \ + dockerimage=${dockerimage} \ + target=${target} \ + platform=${platform} ./scripts/make-docker-runtests.sh """ currentBuild.result = "SUCCESS" } catch (error) { currentBuild.result = "FAILURE" throw error } finally { - sh "nvidia-docker stop ${CONTAINER_NAME} || true" + sh "docker stop ${CONTAINER_NAME} || true" // if snapshot and using buildID or hash in docker image, need to rm that container and image here. arch 'tmp/*.log' arch 'results/*.dat' - junit testResults: 'build/test-reports/*.xml', keepLongStdio: true, allowEmptyResults: false + junit testResults: 'build/test-reports/*.xml', keepLongStdio: true, allowEmptyResults: true } } -void buildOnLinux(String dockerimage, String extratag, String dist, String stashName) { +void buildOnLinux(String dockerimage, String extratag, String platform, String stashName) { echo "Building on linux" withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { try { sh """ - CONTAINER_NAME=${CONTAINER_NAME} extratag=${extratag} dockerimage=${dockerimage} H2O4GPU_BUILD=${env.BUILD_ID} H2O4GPU_SUFFIX=${isRelease() ? "" : "+" + ciVersionSuffix()} makeopts=${env.MAKE_OPTS} dist=${dist} ./scripts/make-docker-devel.sh + git clean -f -d -x + CONTAINER_NAME=${CONTAINER_NAME} \ + extratag=${extratag} \ + dockerimage=${dockerimage} \ + H2O4GPU_BUILD=${env.BUILD_ID} \ + H2O4GPU_SUFFIX=${isRelease() ? "" : "+" + ciVersionSuffix()} \ + makeopts=${env.MAKE_OPTS} \ + platform=${platform} ./scripts/make-docker-devel.sh """ - stash includes: "src/interface_py/${dist}/*h2o4gpu-*.whl", name: stashName + stash includes: "src/interface_py/dist/${platform}/*h2o4gpu-*.whl", name: stashName stash includes: 'build/VERSION.txt', name: 'version_info' sh "echo \"Archive artifacts\"" - arch "src/interface_py/${dist}/*h2o4gpu-*.whl" + arch "src/interface_py/dist/${platform}/*h2o4gpu-*.whl" currentBuild.result = "SUCCESS" } catch (error) { currentBuild.result = "FAILURE" throw error } finally { - sh "nvidia-docker stop ${CONTAINER_NAME} || true" + sh "docker stop ${CONTAINER_NAME} || true" // if snapshot and using buildID or hash in docker image, need to rm that container and image here. 
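For reference, the wheel name and upload path that the reworked `publishToS3` above derives from `${platform}` can be traced with a short shell sketch; the version tag and bucket values are illustrative, and the s3cmd calls mirror the ones in the function:
```
# How publishToS3 now builds the artifact path from the platform string
# (version tag and bucket below are illustrative examples).
platform="x86_64-centos7-cuda9.0"
buildArch="${platform%%-*}"            # same idea as platform.split('-')[0] -> x86_64
versionTag="0.2.0"
artifact="h2o4gpu-${versionTag}-cp36-cp36m-linux_${buildArch}.whl"
localArtifact="src/interface_py/dist/${platform}/${artifact}"

bucket="s3://h2o-release/h2o4gpu/snapshots/ai/h2o/h2o4gpu/0.2-cuda9/"   # ${majorVersionTag}${extratag}
s3cmd put "${localArtifact}" "${bucket}"
s3cmd setacl --acl-public "${bucket}${artifact}"
```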
} } } -void buildRuntime(BuildInfo buildInfo, String dockerimage, String dist, String extratag) { +void buildRuntime(BuildInfo buildInfo, String dockerimage, String platform, String extratag, String data_dirs) { echo "Building runtime" def buckettype = "snapshots" @@ -184,14 +213,21 @@ void buildRuntime(BuildInfo buildInfo, String dockerimage, String dist, String e withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { try { sh """ - CONTAINER_NAME=${CONTAINER_NAME} versionTag=${versionTag} extratag=${extratag} encodedFullVersionTag=${encodedFullVersionTag} fullVersionTag=${fullVersionTag} dist=${dist} dockerimage=${dockerimage} ./scripts/make-docker-runtime.sh + DATA_DIRS="${data_dirs}" \ + CONTAINER_NAME=${CONTAINER_NAME} \ + versionTag=${versionTag} \ + extratag=${extratag} \ + encodedFullVersionTag=${encodedFullVersionTag} \ + fullVersionTag=${fullVersionTag} \ + platform=${platform} \ + dockerimage=${dockerimage} ./scripts/make-docker-runtime.sh """ currentBuild.result = "SUCCESS" } catch (error) { currentBuild.result = "FAILURE" throw error } finally { - sh "nvidia-docker stop ${CONTAINER_NAME} || true" + sh "docker stop ${CONTAINER_NAME} || true" // if snapshot and using buildID or hash in docker image, need to rm that container and image here. } } @@ -271,15 +307,15 @@ def doTests() { def changedFiles = buildInfo.get().getChangedFiles() if (changedFiles) { echo "Looking for 'tests' in ${changedFiles.join(',')}" - // Check if the code change touches tests_open + // Check if the code change touches tests/python/open_data def doTrigger1 = changedFiles.any { filepath -> - filepath.startsWith("tests_open") + filepath.startsWith("tests/python/open_data") } def doTrigger2 = changedFiles.any { filepath -> - filepath.startsWith("tests_big") + filepath.startsWith("tests/python/big") } def doTrigger3 = changedFiles.any { filepath -> - filepath.startsWith("tests_small") + filepath.startsWith("tests/python/small") } def doTrigger4 = changedFiles.any { filepath -> filepath.startsWith("data") @@ -303,16 +339,15 @@ def doTestperf() { def changedFiles = buildInfo.get().getChangedFiles() if (changedFiles) { - echo "Looking for 'testsxgboost' in ${changedFiles.join(',')}" - // Check if the code change touches tests_open + echo "Looking for 'tests/python/xgboost' in ${changedFiles.join(',')}" def doTrigger1 = changedFiles.any { filepath -> - filepath.startsWith("testsxgboost") + filepath.startsWith("tests/python/xgboost") } def doTrigger2 = changedFiles.any { filepath -> - filepath.startsWith("tests_big") + filepath.startsWith("tests/python/big") } def doTrigger3 = changedFiles.any { filepath -> - filepath.startsWith("tests_small") + filepath.startsWith("tests/python/small") } def doTrigger4 = doBuild() def doTrigger5 = doTests() @@ -329,15 +364,14 @@ def doRuntime() { def changedFiles = buildInfo.get().getChangedFiles() if (changedFiles) { echo "Looking for 'examples' in ${changedFiles.join(',')}" - // Check if the code change touches tests_open def doTrigger1 = changedFiles.any { filepath -> filepath.startsWith("examples") } def doTrigger2 = changedFiles.any { filepath -> - filepath.startsWith("tests_big") + filepath.startsWith("tests/python/big") } def doTrigger3 = changedFiles.any { filepath -> - filepath.startsWith("tests_small") + filepath.startsWith("tests/python/small") } def doTrigger4 = doBuild() echo "doRuntime() Triggers: ${doTrigger1} ${doTrigger2} ${doTrigger3} ${doTrigger4}" diff --git a/ci/base/Jenkinsfile-ppc64le-cuda8.base 
b/ci/base/Jenkinsfile-ppc64le-cuda8.base new file mode 100644 index 000000000..f3a193c8e --- /dev/null +++ b/ci/base/Jenkinsfile-ppc64le-cuda8.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "ppc64le-centos7-cuda8.0" +def BUILDTYPE = "cuda8" +def cuda = "nvidia/cuda-ppc64le:8.0-cudnn5-devel-centos7" +def cudart = "nvidia/cuda-ppc64le:8.0-cudnn5-runtime-centos7" +def extratag = "-cuda8" +def linuxwheel = "ppc64le-centos7-cuda8.whl" +def testtype = "dotest" +def labelbuild = "ibm-power || ibm-power-gpu" +def labeltest = "ibm-power || ibm-power-gpu" +def labelruntime = "ibm-power || ibm-power-gpu" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/jenkins/smalldata:/smalldata -v /home/jenkins/open_data:/open_data" diff --git a/ci/base/Jenkinsfile-ppc64le-cuda9.base b/ci/base/Jenkinsfile-ppc64le-cuda9.base new file mode 100644 index 000000000..623d2b89f --- /dev/null +++ b/ci/base/Jenkinsfile-ppc64le-cuda9.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "ppc64le-centos7-cuda9.0" +def BUILDTYPE = "cuda9" +def cuda = "nvidia/cuda-ppc64le:9.0-cudnn7-devel-centos7" +def cudart = "nvidia/cuda-ppc64le:9.0-cudnn7-runtime-centos7" +def extratag = "-cuda9" +def linuxwheel = "ppc64le-centos7-cuda9.whl" +def testtype = "dotest" +def labelbuild = "ibm-power || ibm-power-gpu" +def labeltest = "ibm-power || ibm-power-gpu" +def labelruntime = "ibm-power || ibm-power-gpu" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/jenkins/smalldata:/smalldata -v /home/jenkins/open_data:/open_data" diff --git a/ci/base/Jenkinsfile-x86_64-cuda8.base b/ci/base/Jenkinsfile-x86_64-cuda8.base new file mode 100644 index 000000000..516dc4ece --- /dev/null +++ b/ci/base/Jenkinsfile-x86_64-cuda8.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "x86_64-centos7-cuda8.0" +def BUILDTYPE = "cuda8" +def cuda = "nvidia/cuda:8.0-cudnn5-devel-centos7" +def cudart = "nvidia/cuda:8.0-cudnn5-runtime-centos7" +def extratag = "-cuda8" +def linuxwheel = "x86_64-centos7-cuda8.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" +def labelruntime = "nvidia-docker" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" diff --git a/ci/base/Jenkinsfile-x86_64-cuda9.base b/ci/base/Jenkinsfile-x86_64-cuda9.base new file mode 100644 index 000000000..5875eac99 --- /dev/null +++ b/ci/base/Jenkinsfile-x86_64-cuda9.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo 
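These `ci/base/*.base` files only define the per-platform variables; the shared pipeline body lives in `ci/Jenkinsfile.template`, and the checked-in `ci/Jenkinsfile-*` files are marked as auto-generated by `scripts/make_jenkinsfiles.sh` (the generator itself is not part of this diff). Conceptually the generation amounts to something like the sketch below; the exact mechanism, including how the auto-generated header and the platform-specific stage names get injected, is an assumption:
```
# Hypothetical sketch of what scripts/make_jenkinsfiles.sh might do; the real
# script (not shown in this diff) also adds the "FILE IS AUTO-GENERATED" header
# and rewrites the stage names per platform.
for base in ci/base/Jenkinsfile-*.base; do
    target="ci/$(basename "${base}" .base)"
    cat "${base}" ci/Jenkinsfile.template > "${target}"
done
```
The `data_dirs` strings defined in these files are ordinary `docker -v` volume mounts; they are passed through as `DATA_DIRS` to `scripts/make-docker-runtests.sh` and `scripts/make-docker-runtime.sh`, presumably so the test and runtime containers can mount the shared datasets.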
+ +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "x86_64-centos7-cuda9.0" +def BUILDTYPE = "cuda9" +def cuda = "nvidia/cuda:9.0-cudnn7-devel-centos7" +def cudart = "nvidia/cuda:9.0-cudnn7-runtime-centos7" +def extratag = "-cuda9" +def linuxwheel = "x86_64-centos7-cuda9.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" +def labelruntime = "nvidia-docker" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" diff --git a/ci/base/Jenkinsfile-x86_64-cuda92.base b/ci/base/Jenkinsfile-x86_64-cuda92.base new file mode 100644 index 000000000..5ad2fcb9c --- /dev/null +++ b/ci/base/Jenkinsfile-x86_64-cuda92.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "x86_64-centos7-cuda9.2" +def BUILDTYPE = "cuda92" +def cuda = "nvidia/cuda:9.2-cudnn7-devel-centos7" +def cudart = "nvidia/cuda:9.2-cudnn7-runtime-centos7" +def extratag = "-cuda92" +def linuxwheel = "x86_64-centos7-cuda92.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" +def labelruntime = "nvidia-docker" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" diff --git a/Makefile_header.mk b/make/Makefile_header.mk similarity index 100% rename from Makefile_header.mk rename to make/Makefile_header.mk diff --git a/make/config.mk b/make/config.mk index 706438995..dd3355373 100644 --- a/make/config.mk +++ b/make/config.mk @@ -1,13 +1,52 @@ # -# Build configuration +# BUILD CONFIGURATION VARIABLES +# + +# Set to 1 or ON to build with NVTX support +USENVTX=0 + +# By default 0 means Release, set to "Debug" if you want to compile sources with debug flags +CMAKE_BUILD_TYPE=0 + +$(warning USENVTX is $(USENVTX)) +$(warning CMAKE_BUILD_TYPE is $(CMAKE_BUILD_TYPE)) + +# +# PROJECT DEPENDENCY RELATED VARIABLES # -# Location of artifacts -# E.g. "s3://bucket/dirname" -ARTIFACTS_BUCKET = s3://artifacts.h2o.ai/releases # Location of local directory with dependencies DEPS_DIR = deps +# NCCL support in XGBoost. To turn off set USENCCL=0 during build +USENCCL=1 + +# By default build both CPU and GPU variant +USECUDA=1 + +ifeq ($(USECUDA), 0) + $(warning Building with only CPU support ON.) + XGBOOST_TARGET=libxgboost-cpu +else + ifeq ($(USENCCL), 0) + $(warning XGBoost NCCL support is OFF.) + XGBOOST_TARGET=libxgboost2 + else + $(warning XGBoost NCCL support is ON.) + XGBOOST_TARGET=libxgboost + endif + CUDA_LIB=$(CUDA_HOME)/lib64 + MAKEFILE_CUDA_VERSION ?= $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev) + CUDA_MAJOR_VERSION = $(shell echo $(MAKEFILE_CUDA_VERSION) | cut -d "." -f 1) +endif + +# PyDataTable version. Currently not used in the code. 
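The switches at the top of `make/config.mk` are plain make variables, so they can be overridden on the command line, and the CUDA version probe is an ordinary shell pipeline. A short sketch, assuming the usual `make fullinstall` entry point:
```
# Override the config.mk switches at build time (values as documented above);
# the `fullinstall` target name is assumed here.
make fullinstall USENCCL=0                         # XGBoost without NCCL
make fullinstall USECUDA=0                         # CPU-only build
make fullinstall CMAKE_BUILD_TYPE=Debug USENVTX=1  # debug build with nvToolsExt

# The same probe config.mk uses for MAKEFILE_CUDA_VERSION; e.g. a
# /usr/local/cuda/lib64/libcudart.so.9.0 symlink yields "9.0":
ls ${CUDA_HOME:-/usr/local/cuda}/lib64/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev
```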
+#PYDATATABLE_VERSION = 0.1.0+master.97 + +# +# TEST DATA VARIABLES +# + # Location of datasets SMALLDATA_BUCKET = s3://h2o-public-test-data/smalldata @@ -24,17 +63,58 @@ OPEN_DATA_BUCKET = s3://h2o-public-test-data/h2o4gpu/open_data OPEN_DATA_DIR = open_data # -# PyDataTable +# R PACKAGE CONFIGURATIONS # -#PYDATATABLE_VERSION = 0.1.0+master.97 +INSTALL_R = 1 +R_VERSION = 3.1.0 # -# XGBoost +# VARIABLES USED DURING BUILD - YOU PROBABLY DON'T WANT TO CHANGE THESE # -XGBOOST_VERSION = 0.6 -# -# R package Configurations -# -INSTALL_R = 1 -R_VERSION = 3.1.0 +# Build version +MAJOR_MINOR=$(shell echo $(BASE_VERSION) | sed 's/.*\(^[0-9][0-9]*\.[0-9][0-9]*\).*/\1/g' ) + +# OS info for Python +# Python has crazy ideas about os names +OS := $(shell uname) +ifeq ($(OS), Darwin) + PY_OS ?= "macosx" +else + PY_OS ?= $(OS) +endif + +PYTHON ?= python + +# UUID for logs +RANDOM := $(shell bash -c 'echo $$RANDOM') +LOGEXT=$(RANDOM)$(shell date +'_%Y.%m.%d-%H:%M:%S') + +# Utilize all procs in certain tasks +NUMPROCS := $(shell cat /proc/cpuinfo|grep processor|wc -l) + +# Docker image tagging +DOCKER_VERSION_TAG ?= "latest" + +# BUILD_INFO setup +H2O4GPU_COMMIT ?= $(shell git rev-parse HEAD) +H2O4GPU_BUILD_DATE := $(shell date) +H2O4GPU_BUILD ?= "LOCAL BUILD @ $(shell git rev-parse --short HEAD) build at $(H2O4GPU_BUILD_DATE)" +H2O4GPU_SUFFIX ?= "+local_$(shell git describe --always --dirty)" + +# Setup S3 access credentials +S3_CMD_LINE := aws s3 + +DIST_DIR = dist + +ARCH := $(shell arch) +ifdef CUDA_MAJOR_VERSION + PLATFORM = $(ARCH)-centos7-cuda$(MAKEFILE_CUDA_VERSION) +else + PLATFORM = $(ARCH)-centos7-cpu +endif + +DOCKER_ARCH= +ifeq (${ARCH}, ppc64le) + DOCKER_ARCH="-ppc64le" +endif diff --git a/scripts/daal/install_daal.sh b/scripts/daal/install_daal.sh index 8729bc79d..907c262e6 100644 --- a/scripts/daal/install_daal.sh +++ b/scripts/daal/install_daal.sh @@ -8,14 +8,20 @@ set -e _intel_dall_tar="https://s3.amazonaws.com/intel-daal/daal-linux_x86_64__cp36.tar.gz" -axel -a -n 20 $_intel_dall_tar && tar xzvf daal-linux_x86_64__cp36.tar.gz -C $HOME && -rm -rf daal-linux_x86_64__cp36.tar.gz && -eval "$(/root/.pyenv/bin/pyenv init -)" && -pip install $HOME/daal/pydaal-2018.0.1.20171012-cp36-none-linux_x86_64.whl && -ln -sf $HOME/daal/lib/libtbb.so.2 /usr/lib/libtbb.so.2 && -ln -sf $HOME/daal/lib/libtbb.so /usr/lib/libtbb.so && -ln -sf $HOME/daal/lib/libtbbmalloc.so.2 /usr/lib/libtbbmalloc.so.2 && -ln -sf $HOME/daal/lib/libtbbmalloc.so /usr/lib/libtbbmalloc.so && -ln -sf $HOME/daal/lib/libdaal_sequential.so /usr/lib/libdaal_sequential.so && -ln -sf $HOME/daal/lib/libdaal_core.so /usr/lib/libdaal_core.so && +if hash axel 2>/dev/null; then + axel -a -n 20 $_intel_dall_tar +else + wget $_intel_dall_tar +fi + +tar xzvf daal-linux_x86_64__cp36.tar.gz -C $HOME +rm -rf daal-linux_x86_64__cp36.tar.gz + +pip install $HOME/daal/pydaal-2018.0.1.20171012-cp36-none-linux_x86_64.whl +ln -sf $HOME/daal/lib/libtbb.so.2 /usr/lib/libtbb.so.2 +ln -sf $HOME/daal/lib/libtbb.so /usr/lib/libtbb.so +ln -sf $HOME/daal/lib/libtbbmalloc.so.2 /usr/lib/libtbbmalloc.so.2 +ln -sf $HOME/daal/lib/libtbbmalloc.so /usr/lib/libtbbmalloc.so +ln -sf $HOME/daal/lib/libdaal_sequential.so /usr/lib/libdaal_sequential.so +ln -sf $HOME/daal/lib/libdaal_core.so /usr/lib/libdaal_core.so ln -sf $HOME/daal/lib/libdaal_thread.so /usr/lib/libdaal_thread.so diff --git a/scripts/daal/install_daal_locally.sh b/scripts/daal/install_daal_locally.sh deleted file mode 100644 index 5f771ea61..000000000 --- a/scripts/daal/install_daal_locally.sh +++ /dev/null @@ 
-1,49 +0,0 @@ -#!/bin/bash -#=============================================================================== -# name: install_daal_locally.sh -# -# installs to the system intel daal libraries and pydaal (python version of daal) -#=============================================================================== -set -e - -_intel_dall_tar="https://s3.amazonaws.com/intel-daal/daal-linux_x86_64__cp36.tar.gz" - -function daal_downloaded { - if [ -f "$HOME/daal/pydaal-2018.0.1.20171012-cp36-none-linux_x86_64.whl" ]; then - echo "PyDAAL wheel already downloaded"; - else - echo "PyDAAL wheel must be downloaded, this may take a while."; - fi -} - -function pip_wheel { - echo "Installing PyDAAL ..." - pip install $HOME/daal/pydaal-2018.0.1.20171012-cp36-none-linux_x86_64.whl && - sudo ln -sf $HOME/daal/lib/libtbb.so.2 /usr/lib/libtbb.so.2 && - sudo ln -sf $HOME/daal/lib/libtbb.so /usr/lib/libtbb.so && - sudo ln -sf $HOME/daal/lib/libtbbmalloc.so.2 /usr/lib/libtbbmalloc.so.2 && - sudo ln -sf $HOME/daal/lib/libtbbmalloc.so /usr/lib/libtbbmalloc.so && - sudo ln -sf $HOME/daal/lib/libdaal_sequential.so /usr/lib/libdaal_sequential.so && - sudo ln -sf $HOME/daal/lib/libdaal_core.so /usr/lib/libdaal_core.so && - sudo ln -sf $HOME/daal/lib/libdaal_thread.so /usr/lib/libdaal_thread.so -} - -function install_daal { - echo "Unpacking PyDAAL wheel ..." - tar xzvf daal-linux_x86_64__cp36.tar.gz -C $HOME && - rm -rf daal-linux_x86_64__cp36.tar.gz && - eval "$(/root/.pyenv/bin/pyenv init -)" && - pip_wheel -} - -# detect if axel is installed -daal_downloaded -if [[ $? -ne 0 ]]; then - if hash axel 2>/dev/null; then - axel -a -n 20 $_intel_dall_tar && install_daal - else - wget $_intel_dall_tar && install_daal - fi -else - pip_wheel -fi diff --git a/scripts/data_prep.R b/scripts/data_prep.R index 1b0dfb1ed..70539cc92 100644 --- a/scripts/data_prep.R +++ b/scripts/data_prep.R @@ -5,19 +5,19 @@ #'@param save_csv_path Path to save processed data as a csv #'@param max_label_encoding_levels The maximum number of uniques required in a column to consider it a categorical variable. 
Default is 1000 prep_data <- function(data_table, response, save_csv_path = NULL, max_label_encoding_levels = 1000){ - + if (!is.data.table(data_table)) { stop ("Input data should be of type data.table") } - + if (is.character(response)) { print(paste0("Response is -> ",response)) } else { print(paste0("Response is -> ",colnames(data_table)[response])) } - + print(paste0("Number of columns: ", ncol(data_table))) - + print(paste0("Number of rows: ", nrow(data_table))) ## Label-encoding of categoricals (those cols with fewer than `label_encoding_levels` levels, but not constant) @@ -26,7 +26,7 @@ prep_data <- function(data_table, response, save_csv_path = NULL, max_label_enco for (ff in feature.names) { tt <- uniqueN(data_table[[ff]]) if (tt <= max_label_encoding_levels && tt > 1) { - data_table[, (ff):=factor(data_table[[ff]])] + data_table[, (ff):=factor(data_table[[ff]])] print(paste0(ff," has ",tt," levels")) } if (tt < 2) { @@ -34,37 +34,37 @@ prep_data <- function(data_table, response, save_csv_path = NULL, max_label_enco data_table[, (ff):=NULL] } } - + print(paste0("Number of columns after label encoding: ", ncol(data_table))) - + num_cols <- names(data_table)[which(sapply(data_table, is.numeric))] cat_cols <- names(data_table)[which(sapply(data_table, is.factor))] print(paste0("Number of numeric columns: ", length(num_cols))) print(paste0("Number of categorical columns: ", length(cat_cols))) - + ## impute missing values, drop near-const cols and standardize the data print("Imputing missing values using mean...") cols <- setdiff(num_cols,c(response)) for (c in cols) { data_table[!is.finite(data_table[[c]]), (c):=mean(data_table[[c]], na.rm=TRUE)] - if (!is.finite(sd(data_table[[c]])) || sd(data_table[[c]])<1e-4) + if (!is.finite(sd(data_table[[c]])) || sd(data_table[[c]])<1e-4) data_table[,(c):=NULL] else data_table[,(c):=scale(as.numeric(data_table[[c]]))] } print(paste0("Number of columns after mean imputation: ", ncol(data_table))) - + ## one-hot encode the categoricals print("One hot encoding data table categoricals only...") data_table2 <- as.data.table(model.matrix(data_table[[response]]~., data = data_table[,c(cat_cols), with=FALSE], sparse=FALSE))[,-1] print(paste0("Number of columns that have been one hot encoded: ", ncol(data_table2))) - + ## add back the numeric columns and assign back to data_table print("Add back numeric columns and assign to data table") data_table <- data_table2[,(num_cols):=data_table[,num_cols,with=FALSE]] - + print(paste0("Final dimensions of data table after pre processing: ", nrow(data_table), " by ", ncol(data_table))) - + ## check validity of data print(paste0("Number of NA's in final data table after pre processing: ", sum(sapply(data_table, is.na)))) print(paste0("Number of numeric's in final data table after pre processing: ", sum(sapply(data_table, is.numeric)))) @@ -73,12 +73,12 @@ prep_data <- function(data_table, response, save_csv_path = NULL, max_label_enco } else { print("Some entries are not finite in final data table after pre processing. 
Please inspect final data table") } - + ## save preprocessed file as CSV if (!is.null(save_csv_path)) { print(paste0("Saving processed data to ", save_csv_path)) fwrite(data_table, save_csv_path) } - + return(data_table) } diff --git a/scripts/g++_wrapper.sh b/scripts/g++_wrapper.sh deleted file mode 100755 index 2a3e4cd78..000000000 --- a/scripts/g++_wrapper.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -ARGS=() -for var in "$@"; do - [ "$var" != '-fno-plt' ] && [ "$var" != '-mtune=haswell' ] && ARGS+=("$var") -done -/usr/bin/g++ "${ARGS[@]}" - diff --git a/scripts/gcc_wrapper.sh b/scripts/gcc_wrapper.sh deleted file mode 100755 index de461e95d..000000000 --- a/scripts/gcc_wrapper.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -ARGS=() -for var in "$@"; do - [ "$var" != '-fno-plt' ] && [ "$var" != '-mtune=haswell' ] && ARGS+=("$var") -done -/usr/bin/gcc "${ARGS[@]}" - diff --git a/scripts/gitshallow_submodules.sh b/scripts/gitshallow_submodules.sh index 7bb25cedd..d2ecc68b4 100755 --- a/scripts/gitshallow_submodules.sh +++ b/scripts/gitshallow_submodules.sh @@ -1,15 +1,12 @@ - #!/bin/bash git submodule init for i in $(git submodule | awk '{print $2}'); do spath=$(git config -f .gitmodules --get submodule.$i.path) surl=$(git config -f .gitmodules --get submodule.$i.url) echo "submodule:" $i $spath $surl -# if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] || [ $spath == "scikit-learn || [ $spath == "xgboost" ] ; then - if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] || [ $spath == "scikit-learn" ] ; then # can't add xgboost because not pulling from master -# if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] ; then # can't add xgboost because not pulling from master - git submodule update --depth 1 $spath - else + if [ $spath == "xgboost" ] || [ $spath == "tests/googletest" ] ; then git submodule update $spath + else + git submodule update --depth 1 $spath fi done diff --git a/scripts/make-docker-devel.sh b/scripts/make-docker-devel.sh index ae28b6571..b0c7eade3 100755 --- a/scripts/make-docker-devel.sh +++ b/scripts/make-docker-devel.sh @@ -1,39 +1,33 @@ #!/bin/bash set -e -# split layer and version -IFS=':' read -ra LAYER_VERSION <<< "${dockerimage}" -layer=${LAYER_VERSION[0]} -version=${LAYER_VERSION[1]} - -if [ "$layer" == "ubuntu" ] -then - docker=docker -else - docker=nvidia-docker -fi +H2O4GPU_BUILD="${H2O4GPU_BUILD:-0}" +H2O4GPU_SUFFIX="${H2O4GPU_SUFFIX:-''}" +CONTAINER_NAME="${CONTAINER_NAME:-$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1)}" +makeopts="${makeopts:-}" +DOCKER_CLI='nvidia-docker' #--build-arg http_proxy=http://172.16.2.142:3128/ echo "Docker devel - BEGIN" -$docker build -t opsh2oai/h2o4gpu-buildversion${extratag}-build -f Dockerfile-build --rm=false --build-arg layer=$layer --build-arg version=$version . -#-u `id -u`:`id -g` -w `pwd` -v `pwd`:`pwd`:rw -$docker run --init --rm --name ${CONTAINER_NAME} -d -t -u root -v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-buildversion${extratag}-build +$DOCKER_CLI build -t opsh2oai/h2o4gpu-buildversion${extratag}-build -f Dockerfile-build --rm=false --build-arg docker_name=${dockerimage} . + +$DOCKER_CLI run --init --rm --name ${CONTAINER_NAME} -d -t -u root -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-buildversion${extratag}-build echo "Docker devel - Copying files" -$docker exec ${CONTAINER_NAME} bash -c 'mkdir -p repo ; cp -a /dot/. 
./repo' +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'mkdir -p repo ; cp -a /dot/. ./repo' -echo "setup pyenv, shallow clone, and make fullinstalljenkins with ${H2O4GPU_BUILD} and ${H2O4GPU_SUFFIX}" -$docker exec ${CONTAINER_NAME} bash -c "eval \"\$(/root/.pyenv/bin/pyenv init -)\" ; /root/.pyenv/bin/pyenv global 3.6.1; cd repo ; ./scripts/gitshallow_submodules.sh ; make ${makeopts} fullinstalljenkins${extratag} H2O4GPU_BUILD=${H2O4GPU_BUILD} H2O4GPU_SUFFIX=${H2O4GPU_SUFFIX}" +echo "shallow clone, and make buildinstall with ${H2O4GPU_BUILD} and ${H2O4GPU_SUFFIX}" +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c "cd repo ; make ${makeopts} buildinstall H2O4GPU_BUILD=${H2O4GPU_BUILD} H2O4GPU_SUFFIX=${H2O4GPU_SUFFIX}" echo "Docker devel - Clean local wheels and Copying wheel from docker" -rm -rf src/interface_py/${dist}/*.whl -$docker cp -a ${CONTAINER_NAME}:repo/src/interface_py/${dist} src/interface_py/ +rm -rf src/interface_py/dist/ +$DOCKER_CLI cp -a ${CONTAINER_NAME}:/root/repo/src/interface_py/dist src/interface_py/ echo "Docker devel - Copying VERSION.txt" -mkdir -p build ; $docker cp ${CONTAINER_NAME}:repo/build/VERSION.txt build/ +mkdir -p build ; $DOCKER_CLI cp ${CONTAINER_NAME}:/root/repo/build/VERSION.txt build/ echo "Docker devel - Stopping docker" -$docker stop ${CONTAINER_NAME} +$DOCKER_CLI stop ${CONTAINER_NAME} echo "Docker devel - END" diff --git a/scripts/make-docker-runtests.sh b/scripts/make-docker-runtests.sh index 2253dea96..344013067 100755 --- a/scripts/make-docker-runtests.sh +++ b/scripts/make-docker-runtests.sh @@ -2,55 +2,48 @@ # Requires one has already done(e.g.): make docker-build-nccl-cuda9 to get wheel built or wheel was unstashed on jenkins set -e -# split layer and version -IFS=':' read -ra LAYER_VERSION <<< "${dockerimage}" -layer=${LAYER_VERSION[0]} -version=${LAYER_VERSION[1]} - -if [ "$layer" == "ubuntu" ] -then - docker=docker -else - docker=nvidia-docker -fi +DOCKER_CLI='nvidia-docker' +H2O4GPU_BUILD="${H2O4GPU_BUILD:-0}" +DATA_DIRS="${DATA_DIRS:-}" echo "Docker devel test and pylint - BEGIN" # --build-arg http_proxy=http://172.16.2.142:3128/ -$docker build -t opsh2oai/h2o4gpu-buildversion${extratag}-build -f Dockerfile-build --rm=false --build-arg layer=$layer --build-arg version=$version . +$DOCKER_CLI build -t opsh2oai/h2o4gpu-buildversion${extratag}-build -f Dockerfile-runtime --rm=false --build-arg docker_name=${dockerimage} . + #-u `id -u`:`id -g` -w `pwd` -v `pwd`:`pwd`:rw -$docker run --init --rm --name ${CONTAINER_NAME} -d -t -u root -v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-buildversion${extratag}-build +$DOCKER_CLI run --init --rm --name ${CONTAINER_NAME} -d -t -u root ${DATA_DIRS} -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-buildversion${extratag}-build echo "Docker devel test and pylint - Copying files" -$docker exec ${CONTAINER_NAME} bash -c 'mkdir -p repo ; cp -a /dot/. ./repo' -$docker exec ${CONTAINER_NAME} bash -c 'cd ./repo ; ln -sf /data . || true ; ln -sf /open_data . || true' +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'mkdir -p repo ; cp -a /dot/. ./repo' +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd ./repo ; ln -sf /data . || true ; ln -sf /open_data . 
|| true' -echo "Docker devel test and pylint - setup pyenv, pip install wheel from ${dist}, make ${target}" +echo "Docker devel test and pylint - pip install wheel from dist/${platform}, make ${target}" # Don't use version in wheel name when find so local call to this script works without specific jenkins versions -# Just ensure clean ${dist}/*.whl before unstash in jenkins -$docker exec ${CONTAINER_NAME} bash -c 'export HOME=`pwd`; eval "$(/root/.pyenv/bin/pyenv init -)" ; /root/.pyenv/bin/pyenv global 3.6.1; cd repo ; pip install `find /dot/src/interface_py/'${dist}' -name "*h2o4gpu-*.whl"`; pip freeze ; make '${target} +# Just ensure clean dist/${platform}/*.whl before unstash in jenkins +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'export HOME=`pwd` ; cd repo ; pip install `find /dot/src/interface_py/dist/'${platform}' -name "*h2o4gpu-*.whl"`; pip freeze ; make '${target} { # try echo "Docker devel test and pylint - copy any dat results" rm -rf results ; mkdir -p results/ touch results/emptyresults.dat - nvidia-docker cp -a ${CONTAINER_NAME}:repo/results results/ + $DOCKER_CLI cp -a ${CONTAINER_NAME}:repo/results results/ } || { # catch echo "No results dat files" } echo "Docker devel test and pylint - copy build reports" rm -rf build/test-reports ; mkdir -p build/test-reports/ -$docker cp -a ${CONTAINER_NAME}:repo/build/test-reports build/ +$DOCKER_CLI cp -a ${CONTAINER_NAME}:repo/build/test-reports build/ echo "Docker devel test and pylint - copy logs for arch" rm -rf tmp ; mkdir -p tmp -$docker cp -a ${CONTAINER_NAME}:repo/tmp ./ +$DOCKER_CLI cp -a ${CONTAINER_NAME}:repo/tmp ./ echo "Docker devel test and pylint - pylint" -$docker exec ${CONTAINER_NAME} touch ./repo/src/interface_py/h2o4gpu/__init__.py -$docker exec ${CONTAINER_NAME} bash -c 'eval "$(/root/.pyenv/bin/pyenv init -)" ; /root/.pyenv/bin/pyenv global 3.6.1; cd repo ; make pylint' +$DOCKER_CLI exec ${CONTAINER_NAME} touch ./repo/src/interface_py/h2o4gpu/__init__.py +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd repo ; make pylint' echo "Docker devel test and pylint - stop" -$docker stop ${CONTAINER_NAME} +$DOCKER_CLI stop ${CONTAINER_NAME} diff --git a/scripts/make-docker-runtime.sh b/scripts/make-docker-runtime.sh index 6ec1ada59..b38e4aa18 100755 --- a/scripts/make-docker-runtime.sh +++ b/scripts/make-docker-runtime.sh @@ -1,61 +1,52 @@ #!/bin/bash set -e -# split layer and version -IFS=':' read -ra LAYER_VERSION <<< "${dockerimage}" -layer=${LAYER_VERSION[0]} -version=${LAYER_VERSION[1]} - -if [ "$layer" == "ubuntu" ] -then - docker=docker -else - docker=nvidia-docker -fi +DOCKER_CLI='nvidia-docker' +DATA_DIRS="${DATA_DIRS:-}" echo "Docker runtime - BEGIN" echo "Docker runtime - Build" # wheel=${encodedFullVersionTag}${extratag}/h2o4gpu-${encodedFullVersionTag}-cp36-cp36m-linux_x86_64.whl # use this if want to pull from s3 in Dockerfile-runtime # --build-arg http_proxy=http://172.16.2.142:3128/ -$docker build -t opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest -f Dockerfile-runtime --rm=false --build-arg layer=$layer --build-arg version=$version . +$DOCKER_CLI build -t opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest -f Dockerfile-runtime --rm=false --build-arg docker_name=${dockerimage} . 
# -u `id -u`:`id -g` -d -t -w `pwd` -v `pwd`:`pwd`:rw echo "Runtime Docker - Run" -$docker run --init --rm --name ${CONTAINER_NAME} -d -t -u root -v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest +$DOCKER_CLI run --init --rm --name ${CONTAINER_NAME} -d -t -u root ${DATA_DIRS} -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest echo "Docker runtime - pip install h2o4gpu and pip freeze" -$docker exec ${CONTAINER_NAME} bash -c '. /h2o4gpu_env/bin/activate ; pip install `find /dot/src/interface_py/'${dist}' -name "*h2o4gpu-*.whl" | xargs ls -tr | tail -1` ; pip freeze' +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'pip install `find /dot/src/interface_py/dist/'${platform}' -name "*h2o4gpu-*.whl" | xargs ls -tr | tail -1` ; pip freeze' { # try echo "Docker runtime - Getting Data" - #nvidia-docker exec ${CONTAINER_NAME} bash -c '. /h2o4gpu_env/bin/activate ; mkdir -p scripts ; rm -rf scripts/fcov_get.py ; echo "from sklearn.datasets import fetch_covtype" > ./scripts/fcov_get.py ; echo "cov = fetch_covtype()" >> ./scripts/fcov_get.py' - #nvidia-docker exec ${CONTAINER_NAME} bash -c '. /h2o4gpu_env/bin/activate ; cd /jupyter/ ; python ../scripts/fcov_get.py' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; mkdir -p ./scikit_learn_data/covertype ; cp /open_data/covertype/* ./scikit_learn_data/covertype' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; mkdir -p ./scikit_learn_data/lfw_home ; cp -af /open_data/lfw_home ./scikit_learn_data' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/creditcard.csv .' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/kmeans_data/h2o-logo.jpg' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/Temples-shrines-and-castles-in-Japan-social-media-image.jpg' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/china.jpg' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/ipums_1k.csv .' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/ipums.feather .' - $docker exec -u root ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; chmod -R a+rwx .' + #$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'mkdir -p scripts ; rm -rf scripts/fcov_get.py ; echo "from sklearn.datasets import fetch_covtype" > ./scripts/fcov_get.py ; echo "cov = fetch_covtype()" >> ./scripts/fcov_get.py' + #$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; python ../scripts/fcov_get.py' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; mkdir -p ./scikit_learn_data/covertype ; cp /open_data/covertype/* ./scikit_learn_data/covertype' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; mkdir -p ./scikit_learn_data/lfw_home ; cp -af /open_data/lfw_home ./scikit_learn_data' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/creditcard.csv .' 
+ $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/kmeans_data/h2o-logo.jpg' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/Temples-shrines-and-castles-in-Japan-social-media-image.jpg' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/china.jpg' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/ipums_1k.csv .' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/ipums.feather .' + $DOCKER_CLI exec -u root ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; chmod -R a+rwx .' } || { # catch echo "Some Data Not Obtained" } -$docker commit ${CONTAINER_NAME} opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest +$DOCKER_CLI commit ${CONTAINER_NAME} opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest echo "Docker runtime - stopping docker" -$docker stop ${CONTAINER_NAME} +$DOCKER_CLI stop ${CONTAINER_NAME} if [ -z `command -v pbzip2` ] then echo "Docker runtime - saving docker to local disk -- native system must have bzip2" - $docker save opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime | bzip2 > h2o4gpu-${fullVersionTag}${extratag}-runtime.tar.bz2 + $DOCKER_CLI save opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime | bzip2 > h2o4gpu-${fullVersionTag}${extratag}-runtime.tar.bz2 else echo "Docker runtime - saving docker to local disk -- native system must have pbzip2" - $docker save opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime | pbzip2 > h2o4gpu-${fullVersionTag}${extratag}-runtime.tar.bz2 + $DOCKER_CLI save opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime | pbzip2 > h2o4gpu-${fullVersionTag}${extratag}-runtime.tar.bz2 fi echo "Docker runtime - END" diff --git a/scripts/make_jenkinsfiles.sh b/scripts/make_jenkinsfiles.sh index 3f78a67d7..ec4395e39 100755 --- a/scripts/make_jenkinsfiles.sh +++ b/scripts/make_jenkinsfiles.sh @@ -4,28 +4,31 @@ # jenkins that stage names have to be static text labels ## declare an array variable -declare -a arr=("nccl-cuda8" "nonccl-cuda9" "nccl-cuda9" "cpu" "nonccl-cuda9" "nccl-cuda9-aws1" "nccl-cuda9-benchmark" "nccl-cuda9-aws1-benchmark" "cpu") +declare -a arr=("x86_64-cuda8" "x86_64-cuda9" "x86_64-cuda92" "ppc64le-cuda8" "ppc64le-cuda9") ## now loop through the above array for i in "${arr[@]}" do echo "$i" - echo "#!/usr/bin/groovy" > Jenkinsfile-$i - echo "" >> Jenkinsfile-$i - echo "//################ FILE IS AUTO-GENERATED from .base files" >> Jenkinsfile-$i - echo "//################ DO NOT MODIFY" >> Jenkinsfile-$i - echo "//################ See scripts/make_jenkinsfiles.sh" >> Jenkinsfile-$i - echo "" >> Jenkinsfile-$i - - cat Jenkinsfile-$i.base >> Jenkinsfile-$i - echo "//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names)" >> Jenkinsfile-$i - cat Jenkinsfile.utils2 >> Jenkinsfile-$i - sed -i 's/stage\(.*\)\"/stage\1 '$i'\"/g' Jenkinsfile-$i + echo "#!/usr/bin/groovy" > ci/Jenkinsfile-$i + echo "" >> ci/Jenkinsfile-$i + echo "//################ FILE IS AUTO-GENERATED from .base files" >> ci/Jenkinsfile-$i + echo "//################ DO NOT MODIFY" >> ci/Jenkinsfile-$i + echo "//################ See scripts/make_jenkinsfiles.sh" >> ci/Jenkinsfile-$i + echo "" >> ci/Jenkinsfile-$i + + cat ci/base/Jenkinsfile-$i.base >> ci/Jenkinsfile-$i + echo "//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage 
names)" >> ci/Jenkinsfile-$i + cat ci/Jenkinsfile.template >> ci/Jenkinsfile-$i + + sed -i .bck 's/stage\(.*\)\"/stage\1 '$i'\"/g' ci/Jenkinsfile-$i if [[ $i == *"benchmark"* ]]; then echo "More for benchmarks" - sed -i 's/dobenchmark = \"1\"/dobenchmark = \"0\"/g' Jenkinsfile-$i - sed -i 's/doruntime = \"1\"/doruntime = \"0\"/g' Jenkinsfile-$i + sed -i .bck 's/dobenchmark = \"1\"/dobenchmark = \"0\"/g' ci/Jenkinsfile-$i + sed -i .bck 's/doruntime = \"1\"/doruntime = \"0\"/g' ci/Jenkinsfile-$i fi - + + rm -rf ci/Jenkinsfile-$i.bck + done diff --git a/scripts/prepare_sklearn.sh b/scripts/prepare_sklearn.sh index 4d5a74c58..c76db3339 100755 --- a/scripts/prepare_sklearn.sh +++ b/scripts/prepare_sklearn.sh @@ -25,35 +25,40 @@ done #files=`find -type f | grep -v pycache` files=`find -type f | grep -v pycache | awk '{ print length($0) " " $0; }' | sort -n | cut -d ' ' -f 2-` -for fil in $files -do - echo "Edit contents of $fil" - if [[ "$fil" == *".git"* ]] +function modify_file() { + if [[ "$1" == *".git"* ]] then #echo "skip .git" true else - sed -i 's/sklearn/h2o4gpu/g' $fil - sed -i 's/scikit-learn/h2o4gpu/g' $fil + sed -i 's/sklearn/h2o4gpu/g' $1 + sed -i 's/scikit-learn/h2o4gpu/g' $1 # replace names - sed -i 's/\([^_a-zA-Z0-9]\?\)KMeans\([^_a-zA-Z0-9]\?\)/\1KMeansSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)Ridge\([^_a-zA-Z0-9]\?\)/\1RidgeSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)Lasso\([^_a-zA-Z0-9]\?\)/\1LassoSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)LogisticRegression\([^_a-zA-Z0-9]\?\)/\1LogisticRegressionSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)LinearRegression\([^_a-zA-Z0-9]\?\)/\1LinearRegressionSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)ElasticNet\([^_a-zA-Z0-9]\?\)/\1ElasticNetSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)GradientBoostingRegressor\([^_a-zA-Z0-9]\?\)/\1GradientBoostingRegressorSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)GradientBoostingClassifier\([^_a-zA-Z0-9]\?\)/\1GradientBoostingClassifierSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)RandomForestRegressor\([^_a-zA-Z0-9]\?\)/\1RandomForestRegressorSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)RandomForestClassifier\([^_a-zA-Z0-9]\?\)/\1RandomForestClassifierSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)TruncatedSVD\([^_a-zA-Z0-9]\?\)/\1TruncatedSVDSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)PCA\([^_a-zA-Z0-9]\?\)/\1PCASklearn\2/g' $fil + sed -i 's/\([^_a-zA-Z0-9]\?\)KMeans\([^_a-zA-Z0-9]\?\)/\1KMeansSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)Ridge\([^_a-zA-Z0-9]\?\)/\1RidgeSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)Lasso\([^_a-zA-Z0-9]\?\)/\1LassoSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)LogisticRegression\([^_a-zA-Z0-9]\?\)/\1LogisticRegressionSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)LinearRegression\([^_a-zA-Z0-9]\?\)/\1LinearRegressionSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)ElasticNet\([^_a-zA-Z0-9]\?\)/\1ElasticNetSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)GradientBoostingRegressor\([^_a-zA-Z0-9]\?\)/\1GradientBoostingRegressorSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)GradientBoostingClassifier\([^_a-zA-Z0-9]\?\)/\1GradientBoostingClassifierSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)RandomForestRegressor\([^_a-zA-Z0-9]\?\)/\1RandomForestRegressorSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)RandomForestClassifier\([^_a-zA-Z0-9]\?\)/\1RandomForestClassifierSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)TruncatedSVD\([^_a-zA-Z0-9]\?\)/\1TruncatedSVDSklearn\2/g' $1 + sed -i 
's/\([^_a-zA-Z0-9]\?\)PCA\([^_a-zA-Z0-9]\?\)/\1PCASklearn\2/g' $1 # avoid duplicate conversions - sed -i 's/Sklearn_Sklearn/Sklearn/g' $fil + sed -i 's/Sklearn_Sklearn/Sklearn/g' $1 # other replacements - sed -i "s/from \.\. import get_config as _get_config/import os\n_ASSUME_FINITE = bool(os.environ.get('SKLEARN_ASSUME_FINITE', False))\ndef _get_config\(\):\n return \{'assume_finite': _ASSUME_FINITE\}/g" $fil + sed -i "s/from \.\. import get_config as _get_config/import os\n_ASSUME_FINITE = bool(os.environ.get('SKLEARN_ASSUME_FINITE', False))\ndef _get_config\(\):\n return \{'assume_finite': _ASSUME_FINITE\}/g" $1 fi +} + +for fil in $files +do + modify_file $fil & done +wait + cd .. # inject h2o4gpu into scikit-learn diff --git a/run.sh b/scripts/run.sh similarity index 92% rename from run.sh rename to scripts/run.sh index 4bdc7983b..37c02cc81 100755 --- a/run.sh +++ b/scripts/run.sh @@ -5,8 +5,6 @@ set -e -source h2o4gpu_env/bin/activate - if [ "x$1" != "x" ]; then d=$1 cd $d diff --git a/src/config2.mk b/src/config2.mk deleted file mode 100644 index 48d1cdfdb..000000000 --- a/src/config2.mk +++ /dev/null @@ -1,37 +0,0 @@ -location = $(CURDIR)/$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -WHERE := $(location) -$(info ** -> $(WHERE)) -$(info ** ------------------------------------------------------------------ **) - -NVCC := $(shell command -v nvcc 2> /dev/null) - -#local settings -USENCCL=0 -USENVTX=0 -CMAKE_BUILD_TYPE=0 - -$(warning CMAKE_BUILD_TYPE is $(CMAKE_BUILD_TYPE)) -$(warning USENVTX is $(USENVTX)) -$(warning USENCCL is $(USENCCL)) - -# for R (rest can do both at same time) -#TARGET=gpulib -#$(warning R TARGET is $(TARGET)) - -ifdef NVCC -# CUDA Flags for XGBoost -CUDA_LIB=$(CUDA_HOME)/lib64 -CUDA_VERSION ?= $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev) -CUDA_MAJOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 1) -ifeq ($(shell test $(CUDA_MAJOR) -ge 9; echo $$?),0) - $(warning Compiling with Cuda9 or higher) - # >=52 required for kmeans for larger data of size rows/32>2^16 - XGB_CUDA ?= -DGPU_COMPUTE_VER="35;52;60;61;70" -else - $(warning Compiling with Cuda8 or lower) - # >=52 required for kmeans for larger data of size rows/32>2^16 - XGB_CUDA ?= -DGPU_COMPUTE_VER="35;52;60;61" -endif -else -$(warning No CUDA found.) 
-endif diff --git a/src/interface_py/Makefile b/src/interface_py/Makefile index dc33e1422..69ffcc8d6 100644 --- a/src/interface_py/Makefile +++ b/src/interface_py/Makefile @@ -6,10 +6,12 @@ $(info ** ------------------------------------------------------------------ **) SHELL := /bin/bash # force avoidance of dash as shell thepwd = "$(shell pwd)" +include ../../make/config.mk + default: all pylint: - $$SHELL test.sh + $$SHELL scripts/run-pylint.sh pyformat: @status=0; \ @@ -22,8 +24,8 @@ pyformat: done; \ prep: - PYVER=`python -c 'import sys; print(".".join(map(str, sys.version_info[:2])))' | sed 's/\.//g'` && sed -i 's/python-tag=.*/python-tag=py'$$PYVER'/g' setup.cfg - PYVER=`python -c 'import sys; print(".".join(map(str, sys.version_info[:2])))'` && sed -i 's/python_version==.*/python_version=='$$PYVER'/g' setup.cfg + PYVER=`$(PYTHON) -c 'import sys; print(".".join(map(str, sys.version_info[:2])))' | sed 's/\.//g'` && sed -i 's/python-tag=.*/python-tag=py'$$PYVER'/g' setup.cfg + PYVER=`$(PYTHON) -c 'import sys; print(".".join(map(str, sys.version_info[:2])))'` && sed -i 's/python_version==.*/python_version=='$$PYVER'/g' setup.cfg cd ../../ && bash scripts/apply_sklearn_initmerge.sh # so if change our init it gets updated all: prep @@ -32,7 +34,7 @@ all: prep ln -sf ../../xgboost/python-package/xgboost . ln -sf ../../py3nvml/py3nvml . - python setup.py sdist bdist_wheel + $(PYTHON) setup.py sdist bdist_wheel # update build with xgboost shared library mkdir -p build/lib/xgboost/ @@ -40,7 +42,7 @@ all: prep # Make wheel with other builds added rm -rf dist/*.whl - python setup.py sdist bdist_wheel + $(PYTHON) setup.py sdist bdist_wheel # for pycharm ln -sf $(thepwd)/../interface_c/_ch2o4gpu_cpu.so . @@ -48,14 +50,14 @@ all: prep install: - -cd ../.. && pip uninstall -y h2o4gpu - -cd ../.. && pip uninstall -y xgboost - -cd ../.. && pip uninstall -y py3nvml + -cd ../.. && $(PYTHON) -m pip uninstall -y h2o4gpu + -cd ../.. && $(PYTHON) -m pip uninstall -y xgboost + -cd ../.. 
&& $(PYTHON) -m pip uninstall -y py3nvml find -name *.pyc | xargs rm -rf find -name *__pycache__ | xargs rm -rf # something wrong with below - #python setup.py install - pip install dist/h2o4gpu*.whl --upgrade + #$(PYTHON) setup.py install + $(PYTHON) -m pip install dist/h2o4gpu*.whl --upgrade clean: rm -f h2o4gpu/BUILD_INFO.txt @@ -64,8 +66,7 @@ clean: # This *is* required rm -f h2o4gpu/__init__.py touch h2o4gpu/__init__.py - pip install numpy - python setup.py clean --all && rm -rf h2o4gpu.egg-info && rm -rf h2o4gpu/__pycache__/ && rm -rf dist/ + $(PYTHON) setup.py clean --all && rm -rf h2o4gpu.egg-info && rm -rf h2o4gpu/__pycache__/ && rm -rf dist/ cd h2o4gpu && find -L -name *.pyc | xargs rm -rf diff --git a/requirements_buildonly.txt b/src/interface_py/requirements_buildonly.txt similarity index 84% rename from requirements_buildonly.txt rename to src/interface_py/requirements_buildonly.txt index 053795b33..7a0db6a1f 100644 --- a/requirements_buildonly.txt +++ b/src/interface_py/requirements_buildonly.txt @@ -4,12 +4,7 @@ attrs==17.3.0 execnet==1.5.0 pluggy==0.6.0 py==1.5.2 -pytest==3.3.1 -pytest-forked==0.2 -pytest-xdist==1.20.1 -pytest-cov==2.4.0 six==1.11.0 -pylint==1.8.4 yapf==0.17.0 coverage==4.4.1 # docs @@ -18,7 +13,6 @@ sphinx_rtd_theme==0.2.4 pillow==4.2.1 # compile wheel wheel==0.31.0 -cmake==3.11.0 Cython==0.27.3 # for make testperf h2o==3.18.0.10 diff --git a/requirements_runtime.txt b/src/interface_py/requirements_runtime.txt similarity index 74% rename from requirements_runtime.txt rename to src/interface_py/requirements_runtime.txt index ea8f5e8a6..b4ecd4a38 100644 --- a/requirements_runtime.txt +++ b/src/interface_py/requirements_runtime.txt @@ -11,3 +11,8 @@ psutil==5.4.5 # below for xgboost scikit-learn==0.19.1 sklearn==0.0 +pytest==3.3.1 +pytest-forked==0.2 +pytest-xdist==1.20.1 +pytest-cov==2.4.0 +pylint==1.8.4 diff --git a/requirements_runtime_demos.txt b/src/interface_py/requirements_runtime_demos.txt similarity index 100% rename from requirements_runtime_demos.txt rename to src/interface_py/requirements_runtime_demos.txt diff --git a/src/interface_py/test.sh b/src/interface_py/scripts/run-pylint.sh similarity index 100% rename from src/interface_py/test.sh rename to src/interface_py/scripts/run-pylint.sh diff --git a/src/interface_py/setup.py b/src/interface_py/setup.py index 170bb6f11..4b63cba60 100644 --- a/src/interface_py/setup.py +++ b/src/interface_py/setup.py @@ -66,7 +66,7 @@ def run(self): # reqs is a list of requirement # e.g. ['django==1.5.1', 'mezzanine==1.4.6'] -with open("../../requirements_runtime.txt", "r") as fs: +with open("requirements_runtime.txt", "r") as fs: reqs = [r for r in fs.read().splitlines() if (len(r) > 0 and not r.startswith("#"))] def get_packages(directory): diff --git a/src/interface_r/vignettes/getting_started.Rmd b/src/interface_r/vignettes/getting_started.Rmd index 0ad0d29b1..b90797c0d 100644 --- a/src/interface_r/vignettes/getting_started.Rmd +++ b/src/interface_r/vignettes/getting_started.Rmd @@ -24,7 +24,7 @@ The R package makes use of RStudio's [reticulate](https://rstudio.github.io/reti ## Installation -There are a few [system requirements](https://github.com/h2oai/h2o4gpu#requirements), including Ubuntu 16.04+, Python >=3.6, R >=3.1, CUDA 8 or 9, and a machine with Nvidia GPUs. The code should still run if you have CPUs, but it will fall back to scikit-learn CPU based versions of the algorithms. 
+There are a few [system requirements](https://github.com/h2oai/h2o4gpu#requirements), including Linux with glibc 2.17+, Python >=3.6, R >=3.1, CUDA 8 or 9, and a machine with Nvidia GPUs. The code should still run if you have CPUs, but it will fall back to scikit-learn CPU based versions of the algorithms. The **h2o4gpu** Python module is a prerequisite for the R package. So first, follow the instructions [here](https://github.com/h2oai/h2o4gpu#user-installation) to install the **h2o4gpu** Python package (either at the system level or in a Python virtual envivonment). The easiest thing to do is to `pip install` the stable release `whl` file. To ensure compatibility, the Python package version number should match the R package version number. diff --git a/src/swig/ch2o4gpu_cpu.i b/src/swig/ch2o4gpu_cpu.i index 60a6e1c76..fbd838bb6 100644 --- a/src/swig/ch2o4gpu_cpu.i +++ b/src/swig/ch2o4gpu_cpu.i @@ -14,4 +14,4 @@ %include "solver/elastic_net.i" %include "solver/pogs.i" %include "matrix/matrix_dense.i" -%include "metrics.i" +%include "metrics.i" \ No newline at end of file diff --git a/tests_big/getresultsbig.sh b/tests/python/big/getresultsbig.sh similarity index 100% rename from tests_big/getresultsbig.sh rename to tests/python/big/getresultsbig.sh diff --git a/tests_big/test_glm_hyatt.py b/tests/python/big/test_glm_hyatt.py similarity index 100% rename from tests_big/test_glm_hyatt.py rename to tests/python/big/test_glm_hyatt.py diff --git a/tests_big/test_glm_ipums.py b/tests/python/big/test_glm_ipums.py similarity index 100% rename from tests_big/test_glm_ipums.py rename to tests/python/big/test_glm_ipums.py diff --git a/tests_open/daal/test_daal_normalization.py b/tests/python/open_data/daal/test_daal_normalization.py similarity index 100% rename from tests_open/daal/test_daal_normalization.py rename to tests/python/open_data/daal/test_daal_normalization.py diff --git a/tests_open/daal/test_daal_regression.py b/tests/python/open_data/daal/test_daal_regression.py similarity index 100% rename from tests_open/daal/test_daal_regression.py rename to tests/python/open_data/daal/test_daal_regression.py diff --git a/tests_open/daal/test_daal_ridge_regression.py b/tests/python/open_data/daal/test_daal_ridge_regression.py similarity index 100% rename from tests_open/daal/test_daal_ridge_regression.py rename to tests/python/open_data/daal/test_daal_ridge_regression.py diff --git a/tests_open/daal/test_daal_svd.py b/tests/python/open_data/daal/test_daal_svd.py similarity index 100% rename from tests_open/daal/test_daal_svd.py rename to tests/python/open_data/daal/test_daal_svd.py diff --git a/tests_open/gbm/model_saved.pkl b/tests/python/open_data/gbm/model_saved.pkl similarity index 100% rename from tests_open/gbm/model_saved.pkl rename to tests/python/open_data/gbm/model_saved.pkl diff --git a/tests_open/gbm/test_gpu_prediction_pickledmodel.py b/tests/python/open_data/gbm/test_gpu_prediction_pickledmodel.py similarity index 99% rename from tests_open/gbm/test_gpu_prediction_pickledmodel.py rename to tests/python/open_data/gbm/test_gpu_prediction_pickledmodel.py index f0c257551..7e15fba1b 100644 --- a/tests_open/gbm/test_gpu_prediction_pickledmodel.py +++ b/tests/python/open_data/gbm/test_gpu_prediction_pickledmodel.py @@ -227,7 +227,7 @@ def test_predict_sklearn_frompickle(self): Xtest = makeXtest() # load model - model = load_obj("./tests_open/gbm/model_saved.pkl") + model = load_obj("./tests/python/open_data/gbm/model_saved.pkl") # continue as before print("Before model.predict") diff --git 
a/tests_open/gbm/test_xgb_sklearn_wrapper.py b/tests/python/open_data/gbm/test_xgb_sklearn_wrapper.py similarity index 100% rename from tests_open/gbm/test_xgb_sklearn_wrapper.py rename to tests/python/open_data/gbm/test_xgb_sklearn_wrapper.py diff --git a/tests_open/gbm/test_xgboost.py b/tests/python/open_data/gbm/test_xgboost.py similarity index 100% rename from tests_open/gbm/test_xgboost.py rename to tests/python/open_data/gbm/test_xgboost.py diff --git a/tests_open/gbm/test_xgboost_dtinput.py b/tests/python/open_data/gbm/test_xgboost_dtinput.py similarity index 100% rename from tests_open/gbm/test_xgboost_dtinput.py rename to tests/python/open_data/gbm/test_xgboost_dtinput.py diff --git a/tests_open/getresults.sh b/tests/python/open_data/getresults.sh similarity index 100% rename from tests_open/getresults.sh rename to tests/python/open_data/getresults.sh diff --git a/tests_open/glm/test_elastic_net_ptr_driver.py b/tests/python/open_data/glm/test_elastic_net_ptr_driver.py similarity index 100% rename from tests_open/glm/test_elastic_net_ptr_driver.py rename to tests/python/open_data/glm/test_elastic_net_ptr_driver.py diff --git a/tests_open/glm/test_elastic_net_sklearn.py b/tests/python/open_data/glm/test_elastic_net_sklearn.py similarity index 100% rename from tests_open/glm/test_elastic_net_sklearn.py rename to tests/python/open_data/glm/test_elastic_net_sklearn.py diff --git a/tests_open/glm/test_elasticnet_sklearn_wrapper.py b/tests/python/open_data/glm/test_elasticnet_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_elasticnet_sklearn_wrapper.py rename to tests/python/open_data/glm/test_elasticnet_sklearn_wrapper.py diff --git a/tests_open/glm/test_glm_credit.py b/tests/python/open_data/glm/test_glm_credit.py similarity index 100% rename from tests_open/glm/test_glm_credit.py rename to tests/python/open_data/glm/test_glm_credit.py diff --git a/tests_open/glm/test_glm_np_input.py b/tests/python/open_data/glm/test_glm_np_input.py similarity index 100% rename from tests_open/glm/test_glm_np_input.py rename to tests/python/open_data/glm/test_glm_np_input.py diff --git a/tests_open/glm/test_glm_simple.py b/tests/python/open_data/glm/test_glm_simple.py similarity index 100% rename from tests_open/glm/test_glm_simple.py rename to tests/python/open_data/glm/test_glm_simple.py diff --git a/tests_open/glm/test_glm_sklearn.py b/tests/python/open_data/glm/test_glm_sklearn.py similarity index 100% rename from tests_open/glm/test_glm_sklearn.py rename to tests/python/open_data/glm/test_glm_sklearn.py diff --git a/tests_open/glm/test_lasso.py b/tests/python/open_data/glm/test_lasso.py similarity index 100% rename from tests_open/glm/test_lasso.py rename to tests/python/open_data/glm/test_lasso.py diff --git a/tests_open/glm/test_lasso_sklearn_wrapper.py b/tests/python/open_data/glm/test_lasso_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_lasso_sklearn_wrapper.py rename to tests/python/open_data/glm/test_lasso_sklearn_wrapper.py diff --git a/tests_open/glm/test_lasso_sparsity.py b/tests/python/open_data/glm/test_lasso_sparsity.py similarity index 100% rename from tests_open/glm/test_lasso_sparsity.py rename to tests/python/open_data/glm/test_lasso_sparsity.py diff --git a/tests_open/glm/test_logistic_credit.py b/tests/python/open_data/glm/test_logistic_credit.py similarity index 100% rename from tests_open/glm/test_logistic_credit.py rename to tests/python/open_data/glm/test_logistic_credit.py diff --git a/tests_open/glm/test_logistic_iris.py 
b/tests/python/open_data/glm/test_logistic_iris.py similarity index 100% rename from tests_open/glm/test_logistic_iris.py rename to tests/python/open_data/glm/test_logistic_iris.py diff --git a/tests_open/glm/test_logistic_sklearn_wrapper.py b/tests/python/open_data/glm/test_logistic_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_logistic_sklearn_wrapper.py rename to tests/python/open_data/glm/test_logistic_sklearn_wrapper.py diff --git a/tests_open/glm/test_memory_leak_check.py b/tests/python/open_data/glm/test_memory_leak_check.py similarity index 100% rename from tests_open/glm/test_memory_leak_check.py rename to tests/python/open_data/glm/test_memory_leak_check.py diff --git a/tests_open/glm/test_regression_sklearn_wrapper.py b/tests/python/open_data/glm/test_regression_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_regression_sklearn_wrapper.py rename to tests/python/open_data/glm/test_regression_sklearn_wrapper.py diff --git a/tests_open/glm/test_ridge.py b/tests/python/open_data/glm/test_ridge.py similarity index 100% rename from tests_open/glm/test_ridge.py rename to tests/python/open_data/glm/test_ridge.py diff --git a/tests_open/glm/test_ridge_sklearn_wrapper.py b/tests/python/open_data/glm/test_ridge_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_ridge_sklearn_wrapper.py rename to tests/python/open_data/glm/test_ridge_sklearn_wrapper.py diff --git a/tests_open/kmeans/test_kmeans.py b/tests/python/open_data/kmeans/test_kmeans.py similarity index 100% rename from tests_open/kmeans/test_kmeans.py rename to tests/python/open_data/kmeans/test_kmeans.py diff --git a/tests_open/showresults.sh b/tests/python/open_data/showresults.sh similarity index 100% rename from tests_open/showresults.sh rename to tests/python/open_data/showresults.sh diff --git a/tests_open/svd/test_pca.py b/tests/python/open_data/svd/test_pca.py similarity index 100% rename from tests_open/svd/test_pca.py rename to tests/python/open_data/svd/test_pca.py diff --git a/tests_open/svd/test_tsvd.py b/tests/python/open_data/svd/test_tsvd.py similarity index 100% rename from tests_open/svd/test_tsvd.py rename to tests/python/open_data/svd/test_tsvd.py diff --git a/tests_open/svd/test_tsvd_bench.py b/tests/python/open_data/svd/test_tsvd_bench.py similarity index 100% rename from tests_open/svd/test_tsvd_bench.py rename to tests/python/open_data/svd/test_tsvd_bench.py diff --git a/tests_open/svd/test_tsvd_gpuid.py b/tests/python/open_data/svd/test_tsvd_gpuid.py similarity index 100% rename from tests_open/svd/test_tsvd_gpuid.py rename to tests/python/open_data/svd/test_tsvd_gpuid.py diff --git a/tests_open/svd/test_tsvd_power.py b/tests/python/open_data/svd/test_tsvd_power.py similarity index 100% rename from tests_open/svd/test_tsvd_power.py rename to tests/python/open_data/svd/test_tsvd_power.py diff --git a/tests_open/svd/test_tsvd_wrapper.py b/tests/python/open_data/svd/test_tsvd_wrapper.py similarity index 100% rename from tests_open/svd/test_tsvd_wrapper.py rename to tests/python/open_data/svd/test_tsvd_wrapper.py diff --git a/tests_open/svd/test_tsvd_wrapper_iris.py b/tests/python/open_data/svd/test_tsvd_wrapper_iris.py similarity index 100% rename from tests_open/svd/test_tsvd_wrapper_iris.py rename to tests/python/open_data/svd/test_tsvd_wrapper_iris.py diff --git a/tests_open/svd/test_tsvd_wrapper_options.py b/tests/python/open_data/svd/test_tsvd_wrapper_options.py similarity index 100% rename from 
tests_open/svd/test_tsvd_wrapper_options.py rename to tests/python/open_data/svd/test_tsvd_wrapper_options.py diff --git a/tests_open/svd/test_tsvd_x_transformed.py b/tests/python/open_data/svd/test_tsvd_x_transformed.py similarity index 100% rename from tests_open/svd/test_tsvd_x_transformed.py rename to tests/python/open_data/svd/test_tsvd_x_transformed.py diff --git a/tests_open/system/test_import.py b/tests/python/open_data/system/test_import.py similarity index 100% rename from tests_open/system/test_import.py rename to tests/python/open_data/system/test_import.py diff --git a/tests_open/system/test_metrics.py b/tests/python/open_data/system/test_metrics.py similarity index 100% rename from tests_open/system/test_metrics.py rename to tests/python/open_data/system/test_metrics.py diff --git a/tests_small/test-LinearModels.ipynb b/tests/python/small/test-LinearModels.ipynb similarity index 100% rename from tests_small/test-LinearModels.ipynb rename to tests/python/small/test-LinearModels.ipynb diff --git a/tests_small/test_glm_hyatt.py b/tests/python/small/test_glm_hyatt.py similarity index 100% rename from tests_small/test_glm_hyatt.py rename to tests/python/small/test_glm_hyatt.py diff --git a/tests_small/test_glm_ipums.py b/tests/python/small/test_glm_ipums.py similarity index 100% rename from tests_small/test_glm_ipums.py rename to tests/python/small/test_glm_ipums.py diff --git a/tests_small/test_glm_paribas.py b/tests/python/small/test_glm_paribas.py similarity index 100% rename from tests_small/test_glm_paribas.py rename to tests/python/small/test_glm_paribas.py diff --git a/testsxgboost/01_airline_GPU.py b/tests/python/xgboost/01_airline_GPU.py similarity index 100% rename from testsxgboost/01_airline_GPU.py rename to tests/python/xgboost/01_airline_GPU.py diff --git a/testsxgboost/03_football_GPU.py b/tests/python/xgboost/03_football_GPU.py similarity index 100% rename from testsxgboost/03_football_GPU.py rename to tests/python/xgboost/03_football_GPU.py diff --git a/testsxgboost/04_PlanetKaggle_GPU.py b/tests/python/xgboost/04_PlanetKaggle_GPU.py similarity index 100% rename from testsxgboost/04_PlanetKaggle_GPU.py rename to tests/python/xgboost/04_PlanetKaggle_GPU.py diff --git a/testsxgboost/05_FraudDetection_GPU.py b/tests/python/xgboost/05_FraudDetection_GPU.py similarity index 100% rename from testsxgboost/05_FraudDetection_GPU.py rename to tests/python/xgboost/05_FraudDetection_GPU.py diff --git a/testsxgboost/06_HIGGS_GPU.py b/tests/python/xgboost/06_HIGGS_GPU.py similarity index 100% rename from testsxgboost/06_HIGGS_GPU.py rename to tests/python/xgboost/06_HIGGS_GPU.py diff --git a/testsxgboost/extractjson.py b/tests/python/xgboost/extractjson.py similarity index 100% rename from testsxgboost/extractjson.py rename to tests/python/xgboost/extractjson.py diff --git a/tests/python/xgboost/extracttestxgboost.sh b/tests/python/xgboost/extracttestxgboost.sh new file mode 100644 index 000000000..1ac5253c3 --- /dev/null +++ b/tests/python/xgboost/extracttestxgboost.sh @@ -0,0 +1,19 @@ +# get path +MYPWD=`pwd` +echo "PWD is $MYPWD" +export RESULTS_DIR=$MYPWD/results + +# collect only required data +grep -B 2 -A 9 performance $RESULTS_DIR/football.txt > $RESULTS_DIR/football_acc_perf.json +grep -B 2 -A 10 performance $RESULTS_DIR/credit.txt > $RESULTS_DIR/credit_acc_perf.json # also has AUC +grep -B 2 -A 8 performance $RESULTS_DIR/airlines.txt > $RESULTS_DIR/airlines_acc_perf.json +grep -B 2 -A 8 performance $RESULTS_DIR/planet.txt > $RESULTS_DIR/planet_acc_perf.json +grep -B 2 
-A 8 performance $RESULTS_DIR/higgs.txt > $RESULTS_DIR/higgs_acc_perf.json + +# extract results out of the json +python tests/python/xgboost/extractjson.py test_gbm_football $RESULTS_DIR $RESULTS_DIR/football_acc_perf.json $RESULTS_DIR/test_gbm_football.error.dat $RESULTS_DIR/test_gbm_football.error.h2o.dat $RESULTS_DIR/test_gbm_football.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_football.time.dat $RESULTS_DIR/test_gbm_football.time.h2o.dat $RESULTS_DIR/test_gbm_football.time.h2o4gpu.dat +python tests/python/xgboost/extractjson.py test_gbm_credit $RESULTS_DIR $RESULTS_DIR/credit_acc_perf.json $RESULTS_DIR/test_gbm_credit.error.dat $RESULTS_DIR/test_gbm_credit.error.h2o.dat $RESULTS_DIR/test_gbm_credit.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_credit.time.dat $RESULTS_DIR/test_gbm_credit.time.h2o.dat $RESULTS_DIR/test_gbm_credit.time.h2o4gpu.dat +python tests/python/xgboost/extractjson.py test_gbm_airlines $RESULTS_DIR $RESULTS_DIR/airlines_acc_perf.json $RESULTS_DIR/test_gbm_airlines.error.dat $RESULTS_DIR/test_gbm_airlines.error.h2o.dat $RESULTS_DIR/test_gbm_airlines.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_airlines.time.dat $RESULTS_DIR/test_gbm_airlines.time.h2o.dat $RESULTS_DIR/test_gbm_airlines.time.h2o4gpu.dat +python tests/python/xgboost/extractjson.py test_gbm_planet $RESULTS_DIR $RESULTS_DIR/planet_acc_perf.json $RESULTS_DIR/test_gbm_planet.error.dat $RESULTS_DIR/test_gbm_planet.error.h2o.dat $RESULTS_DIR/test_gbm_planet.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_planet.time.dat $RESULTS_DIR/test_gbm_planet.time.h2o.dat $RESULTS_DIR/test_gbm_planet.time.h2o4gpu.dat +python tests/python/xgboost/extractjson.py test_gbm_higgs $RESULTS_DIR $RESULTS_DIR/higgs_acc_perf.json $RESULTS_DIR/test_gbm_higgs.error.dat $RESULTS_DIR/test_gbm_higgs.error.h2o.dat $RESULTS_DIR/test_gbm_higgs.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_higgs.time.dat $RESULTS_DIR/test_gbm_higgs.time.h2o.dat $RESULTS_DIR/test_gbm_higgs.time.h2o4gpu.dat + diff --git a/testsxgboost/libs/conversion.py b/tests/python/xgboost/libs/conversion.py similarity index 100% rename from testsxgboost/libs/conversion.py rename to tests/python/xgboost/libs/conversion.py diff --git a/testsxgboost/libs/football.py b/tests/python/xgboost/libs/football.py similarity index 100% rename from testsxgboost/libs/football.py rename to tests/python/xgboost/libs/football.py diff --git a/testsxgboost/libs/loaders.py b/tests/python/xgboost/libs/loaders.py similarity index 100% rename from testsxgboost/libs/loaders.py rename to tests/python/xgboost/libs/loaders.py diff --git a/testsxgboost/libs/metrics.py b/tests/python/xgboost/libs/metrics.py similarity index 100% rename from testsxgboost/libs/metrics.py rename to tests/python/xgboost/libs/metrics.py diff --git a/testsxgboost/libs/notebook_memory_management.py b/tests/python/xgboost/libs/notebook_memory_management.py similarity index 100% rename from testsxgboost/libs/notebook_memory_management.py rename to tests/python/xgboost/libs/notebook_memory_management.py diff --git a/testsxgboost/libs/planet_kaggle.py b/tests/python/xgboost/libs/planet_kaggle.py similarity index 100% rename from testsxgboost/libs/planet_kaggle.py rename to tests/python/xgboost/libs/planet_kaggle.py diff --git a/testsxgboost/libs/timer.py b/tests/python/xgboost/libs/timer.py similarity index 100% rename from testsxgboost/libs/timer.py rename to tests/python/xgboost/libs/timer.py diff --git a/testsxgboost/libs/utils.py b/tests/python/xgboost/libs/utils.py similarity index 100% rename from testsxgboost/libs/utils.py rename to 
tests/python/xgboost/libs/utils.py diff --git a/testsxgboost/runtestxgboost.sh b/tests/python/xgboost/runtestxgboost.sh similarity index 92% rename from testsxgboost/runtestxgboost.sh rename to tests/python/xgboost/runtestxgboost.sh index 68fc9e1c9..b1532042c 100755 --- a/testsxgboost/runtestxgboost.sh +++ b/tests/python/xgboost/runtestxgboost.sh @@ -17,7 +17,7 @@ then cd $MOUNT_POINT/football/ unzip -o soccer.zip cd ../../ -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 03_football_GPU.py &> $RESULTS_DIR/football.txt # py from export of ipynb removing inline commands cd $MYPWD @@ -27,7 +27,7 @@ if [ $runtests -eq 1 ] || [ $runtests -eq -1 ] then # run credit -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 05_FraudDetection_GPU.py &> $RESULTS_DIR/credit.txt # py from export of ipynb removing inline commands cd $MYPWD @@ -37,7 +37,7 @@ if [ $runtests -eq 2 ] || [ $runtests -eq -1 ] then # run airlines -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 01_airline_GPU.py &> $RESULTS_DIR/airlines.txt # py from export of ipynb removing inline commands cd $MYPWD @@ -62,7 +62,7 @@ rm -rf validate-jpg #mkdir -p validate-jpg #cp -a test-jpg/*.jpg validate-jpg/ cd ../../ -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 04_PlanetKaggle_GPU.py &> $RESULTS_DIR/planet.txt # py from export of ipynb removing inline commands cd $MYPWD @@ -72,7 +72,7 @@ if [ $runtests -eq 4 ] || [ $runtests -eq -1 ] then # run higgs -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 06_HIGGS_GPU.py &> $RESULTS_DIR/higgs.txt # py from export of ipynb removing inline commands cd $MYPWD diff --git a/testsxgboost/extracttestxgboost.sh b/testsxgboost/extracttestxgboost.sh deleted file mode 100644 index cc0a15b7c..000000000 --- a/testsxgboost/extracttestxgboost.sh +++ /dev/null @@ -1,19 +0,0 @@ -# get path -MYPWD=`pwd` -echo "PWD is $MYPWD" -export RESULTS_DIR=$MYPWD/results - -# collect only required data -grep -B 2 -A 9 performance $RESULTS_DIR/football.txt > $RESULTS_DIR/football_acc_perf.json -grep -B 2 -A 10 performance $RESULTS_DIR/credit.txt > $RESULTS_DIR/credit_acc_perf.json # also has AUC -grep -B 2 -A 8 performance $RESULTS_DIR/airlines.txt > $RESULTS_DIR/airlines_acc_perf.json -grep -B 2 -A 8 performance $RESULTS_DIR/planet.txt > $RESULTS_DIR/planet_acc_perf.json -grep -B 2 -A 8 performance $RESULTS_DIR/higgs.txt > $RESULTS_DIR/higgs_acc_perf.json - -# extract results out of the json -python testsxgboost/extractjson.py test_gbm_football $RESULTS_DIR $RESULTS_DIR/football_acc_perf.json $RESULTS_DIR/test_gbm_football.error.dat $RESULTS_DIR/test_gbm_football.error.h2o.dat $RESULTS_DIR/test_gbm_football.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_football.time.dat $RESULTS_DIR/test_gbm_football.time.h2o.dat $RESULTS_DIR/test_gbm_football.time.h2o4gpu.dat -python testsxgboost/extractjson.py test_gbm_credit $RESULTS_DIR $RESULTS_DIR/credit_acc_perf.json $RESULTS_DIR/test_gbm_credit.error.dat $RESULTS_DIR/test_gbm_credit.error.h2o.dat $RESULTS_DIR/test_gbm_credit.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_credit.time.dat $RESULTS_DIR/test_gbm_credit.time.h2o.dat $RESULTS_DIR/test_gbm_credit.time.h2o4gpu.dat -python testsxgboost/extractjson.py test_gbm_airlines $RESULTS_DIR $RESULTS_DIR/airlines_acc_perf.json $RESULTS_DIR/test_gbm_airlines.error.dat $RESULTS_DIR/test_gbm_airlines.error.h2o.dat $RESULTS_DIR/test_gbm_airlines.error.h2o4gpu.dat 
$RESULTS_DIR/test_gbm_airlines.time.dat $RESULTS_DIR/test_gbm_airlines.time.h2o.dat $RESULTS_DIR/test_gbm_airlines.time.h2o4gpu.dat -python testsxgboost/extractjson.py test_gbm_planet $RESULTS_DIR $RESULTS_DIR/planet_acc_perf.json $RESULTS_DIR/test_gbm_planet.error.dat $RESULTS_DIR/test_gbm_planet.error.h2o.dat $RESULTS_DIR/test_gbm_planet.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_planet.time.dat $RESULTS_DIR/test_gbm_planet.time.h2o.dat $RESULTS_DIR/test_gbm_planet.time.h2o4gpu.dat -python testsxgboost/extractjson.py test_gbm_higgs $RESULTS_DIR $RESULTS_DIR/higgs_acc_perf.json $RESULTS_DIR/test_gbm_higgs.error.dat $RESULTS_DIR/test_gbm_higgs.error.h2o.dat $RESULTS_DIR/test_gbm_higgs.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_higgs.time.dat $RESULTS_DIR/test_gbm_higgs.time.h2o.dat $RESULTS_DIR/test_gbm_higgs.time.h2o4gpu.dat - diff --git a/testsxgboost/libs/__init__.py b/testsxgboost/libs/__init__.py deleted file mode 100755 index e69de29bb..000000000
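
Note on the relocated build configuration: make/config.mk now derives a per-platform string (e.g. x86_64-centos7-cuda9.0 or x86_64-centos7-cpu) from the machine architecture and the libcudart version found under $(CUDA_HOME)/lib64, and the CI scripts above install whichever h2o4gpu wheel sits under src/interface_py/dist/${platform}. The snippet below is a minimal standalone sketch of that resolution logic, written in bash for illustration only; it mirrors the Make and shell fragments in make/config.mk and scripts/make-docker-runtests.sh and assumes that CUDA, when present, lives under /usr/local/cuda.

#!/bin/bash
# Illustrative sketch only -- not part of the change set above.
# Mirrors the platform detection in make/config.mk and the wheel lookup in
# scripts/make-docker-runtests.sh / scripts/make-docker-runtime.sh.
# Assumption: when CUDA is installed it lives under /usr/local/cuda.

CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}"
ARCH=$(arch)

if ls "${CUDA_HOME}"/lib64/libcudart.so.* >/dev/null 2>&1; then
    # e.g. libcudart.so.9.0 -> "9.0", the same rev|cut|rev trick used for MAKEFILE_CUDA_VERSION
    CUDA_VERSION=$(ls "${CUDA_HOME}"/lib64/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev)
    PLATFORM="${ARCH}-centos7-cuda${CUDA_VERSION}"
else
    PLATFORM="${ARCH}-centos7-cpu"
fi
echo "platform: ${PLATFORM}"

# The CI scripts pick the newest wheel out of the per-platform dist directory:
find "src/interface_py/dist/${PLATFORM}" -name "*h2o4gpu-*.whl" | xargs ls -tr | tail -1

Keying the dist/ directory on this platform string appears to be what lets the single ci/Jenkinsfile.template be reused across the x86_64 cuda8/cuda9/cuda92 and ppc64le cuda8/cuda9 variants defined in the ci/base/*.base files earlier in this diff.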