diff --git a/.gitignore b/.gitignore
index bc1953a5c..a5c67e360 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,9 +28,9 @@ src/pylint.d/*
 src/interface_py/py3nvml
 src/interface_py/xgboost
 deps/
-data/
-open_data/
-smalldata/
+./data/
+./open_data/
+./smalldata/
 tests/smalldata
 tests/data
 *.idea/
@@ -72,7 +72,7 @@ tests/results
 examples/py/sourced.png
 
 # temp data generated
-testsxgboost/fifa_data.pk
+tests/python/xgboost/fifa_data.pk
 examples/py/ipums_feather
 
 # Visual Studio Code
@@ -157,7 +157,7 @@ src/interface_py/h2o4gpu/utils/
 
 ## add submodules so user has to force to add/commit
 cub
-xgboost
+./xgboost
 py3nvml
 scikit-learn
 tests/googletest
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 76b29c218..c88dd0502 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,14 +51,13 @@ FILE(GLOB_RECURSE COMMON_SOURCES
 INCLUDE_DIRECTORIES(
     src/include
     src/cpu/include
+    # Here and not in target_include_directories b/c cmake < 3.7, which we use in Dockerfiles, does not support it
+    src/gpu/include
     ${PYTHON_INCLUDE_PATH}
     ${PYTHON_INCLUDE_PATH_CUST}
 )
 
 ADD_LIBRARY(commonh2o4gpu OBJECT ${COMMON_SOURCES})
-TARGET_INCLUDE_DIRECTORIES (commonh2o4gpu PUBLIC
-    src/include
-    )
 
 #============= BUILD COMMON CPU/GPU CODE
 
 #============= BUILD CPU LIBRARY
@@ -140,9 +139,6 @@ if(USE_CUDA)
         ${BLAS_LIBRARIES}
         ${NVTX_LIBRARY}
         ${NVML_LIBRARY})
-    TARGET_INCLUDE_DIRECTORIES (gpuh2o4gpu PUBLIC
-        src/gpu/include
-        )
 
     #============= BUILD GPU LIBRARY
 
     #============= GPU SWIG
diff --git a/DEVEL.md b/DEVEL.md
index 2e19d9a85..19d93766d 100644
--- a/DEVEL.md
+++ b/DEVEL.md
@@ -102,7 +102,7 @@ git clone https://github.com/h2oai/xgboost
 cd xgboost
 git checkout h2oai
 make -f Makefile2
-pip install python-package/dist/xgboost-0.7-py3-none-any.whl --upgrade
+pip install python-package/dist/xgboost-0.71-py3-none-any.whl --upgrade
 ```
 
 Note: By default the GPU NCCL version is installed using your local cuda version.
@@ -111,6 +111,8 @@ If fully understand build, can do jump to latter steps of
 
 ## Build flags and options:
 
+A full list of the build flags and options can be found in `make/config.mk`. Here are the most useful ones:
+
 ##### Debug mode
 
 To build the code in debug mode set `CMAKE_BUILD_TYPE=Debug` when building e.g. `make fullinstall CMAKE_BUILD_TYPE=Debug`.
@@ -123,6 +125,10 @@ To enable `nvToolsExt` set the `USENVTX` variable e.g. `make fullinstall USENVTX
 
 To expedite the building process in dev setup you can set `DEV_BUILD=ON` e.g. `make fullinstall DEV_BUILD=ON`. This will build the binary with only single CUDA compute capability (currently 6.1).
 
+##### NCCL
+
+Currently only the XGBoost part of the project uses NCCL. By default, NCCL support is ON during builds. To turn it off, run with `USENCCL=0`, e.g. `make fullinstall USENCCL=0`.
+
 ## Testing
 
 - test python package
diff --git a/Dockerfile-build b/Dockerfile-build
index 125a11d09..4dfbdc656 100644
--- a/Dockerfile-build
+++ b/Dockerfile-build
@@ -1,100 +1,174 @@
-# Copy this file and Run from one level higher than the git pull directory
-# To build: docker build -t opsh2oai/h2oai-nv -f Dockerfile-nvdocker .
-# To run with docker run -it -p 12345:12345 opsh2oai/h2oai-nv
+ARG docker_name
 
-ARG layer
-ARG version
-
-FROM $layer:$version
-# e.g. FROM ubuntu:latest
-
-RUN echo $layer
+FROM $docker_name
 
 MAINTAINER H2o.ai
 
-ENV DEBIAN_FRONTEND noninteractive
+#
+# Env variables for CUDA. Necessary because certain systems don't support nvidia-docker, so we should use plain docker as much as possible.
+# ENV HOME=/root -ENV PYENV_ROOT=$HOME/.pyenv -ENV PATH=$PYENV_ROOT/bin:$PATH - -RUN if $layer -ne "ubuntu"; then export CUDA_HOME=/usr/local/cuda && \ - export PATH=/usr/local/cuda/bin:$PATH && \ - export LD_LIBRARY_PATH_MORE=/home/$USER/lib/:$CUDA_HOME/lib64/:$CUDA_HOME/lib/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 && \ - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LD_LIBRARY_PATH_MORE && \ - export CUDADIR=/usr/local/cuda/include/ && \ - export OMP_NUM_THREADS=32 && \ - export MKL_NUM_THREADS=32 && \ - export VECLIB_MAXIMUM_THREADS=32; fi +ENV CUDA_HOME=/usr/local/cuda +ENV CUDADIR=/usr/local/cuda/include/ +ENV PATH=/usr/local/cuda/bin:$PATH +ENV LD_LIBRARY_PATH_CUDA=$CUDA_HOME/lib64/:$CUDA_HOME/lib/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 +ENV LD_LIBRARY_PATH_BUILD=/lib64:/usr/local/lib64:/home/$USER/lib/ +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH_CUDA:$LD_LIBRARY_PATH_BUILD:$LD_LIBRARY_PATH -# Symlinks for NVML -RUN \ - export NVIDIA_DRIVER=$(grep "Module" /proc/driver/nvidia/version | cut -f 9 -d' ' | cut -f 1 -d '.') && \ - mkdir -p /usr/lib/nvidia-$NVIDIA_DRIVER && \ - ln -s /usr/local/nvidia/lib64/libnvidia-ml.so.1 /usr/lib/nvidia-$NVIDIA_DRIVER/libnvidia-ml.so +# +# Env variables used by the codebase. +# +ENV OMP_NUM_THREADS=32 +ENV MKL_NUM_THREADS=32 +ENV VECLIB_MAXIMUM_THREADS=32 -# add-apt-repository ppa:fkrull/deadsnakes && \ +# +# Library versions +# +ENV MINICONDA_VERSION=4.4.10 +ENV SWIG_VERSION=3.0.12 +ENV PILLOW_VERSION=4.2.1 +ENV GIT_VERSION=2.17.0 -# Setup Repos -RUN \ - apt-get update -y && \ - apt-get -y install curl apt-utils python-software-properties \ - software-properties-common iputils-ping wget cpio net-tools build-essential \ - git zip dirmngr && \ - apt-get -y --no-install-recommends install \ - python3-dateutil python3-magic s3cmd && \ - wget http://launchpadlibrarian.net/326935544/s3cmd_2.0.0-1_all.deb && \ - dpkg -i s3cmd_2.0.0-1_all.deb && \ - add-apt-repository ppa:jonathonf/python-3.6 && \ - apt-get update -yqq && \ - curl -sL https://deb.nodesource.com/setup_7.x | bash - && \ - apt-get -y --no-install-recommends install \ - python3.6 \ - python3.6-dev \ - python3-pip \ - python3-setuptools \ - python3-wheel && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.6 100 && \ - python -m pip install --upgrade pip && \ - apt-get clean && \ - rm -rf /var/cache/apt/* && \ - apt-get install -y libopenblas-dev axel && \ - apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev swig +# +# Install necessary libraries and dependencies +# +RUN yum install -y epel-release + +# Setup gcc etc. 
+RUN yum install -y gcc gcc-c++ libgcc libstdc++ libgomp glibc + +# Git requirements +RUN yum install -y libcurl-devel zlib-devel asciidoc xmlto wget make autoconf gettext +# Compile from source because yum's latest version is 1.8.3 +# --depth for submodule update which we use was added in 1.8.4 RUN \ - mkdir -p .pylint.d && \ - rm -rf ~/.pyenv && \ - git clone https://github.com/pyenv/pyenv.git ~/.pyenv && \ - eval "$(/root/.pyenv/bin/pyenv init -)" && \ - CONFIGURE_OPTS=--enable-shared /root/.pyenv/bin/pyenv install 3.6.1 && \ - CONFIGURE_OPTS=--enable-shared /root/.pyenv/bin/pyenv global 3.6.1 && \ - pip install setuptools --no-cache-dir - -# Install Daal library -COPY scripts/daal/install_daal.sh scripts/daal/install_daal.sh + wget https://www.kernel.org/pub/software/scm/git/git-${GIT_VERSION}.tar.xz && \ + tar xf git-${GIT_VERSION}.tar.xz && \ + cd git-${GIT_VERSION} && \ + make configure && \ + ./configure --prefix=/usr && \ + make all && \ + make install; + +# H2O4GPU requirements + util programs +RUN yum install -y \ + ncurses-devel \ + bzip2 \ + which \ + axel \ + cmake3 \ + openssl-devel \ + libpng-devel \ + freetype-devel \ + blas-devel \ + openblas-devel && \ + wget https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-`arch`.sh && \ + bash Miniconda3-${MINICONDA_VERSION}-Linux-`arch`.sh -b -p /opt/h2oai/h2o4gpu/python && \ + wget https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai-thirdparty-deps-llvm/1.0-master-21/`arch`-centos7/llvm.tar.bz2 && \ + tar xvf llvm.tar.bz2 && \ + cp -r llvm/* /opt/h2oai/h2o4gpu/ && \ + rm -rf llvm* + +ENV LLVM4=/opt/h2oai/h2o4gpu +ENV PATH=/opt/h2oai/h2o4gpu/python/bin:$PATH +ENV PATH=/usr/local/bin:$PATH +ENV PATH=$LLVM4/bin:$PATH +ENV LD_LIBRARY_PATH=$LLVM4/lib:$LD_LIBRARY_PATH + +# +# Symlinks +# + +# AR for conda +RUN ln /usr/bin/ar $LLVM4/bin/`arch`-conda_cos6-linux-gnu-ar + +# CentOS' yum install cmake has only 2.X so need to install cmake3 and make a symlink +RUN ln -s /usr/bin/cmake3 /usr/bin/cmake + +# Symlinks for Python libs used by SWIG in CMake - it does not recognize Miniconda paths otherwise RUN \ - chmod +x scripts/daal/install_daal.sh && \ - scripts/daal/install_daal.sh + mkdir -p /usr/lib64/ && \ + ln -s /opt/h2oai/h2o4gpu/python/lib/*python* /usr/lib64/ && \ + mkdir -p /usr/include/python3.6m && \ + ln -s /opt/h2oai/h2o4gpu/python/include/python3.6m/* /usr/include/python3.6m + +# Yumming openblas puts some files in a not-so-standard locations +RUN ln -s /usr/include/openblas/* /usr/local/include/ -COPY requirements_buildonly.txt requirements_buildonly.txt -COPY requirements_runtime.txt requirements_runtime.txt -COPY requirements_runtime_demos.txt requirements_runtime_demos.txt +# Symlinks for NVML RUN \ - chmod a+rwx / && \ - chmod -R a+rwx /root && \ - chmod ugo+s /root/ && \ - mkdir -p /root/.cache/ && \ - eval "$(/root/.pyenv/bin/pyenv init -)" && \ - /root/.pyenv/bin/pyenv global 3.6.1 && \ - pip install setuptools && \ - pip install -r requirements_buildonly.txt && \ - pip install -r requirements_runtime.txt && \ - pip install -r requirements_runtime_demos.txt - -# Install R dependencies and h2o4gpu R package when appropriate -COPY scripts/install_r.sh scripts/install_r.sh -COPY scripts/test_r_pkg.sh scripts/test_r_pkg.sh -COPY scripts/install_r_deps.sh scripts/install_r_deps.sh + mkdir -p /usr/lib64/nvidia/ && \ + ln -s /usr/local/cuda-`nvcc --version | tail -n 1 | cut -f 5 -d' ' | cut -f 1 -d ','`/targets/`arch`-linux/lib/stubs/libnvidia-ml.so /usr/lib64/nvidia/libnvidia-ml.so + +# +# Builds 
from source due to too old versions in yum +# +WORKDIR $HOME + +# SWIG RUN \ - apt-get update -y && \ - apt-get -y install libcurl4-openssl-dev libssl-dev libxml2-dev && \ - scripts/install_r_deps.sh + wget https://sourceforge.net/projects/swig/files/swig/swig-${SWIG_VERSION}/swig-${SWIG_VERSION}.tar.gz && \ + tar -zxvf swig-${SWIG_VERSION}.tar.gz && \ + cd swig-${SWIG_VERSION} && \ + ./configure --prefix=/usr && \ + make -j $(nproc) && \ + make install && \ + cd $HOME && \ + rm -rf swig-3* + +# TODO Install DAAL + +# +# PPC64 specific - certain libs/whl don't support PPC64LE +# + +# Arrow +RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ + git clone https://github.com/apache/arrow.git && \ + cd $HOME/arrow/cpp && \ + git checkout tags/apache-arrow-0.8.0 && \ + yum install -y boost-devel && \ + pip install numpy cython && \ + cmake -DARROW_CXXFLAGS="-lutil" -DARROW_PYTHON=on && make -j && make install && \ + cd $HOME/arrow/python && \ + ARROW_HOME=/usr/local python setup.py install && \ + yum install -y libjpeg-devel;\ + fi' + +# Pillow +RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ + wget https://files.pythonhosted.org/packages/55/aa/f7f983fb72710a9daa4b3374b7c160091d3f94f5c09221f9336ade9027f3/Pillow-${PILLOW_VERSION}.tar.gz && \ + tar xvf Pillow-${PILLOW_VERSION}.tar.gz && \ + cd $HOME/Pillow-${PILLOW_VERSION} && \ + sed -i "s/'ppc64'/'ppc64le'/g" setup.py && \ + python setup.py install && \ + cd $HOME && \ + rm -rf Pillow-${PILLOW_VERSION}*; \ + fi' + + +# +# Install Python requirements +# +RUN pip install numpy setuptools + +COPY src/interface_py/requirements_buildonly.txt requirements_buildonly.txt +COPY src/interface_py/requirements_runtime.txt requirements_runtime.txt +COPY src/interface_py/requirements_runtime_demos.txt requirements_runtime_demos.txt + +RUN pip install -r requirements_buildonly.txt +RUN pip install -r requirements_runtime.txt +RUN pip install -r requirements_runtime_demos.txt + +RUN chmod -R o+rwx /opt/h2oai/h2o4gpu/python +RUN chmod -R o+rwx /root + +WORKDIR $HOME + +ENV GIT_AUTHOR_NAME="anonymous" +ENV GIT_AUTHOR_EMAIL="anonymous@h2o.ai" +ENV GIT_COMMITTER_NAME="anonymous" +ENV GIT_COMMITTER_EMAIL="anonymous@h2o.ai" +ENV EMAIL="anonymous@h2o.ai" \ No newline at end of file diff --git a/Dockerfile-build-centos7.in b/Dockerfile-build-centos7.in deleted file mode 100644 index f3ffcbea1..000000000 --- a/Dockerfile-build-centos7.in +++ /dev/null @@ -1,138 +0,0 @@ -FROM FROM_SUBST - -MAINTAINER H2o.ai - -ENV CUDA_HOME=/usr/local/cuda -ENV PATH=/usr/local/cuda/bin:$PATH -ENV LD_LIBRARY_PATH_MORE=/usr/lib/gcc/ppc64le-redhat-linux/4.8.2/:/home/$USER/lib/:$CUDA_HOME/lib64/:$CUDA_HOME/lib/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 -ENV LD_LIBRARY_PATH=/lib64:$LD_LIBRARY_PATH:$LD_LIBRARY_PATH_MORE -ENV CUDADIR=/usr/local/cuda/include/ -ENV OMP_NUM_THREADS=32 -ENV MKL_NUM_THREADS=32 -ENV HOME=/root -ENV VECLIB_MAXIMUM_THREADS=32 -RUN \ - yum groupinstall -y "Development Tools" - -RUN \ - yum install -y \ - ncurses-devel \ - zlib-devel \ - wget \ - bzip2 \ - openssl-devel \ - libcurl-devel && \ - wget https://repo.continuum.io/miniconda/Miniconda3-4.3.27-Linux-ARCH_SUBST.sh && \ - bash Miniconda3-4.3.27-Linux-ARCH_SUBST.sh -b -p /opt/h2oai/dai/python && \ - wget https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/dai-thirdparty-deps-llvm/1.0-master-21/ARCH_SUBST-centos7/llvm.tar.bz2 && \ - tar xvf llvm.tar.bz2 && \ - cp -r llvm/* /opt/h2oai/dai/ && \ - rm -rf llvm* - -ENV LLVM4=/opt/h2oai/dai -ENV 
PATH=/opt/h2oai/dai/python/bin:$PATH -ENV PATH=/usr/local/bin:$PATH -ENV PATH=$LLVM4/bin:$PATH -ENV LD_LIBRARY_PATH=$LLVM4/lib -COPY scripts/gcc_wrapper.sh /opt/h2oai/gcc_wrapper/gcc -COPY scripts/g++_wrapper.sh /opt/h2oai/gcc_wrapper/g++ -COPY scripts/gcc_wrapper.sh /opt/h2oai/gcc_wrapper/ARCH_SUBST-conda_cos6-linux-gnu-gcc -ENV PATH=/opt/h2oai/gcc_wrapper:$PATH -RUN ln /usr/bin/ar $LLVM4/bin/ARCH_SUBST-conda_cos6-linux-gnu-ar - -RUN yum install -y atlas-devel blas-devel && \ - ln /usr/lib64/libgfortran.so.3 /usr/lib64/libgfortran.so && \ - wget http://github.com/xianyi/OpenBLAS/archive/v0.2.20.tar.gz && \ - tar xvf v0.2.20.tar.gz && \ - rm v0.2.20.tar.gz && \ - cd OpenBLAS-0.2.20 && make CBLAS_ONLY=1 && make PREFIX=/usr/local install - -ENV OPENBLAS_PREFIX=open -RUN yum install -y libstdc++ libc libgomp -ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH -COPY scripts/g++_wrapper.sh /opt/h2oai/gcc_wrapper/ARCH_SUBST-conda_cos6-linux-gnu-c++ -RUN yum install -y which -WORKDIR $HOME -COPY requirements_buildonly.txt requirements_buildonly.txt -COPY requirements_runtime.txt requirements_runtime.txt -COPY requirements_runtime_demos.txt requirements_runtime_demos.txt - -RUN yum install -y \ - libpng-devel \ - freetype-devel - -ENV PATH=/usr/local/bin:$PATH -RUN \ - wget https://cmake.org/files/v3.10/cmake-3.10.1.tar.gz && \ - tar xvf cmake-3.10.1.tar.gz && \ - cd $HOME/cmake-3.10.1 && \ - ./configure && \ - make -j6 install && \ - cd $HOME && \ - rm -rf cmake-3* - -# Install SWIG b/c yum has old versions -RUN \ - wget https://sourceforge.net/projects/swig/files/swig/swig-3.0.12/swig-3.0.12.tar.gz && \ - tar -zxvf swig-3.0.12.tar.gz && \ - cd swig-3.0.12 && \ - ./configure --prefix=/usr && \ - make -j 4 && \ - make install && \ - cd $HOME && \ - rm -rf swig-3* - -# Symlinks for CMake/SWIG - it does not recognize Miniconda paths otherwise -RUN \ - mkdir -p /usr/lib64/ && \ - ln -s /opt/h2oai/dai/python/lib/*python* /usr/lib64/ && \ - mkdir -p /usr/include/python3.6m && \ - ln -s /opt/h2oai/dai/python/include/python3.6m/* /usr/include/python3.6m - -RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ - git clone https://github.com/apache/arrow.git && \ - cd $HOME/arrow/cpp && \ - git checkout tags/apache-arrow-0.8.0 && \ - yum install -y boost-devel && \ - pip install numpy cython && \ - cmake -DARROW_CXXFLAGS="-lutil" -DARROW_PYTHON=on && make -j && make install && \ - cd $HOME/arrow/python && \ - ARROW_HOME=/usr/local python setup.py install && \ - yum install -y libjpeg-devel; \ - fi' - -ENV PILLOW_VERSION=4.2.1 -RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ - wget https://files.pythonhosted.org/packages/55/aa/f7f983fb72710a9daa4b3374b7c160091d3f94f5c09221f9336ade9027f3/Pillow-${PILLOW_VERSION}.tar.gz && \ - tar xvf Pillow-${PILLOW_VERSION}.tar.gz && \ - cd $HOME/Pillow-${PILLOW_VERSION} && \ - sed -i "s/'ppc64'/'ppc64le'/g" setup.py && \ - python3.6 setup.py install && \ - cd $HOME && \ - rm -rf Pillow-${PILLOW_VERSION}*; \ - fi' - -# Symlinks for NVML -RUN \ - mkdir -p /usr/lib64/nvidia/ && \ - ln -s /usr/local/cuda-MY_CUDA_VERSION_SUBST/targets/ARCH_SUBST-linux/lib/stubs/libnvidia-ml.so /usr/lib64/nvidia/libnvidia-ml.so - -RUN pip install numpy -RUN pip install setuptools llvmlite==0.20.0 scikit-build scipy -RUN sed -i 's/cmake/# cmake/' requirements_buildonly.txt -RUN pip install -r requirements_buildonly.txt -RUN pip install -r requirements_runtime.txt -RUN pip install -r requirements_runtime_demos.txt - -RUN yum install -y which - -RUN chmod -R o+rwx /opt/h2oai/dai/python -RUN chmod -R o+rwx 
/root - -WORKDIR $HOME - -ENV GIT_AUTHOR_NAME="anonymous" -ENV GIT_AUTHOR_EMAIL="anonymous@h2o.ai" -ENV GIT_COMMITTER_NAME="anonymous" -ENV GIT_COMMITTER_EMAIL="anonymous@h2o.ai" -ENV EMAIL="anonymous@h2o.ai" diff --git a/Dockerfile-runtime b/Dockerfile-runtime index 8f73f8966..7363b9607 100644 --- a/Dockerfile-runtime +++ b/Dockerfile-runtime @@ -1,76 +1,74 @@ -#How to run: -#To build: docker build -t opsh2o4gpu/h2o4gpu-runtime -f Dockerfile-runtime . -#To run: nvidia-docker run -p 8888:8888 -v /some/local/log:/log opsh2o4gpu/h2o4gpu-runtime & -# or without nvidia: docker run -p 8888:8888 -v /some/local/log:/log opsh2o4gpu/h2o4gpu-runtime & - -ARG layer -ARG version - -FROM $layer:$version -# e.g. FROM ubuntu:latest - -ARG wheel -ENV wheel=${wheel} -ARG buckettype -ENV buckettype=${buckettype} +ARG docker_name +FROM $docker_name MAINTAINER H2o.ai -ENV DEBIAN_FRONTEND noninteractive - -RUN if $layer -ne "ubuntu"; then export CUDA_HOME=/usr/local/cuda && \ - export PATH=/usr/local/cuda/bin:$PATH && \ - export LD_LIBRARY_PATH_MORE=/home/$USER/lib/:$CUDA_HOME/lib64/:$CUDA_HOME/lib/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 && \ - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LD_LIBRARY_PATH_MORE && \ - export CUDADIR=/usr/local/cuda/include/ && \ - export OMP_NUM_THREADS=32 && \ - export MKL_NUM_THREADS=32 && \ - export VECLIB_MAXIMUM_THREADS=32; fi - -# add-apt-repository ppa:fkrull/deadsnakes && \ - -RUN \ - # Setup Repos - apt-get update -y && \ - apt-get -y install curl apt-utils python-software-properties \ - software-properties-common iputils-ping wget cpio net-tools build-essential \ - git zip dirmngr && \ - apt-get -y --no-install-recommends install \ - python3-dateutil \ - python3-magic && \ - wget http://launchpadlibrarian.net/326935544/s3cmd_2.0.0-1_all.deb && \ - dpkg -i s3cmd_2.0.0-1_all.deb && \ - add-apt-repository ppa:jonathonf/python-3.6 && \ - apt-get update -yqq && \ - curl -sL https://deb.nodesource.com/setup_7.x | bash - && \ - # Install H2o dependencies - apt-get -y --no-install-recommends install \ - python3.6 \ - python3.6-dev \ - virtualenv \ - python3-pip && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.6 100 && \ - python -m pip install --upgrade pip && \ - apt-get clean && \ - rm -rf /var/cache/apt/* && \ - apt-get install -y libopenblas-dev pbzip2 - -RUN \ - mkdir h2o4gpu_env && \ - virtualenv --python=/usr/bin/python3.6 h2o4gpu_env && \ - chmod -R o+w h2o4gpu_env && \ - . h2o4gpu_env/bin/activate && \ - pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir --upgrade setuptools && \ - pip install --no-cache-dir --upgrade numpy && \ - pip install --no-cache-dir --upgrade jupyter +ENV HOME=/root +ENV CUDA_HOME=/usr/local/cuda +ENV PATH=/usr/local/cuda/bin:$PATH +ENV CUDADIR=/usr/local/cuda/include/ +ENV LD_LIBRARY_PATH=/usr/lib64:/usr/local/lib:$LD_LIBRARY_PATH + +ENV MINICONDA_VERSION=4.4.10 + +# Setup gcc etc. 
+RUN yum install -y epel-release + +RUN yum install -y gcc gcc-c++ libgcc libstdc++ libgomp glibc + +RUN yum install -y \ + make \ + ncurses-devel \ + zlib-devel \ + wget \ + blas-devel \ + openblas-devel \ + libpng-devel \ + freetype-devel \ + bzip2 && \ + wget https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-`arch`.sh && \ + bash Miniconda3-${MINICONDA_VERSION}-Linux-`arch`.sh -b -p /opt/h2oai/h2o4gpu/python +ENV PATH=/opt/h2oai/h2o4gpu/python/bin:$PATH + +# +# PPC64 specific - certain libs/whl don't support PPC64LE +# + +WORKDIR $HOME + +# Arrow +RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ + yum install -y git boost-devel cmake3 && \ + ln -s /usr/bin/cmake3 /usr/bin/cmake && \ + git clone https://github.com/apache/arrow.git && \ + cd $HOME/arrow/cpp && \ + git checkout tags/apache-arrow-0.8.0 && \ + pip install numpy cython && \ + cmake -DARROW_CXXFLAGS="-lutil" -DARROW_PYTHON=on && make -j && make install && \ + cd $HOME/arrow/python && \ + ARROW_HOME=/usr/local python setup.py install && \ + yum install -y libjpeg-devel; \ + fi' + +# Pillow +ENV PILLOW_VERSION=4.2.1 +RUN bash -c 'if [ `arch` = "ppc64le" ]; then \ + wget https://files.pythonhosted.org/packages/55/aa/f7f983fb72710a9daa4b3374b7c160091d3f94f5c09221f9336ade9027f3/Pillow-${PILLOW_VERSION}.tar.gz && \ + tar xvf Pillow-${PILLOW_VERSION}.tar.gz && \ + cd $HOME/Pillow-${PILLOW_VERSION} && \ + sed -i "s/'ppc64'/'ppc64le'/g" setup.py && \ + python setup.py install && \ + cd $HOME && \ + rm -rf Pillow-${PILLOW_VERSION}*; \ + fi' + +WORKDIR / # Add requirements -COPY requirements_runtime.txt requirements.txt -COPY requirements_runtime_demos.txt requirements_runtime_demos.txt +COPY src/interface_py/requirements_runtime.txt requirements.txt +COPY src/interface_py/requirements_runtime_demos.txt requirements_runtime_demos.txt RUN \ - . h2o4gpu_env/bin/activate && \ chmod a+rwx requirements*.txt && \ pip install --no-cache-dir -r requirements.txt && \ pip install --no-cache-dir -r requirements_runtime_demos.txt @@ -91,7 +89,6 @@ COPY examples/py/demos/H2O4GPU_PCA.ipynb /jupyter/demos/H2O4GPU_PCA.ipynb COPY examples/py/demos/H2O4GPU_Daal_LinearRegression.ipynb /jupyter/demos/H2O4GPU_Daal_LinearRegression.ipynb COPY examples/py/demos/figures /jupyter/demos/figures RUN \ - . h2o4gpu_env/bin/activate && \ cd /jupyter/demos && \ chmod -R a+rwx /jupyter && \ mkdir /scikit_learn_data && \ @@ -103,14 +100,12 @@ RUN \ HOME=/jupyter jupyter notebook --generate-config && \ sed -i "s/#c.NotebookApp.token = ''/c.NotebookApp.token = 'h2o'/" /jupyter/.jupyter/jupyter_notebook_config.py && \ chmod -R a+rwx /jupyter/.jupyter -# Add shell wrapper -COPY run.sh /run.sh +# Add shell wrapper +COPY scripts/run.sh /run.sh RUN \ - . h2o4gpu_env/bin/activate && \ chmod a+rwx run.sh - ARG h2o4gpu_VERSION ARG h2o4gpu_COMMIT ARG DOCKER_VERSION_TAG @@ -118,5 +113,5 @@ LABEL \ h2o4gpu_commit="$h2o4gpu_COMMIT" \ docker_version_tag="$DOCKER_VERSION_TAG" -ENTRYPOINT ["./run.sh"] +ENTRYPOINT ["./run.sh"] EXPOSE 8888 diff --git a/EXAMPLE_SOLVER.md b/EXAMPLE_SOLVER.md index 276ab17d9..7af543ef4 100644 --- a/EXAMPLE_SOLVER.md +++ b/EXAMPLE_SOLVER.md @@ -356,4 +356,4 @@ Add Java wrapper files - *coming soon*. ### Tests -Add tests! Currently adding Python tests in `tests_open` is the easiest way. C/C++/CUDA tests coming soon. \ No newline at end of file +Add tests! Currently adding Python tests in `tests/python/open_data` is the easiest way. C/C++/CUDA tests coming soon. 
\ No newline at end of file diff --git a/Jenkinsfile-cpu.base b/Jenkinsfile-cpu.base deleted file mode 100644 index 20ee763dc..000000000 --- a/Jenkinsfile-cpu.base +++ /dev/null @@ -1,23 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist8" -def BUILDTYPE = "cpu" -def cuda = "ubuntu:16.04" -def extratag = "-cpu" -def linuxwheel = "linux_whl2" -def testtype = "dotestfast_nonccl" -def labelbuild = "docker && linux" -def labeltest = "docker" -def labelruntime = "docker" -def doingbenchmark = "0" -def dobenchmark = "1" -def doruntime = "1" diff --git a/Jenkinsfile-dai b/Jenkinsfile-dai deleted file mode 100644 index 37cb8c728..000000000 --- a/Jenkinsfile-dai +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/groovy -//------------------------------------------------------------------------------ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. -//------------------------------------------------------------------------------ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -def utilsLib = new Utils() - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 120, unit: 'MINUTES') - buildDiscarder(logRotator(daysToKeepStr: '30')) - } - - stages { - stage('Build') { - parallel { - stage('Build on x86_64-centos7-cuda8.0') { - agent { - label "linux && docker && !micro" - } - steps { - dumpInfo 'x86_64-centos7-cuda8 Build Info' - script { - sh """ - make mrproper_in_docker - make BRANCH_NAME=${env.BRANCH_NAME} BUILD_NUM=${env.BUILD_ID} centos7_cuda8_in_docker - """ - } - stash includes: 'dist/**/*', name: 'x86_64-centos7-cuda8.0' - } - } - stage('Build on x86_64-centos7-cuda9.0') { - agent { - label "linux && docker && !micro" - } - steps { - dumpInfo 'x86_64-centos7-cuda9 Build Info' - script { - sh """ - make mrproper_in_docker - make BRANCH_NAME=${env.BRANCH_NAME} BUILD_NUM=${env.BUILD_ID} centos7_cuda9_in_docker - """ - } - stash includes: 'dist/**/*', name: 'x86_64-centos7-cuda9.0' - } - } - stage('Build on ppc64le-centos7-cuda8.0') { - agent { - label "ibm-power" - } - steps { - dumpInfo 'ppc64le-centos7-cuda8 Build Info' - script { - sh """ - make mrproper_in_docker - make BRANCH_NAME=${env.BRANCH_NAME} BUILD_NUM=${env.BUILD_ID} centos7_cuda8_in_docker - """ - } - stash includes: 'dist/**/*', name: 'ppc64le-centos7-cuda8.0' - } - } - stage('Build on ppc64le-centos7-cuda9.0') { - agent { - label "ibm-power" - } - steps { - dumpInfo 'ppc64le-centos7-cuda9 Build Info' - script { - sh """ - make mrproper_in_docker - make BRANCH_NAME=${env.BRANCH_NAME} BUILD_NUM=${env.BUILD_ID} centos7_cuda9_in_docker - """ - } - stash includes: 'dist/**/*', name: 'ppc64le-centos7-cuda9.0' - } - } - } - } - - stage('Publish centos7 snapshot to S3') { - when { - branch 'master' - } - agent { - label "linux && docker && !micro" - } - steps { - sh "rm -rf dist" - unstash 'x86_64-centos7-cuda8.0' - unstash 'x86_64-centos7-cuda9.0' - unstash 'ppc64le-centos7-cuda8.0' - unstash 'ppc64le-centos7-cuda9.0' - sh 'echo "Stashed files:" && find dist' - script { - docker.withRegistry("https://docker.h2o.ai", "docker.h2o.ai") { - docker.image('s3cmd').inside { - def versionText 
= utilsLib.getCommandOutput("cat dist/x86_64-centos7-cuda8.0/VERSION.txt") - s3up { - localArtifact = 'dist/*' - artifactId = "h2o4gpu" - version = versionText - keepPrivate = false - } - } - } - } - } - } - } -} - diff --git a/Jenkinsfile-nccl-cuda8.base b/Jenkinsfile-nccl-cuda8.base deleted file mode 100644 index 876992844..000000000 --- a/Jenkinsfile-nccl-cuda8.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist1" -def BUILDTYPE = "nccl-cuda8" -def cuda = "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" -def cudart = "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -def extratag = "-nccl-cuda8" -def linuxwheel = "linux_whl1" -def testtype = "dotestfast" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - diff --git a/Jenkinsfile-nccl-cuda9-aws1-benchmark b/Jenkinsfile-nccl-cuda9-aws1-benchmark deleted file mode 100644 index ef847b64d..000000000 --- a/Jenkinsfile-nccl-cuda9-aws1-benchmark +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/groovy - -//################ FILE IS AUTO-GENERATED from .base files -//################ DO NOT MODIFY -//################ See scripts/make_jenkinsfiles.sh - -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist7" -def BUILDTYPE = "nccl-cuda9-aws1-benchmark" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-aws1-benchmark" -def linuxwheel = "linux_whl7" -def testtype = "dotestperf" -def labelbuild = "ec2P32xlarge" -def labeltest = "ec2P32xlarge" -def labelruntime = "ec2P32xlarge" -def doingbenchmark = "1" -def dobenchmark = "0" -def doruntime = "0" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - -def benchmark_commit_trigger - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 300, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: '10')) - disableConcurrentBuilds() - skipDefaultCheckout() - } - - environment { - MAKE_OPTS = "-s CI=1" // -s: silent mode - BUILD_TYPE = "${BUILDTYPE}" - } - - stages { - 
///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda9-aws1-benchmark") { - - agent { - label "${labelbuild}" - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() - echo "Commit Message: ${commitMessage}" - benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" - } - } - stage("Build Wheel on Linux nccl-cuda9-aws1-benchmark") { - - agent { - label "${labelbuild}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() - } - } - steps { - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") - - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - - script { - // Load the version file content - buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) - utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) - utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} - |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} - |""".stripMargin("|")) - } - - } - } - } - - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda9-aws1-benchmark") { - agent { - label "${labeltest}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) - } - } - steps { - dumpInfo 'Linux Test Info' - // Get source code (should put tests into wheel, then wouldn't have to checkout) - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm - } - script { - unstash 'version_info' - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") - } - 
retryWithTimeout(500 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") - } - } - } - } - } - stage("Build/Publish Runtime Docker Linux nccl-cuda9-aws1-benchmark") { - agent { - label "${labelruntime}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() - } - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - unstash 'version_info' - sh 'echo "Stashed version file:" && ls -l build/' - } - script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") - } - retryWithTimeout(1000 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") - } - } - } - } - } - - stage("Benchmarking Linux nccl-cuda9-aws1-benchmark") { - agent { - label 'master' - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") - } - } - steps { - script { - utilsLib.appendBuildDescription("BENCH \u2713") - } - - echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") - build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 - } - } - - } // end over stages - post { - failure { - node('linux') { - script { - if(env.BRANCH_NAME == "master") { - emailext( - to: "mateusz@h2o.ai, jmckinney@h2o.ai", - subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", - body: '''${JELLY_SCRIPT, template="html_gmail"}''', - attachLog: true, - compressLog: true, - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - ] - ) - } - } - } - } - } -} - - diff --git a/Jenkinsfile-nccl-cuda9-aws1-benchmark.base b/Jenkinsfile-nccl-cuda9-aws1-benchmark.base deleted file mode 100644 index 42e4ef13f..000000000 --- a/Jenkinsfile-nccl-cuda9-aws1-benchmark.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist7" -def BUILDTYPE = 
"nccl-cuda9-aws1-benchmark" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-aws1-benchmark" -def linuxwheel = "linux_whl7" -def testtype = "dotestperf" -def labelbuild = "ec2P32xlarge" -def labeltest = "ec2P32xlarge" -def labelruntime = "ec2P32xlarge" -def doingbenchmark = "1" -def dobenchmark = "0" -def doruntime = "0" - diff --git a/Jenkinsfile-nccl-cuda9-aws1.base b/Jenkinsfile-nccl-cuda9-aws1.base deleted file mode 100644 index d7f972359..000000000 --- a/Jenkinsfile-nccl-cuda9-aws1.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist5" -def BUILDTYPE = "nccl-cuda9-aws1" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-aws1" -def linuxwheel = "linux_whl5" -def testtype = "dotest" -def labelbuild = "ec2P32xlarge" -def labeltest = "ec2P32xlarge" -def labelruntime = "ec2P32xlarge" -def doingbenchmark = "0" -def dobenchmark = "1" -def doruntime = "0" - diff --git a/Jenkinsfile-nccl-cuda9-benchmark b/Jenkinsfile-nccl-cuda9-benchmark deleted file mode 100644 index 3ec99505f..000000000 --- a/Jenkinsfile-nccl-cuda9-benchmark +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/groovy - -//################ FILE IS AUTO-GENERATED from .base files -//################ DO NOT MODIFY -//################ See scripts/make_jenkinsfiles.sh - -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist6" -def BUILDTYPE = "nccl-cuda9-benchmark" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-benchmark" -def linuxwheel = "linux_whl6" -def testtype = "dotestperf" -def labelbuild = "mr-dl3" -def labeltest = "mr-dl3" -def labelruntime = "mr-dl3" -def doingbenchmark = "1" -def dobenchmark = "0" -def doruntime = "0" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - -def benchmark_commit_trigger - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 300, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: '10')) - disableConcurrentBuilds() - skipDefaultCheckout() - } - - environment { - MAKE_OPTS = "-s CI=1" // 
-s: silent mode - BUILD_TYPE = "${BUILDTYPE}" - } - - stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda9-benchmark") { - - agent { - label "${labelbuild}" - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() - echo "Commit Message: ${commitMessage}" - benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" - } - } - stage("Build Wheel on Linux nccl-cuda9-benchmark") { - - agent { - label "${labelbuild}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() - } - } - steps { - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") - - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - - script { - // Load the version file content - buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) - utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) - utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} - |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} - |""".stripMargin("|")) - } - - } - } - } - - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda9-benchmark") { - agent { - label "${labeltest}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) - } - } - steps { - dumpInfo 'Linux Test Info' - // Get source code (should put tests into wheel, then wouldn't have to checkout) - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm - } - script { - unstash 'version_info' - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", 
"${extratag}", "${dist}", "${testtype}") - } - retryWithTimeout(500 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") - } - } - } - } - } - stage("Build/Publish Runtime Docker Linux nccl-cuda9-benchmark") { - agent { - label "${labelruntime}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() - } - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - unstash 'version_info' - sh 'echo "Stashed version file:" && ls -l build/' - } - script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") - } - retryWithTimeout(1000 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") - } - } - } - } - } - - stage("Benchmarking Linux nccl-cuda9-benchmark") { - agent { - label 'master' - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") - } - } - steps { - script { - utilsLib.appendBuildDescription("BENCH \u2713") - } - - echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") - build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 - } - } - - } // end over stages - post { - failure { - node('linux') { - script { - if(env.BRANCH_NAME == "master") { - emailext( - to: "mateusz@h2o.ai, jmckinney@h2o.ai", - subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", - body: '''${JELLY_SCRIPT, template="html_gmail"}''', - attachLog: true, - compressLog: true, - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - ] - ) - } - } - } - } - } -} - - diff --git a/Jenkinsfile-nccl-cuda9-benchmark.base b/Jenkinsfile-nccl-cuda9-benchmark.base deleted file mode 100644 index ffdb6debd..000000000 --- a/Jenkinsfile-nccl-cuda9-benchmark.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist6" 
-def BUILDTYPE = "nccl-cuda9-benchmark" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-benchmark" -def linuxwheel = "linux_whl6" -def testtype = "dotestperf" -def labelbuild = "mr-dl3" -def labeltest = "mr-dl3" -def labelruntime = "mr-dl3" -def doingbenchmark = "1" -def dobenchmark = "0" -def doruntime = "0" - diff --git a/Jenkinsfile-nccl-cuda9.base b/Jenkinsfile-nccl-cuda9.base deleted file mode 100644 index 10cf20cfd..000000000 --- a/Jenkinsfile-nccl-cuda9.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist4" -def BUILDTYPE = "nccl-cuda9" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9" -def linuxwheel = "linux_whl4" -def testtype = "dotest" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "1" -def doruntime = "1" - diff --git a/Jenkinsfile-nonccl-cuda8 b/Jenkinsfile-nonccl-cuda8 deleted file mode 100644 index 071e6c702..000000000 --- a/Jenkinsfile-nonccl-cuda8 +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/groovy - -//################ FILE IS AUTO-GENERATED from .base files -//################ DO NOT MODIFY -//################ See scripts/make_jenkinsfiles.sh - -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist2" -def BUILDTYPE = "nonccl-cuda8" -def cuda = "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" -def cudart = "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -def extratag = "-nonccl-cuda8" -def linuxwheel = "linux_whl2" -def testtype = "dotestfast_nonccl" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - -def benchmark_commit_trigger - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 300, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: 
'10')) - disableConcurrentBuilds() - skipDefaultCheckout() - } - - environment { - MAKE_OPTS = "-s CI=1" // -s: silent mode - BUILD_TYPE = "${BUILDTYPE}" - } - - stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nonccl-cuda8") { - - agent { - label "${labelbuild}" - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() - echo "Commit Message: ${commitMessage}" - benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" - } - } - stage("Build Wheel on Linux nonccl-cuda8") { - - agent { - label "${labelbuild}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() - } - } - steps { - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") - - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - - script { - // Load the version file content - buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) - utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) - utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} - |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} - |""".stripMargin("|")) - } - - } - } - } - - stage("Test Wheel & Pylint & S3up on Linux nonccl-cuda8") { - agent { - label "${labeltest}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) - } - } - steps { - dumpInfo 'Linux Test Info' - // Get source code (should put tests into wheel, then wouldn't have to checkout) - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm - } - script { - unstash 'version_info' - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l 
src/interface_py/${dist}/ || true - """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") - } - retryWithTimeout(500 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") - } - } - } - } - } - stage("Build/Publish Runtime Docker Linux nonccl-cuda8") { - agent { - label "${labelruntime}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() - } - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - unstash 'version_info' - sh 'echo "Stashed version file:" && ls -l build/' - } - script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") - } - retryWithTimeout(1000 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") - } - } - } - } - } - - stage("Benchmarking Linux nonccl-cuda8") { - agent { - label 'master' - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") - } - } - steps { - script { - utilsLib.appendBuildDescription("BENCH \u2713") - } - - echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") - build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 - } - } - - } // end over stages - post { - failure { - node('linux') { - script { - if(env.BRANCH_NAME == "master") { - emailext( - to: "mateusz@h2o.ai, jmckinney@h2o.ai", - subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", - body: '''${JELLY_SCRIPT, template="html_gmail"}''', - attachLog: true, - compressLog: true, - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - ] - ) - } - } - } - } - } -} - - diff --git a/Jenkinsfile-nonccl-cuda8.base b/Jenkinsfile-nonccl-cuda8.base deleted file mode 100644 index 979219e5f..000000000 --- a/Jenkinsfile-nonccl-cuda8.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage 
= '' -def h2o4gpuUtils = null - -def dist = "dist2" -def BUILDTYPE = "nonccl-cuda8" -def cuda = "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" -def cudart = "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -def extratag = "-nonccl-cuda8" -def linuxwheel = "linux_whl2" -def testtype = "dotestfast_nonccl" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - diff --git a/Jenkinsfile-nonccl-cuda9 b/Jenkinsfile-nonccl-cuda9 deleted file mode 100644 index 951f66878..000000000 --- a/Jenkinsfile-nonccl-cuda9 +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/groovy - -//################ FILE IS AUTO-GENERATED from .base files -//################ DO NOT MODIFY -//################ See scripts/make_jenkinsfiles.sh - -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist3" -def BUILDTYPE = "nonccl-cuda9" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nonccl-cuda9" -def linuxwheel = "linux_whl3" -def testtype = "dotestfast_nonccl" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - -def benchmark_commit_trigger - -pipeline { - agent none - - // Setup job options - options { - ansiColor('xterm') - timestamps() - timeout(time: 300, unit: 'MINUTES') - buildDiscarder(logRotator(numToKeepStr: '10')) - disableConcurrentBuilds() - skipDefaultCheckout() - } - - environment { - MAKE_OPTS = "-s CI=1" // -s: silent mode - BUILD_TYPE = "${BUILDTYPE}" - } - - stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nonccl-cuda9") { - - agent { - label "${labelbuild}" - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, 
recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() - echo "Commit Message: ${commitMessage}" - benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" - } - } - stage("Build Wheel on Linux nonccl-cuda9") { - - agent { - label "${labelbuild}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() - } - } - steps { - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") - - buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) - - script { - // Load the version file content - buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) - utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) - utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} - |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} - |""".stripMargin("|")) - } - - } - } - } - - stage("Test Wheel & Pylint & S3up on Linux nonccl-cuda9") { - agent { - label "${labeltest}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) - } - } - steps { - dumpInfo 'Linux Test Info' - // Get source code (should put tests into wheel, then wouldn't have to checkout) - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm - } - script { - unstash 'version_info' - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") - } - retryWithTimeout(500 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") - } - } - } - } - } - stage("Build/Publish Runtime Docker Linux nonccl-cuda9") { - agent { - label "${labelruntime}" - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() - } - } - steps { - dumpInfo 'Linux Build Info' - // Do checkout - retryWithTimeout(200 /* seconds */, 3 /* retries */) { - deleteDir() - checkout([ - $class : 
'GitSCM', - branches : scm.branches, - doGenerateSubmoduleConfigurations: false, - extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], - submoduleCfg : [], - userRemoteConfigs : scm.userRemoteConfigs]) - } - script { - sh """ - echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true - """ - unstash "${linuxwheel}" - sh """ - echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - """ - unstash 'version_info' - sh 'echo "Stashed version file:" && ls -l build/' - } - script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") - } - retryWithTimeout(1000 /* seconds */, 5 /* retries */) { - withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { - script { - h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") - } - } - } - } - } - - stage("Benchmarking Linux nonccl-cuda9") { - agent { - label 'master' - } - when { - expression { - unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" - echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" - return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") - } - } - steps { - script { - utilsLib.appendBuildDescription("BENCH \u2713") - } - - echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") - build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 - } - } - - } // end over stages - post { - failure { - node('linux') { - script { - if(env.BRANCH_NAME == "master") { - emailext( - to: "mateusz@h2o.ai, jmckinney@h2o.ai", - subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", - body: '''${JELLY_SCRIPT, template="html_gmail"}''', - attachLog: true, - compressLog: true, - recipientProviders: [ - [$class: 'DevelopersRecipientProvider'], - ] - ) - } - } - } - } - } -} - - diff --git a/Jenkinsfile-nonccl-cuda9.base b/Jenkinsfile-nonccl-cuda9.base deleted file mode 100644 index 4679fdea5..000000000 --- a/Jenkinsfile-nonccl-cuda9.base +++ /dev/null @@ -1,25 +0,0 @@ -// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ -@Library('test-shared-library') _ - -import ai.h2o.ci.Utils -import static ai.h2o.ci.Utils.banner -def utilsLib = new Utils() -import ai.h2o.ci.BuildInfo - -def commitMessage = '' -def h2o4gpuUtils = null - -def dist = "dist3" -def BUILDTYPE = "nonccl-cuda9" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nonccl-cuda9" -def linuxwheel = "linux_whl3" -def testtype = "dotestfast_nonccl" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labelruntime = "nvidia-docker" -def doingbenchmark = "0" -def dobenchmark = "0" -def doruntime = "1" - diff --git a/Makefile b/Makefile index 4421fe048..e29a056ea 100644 --- a/Makefile +++ b/Makefile @@ -1,85 +1,16 @@ -include Makefile_header.mk -location = $(CURDIR)/$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -WHERE := $(location) -$(info ** -> $(WHERE)) 
-$(info ** ------------------------------------------------------------------ **) -NVCC := $(shell command -v nvcc 2> /dev/null) - -SHELL := /bin/bash # force avoidance of dash as shell -# -# Build specific config -# -CONFIG=make/config.mk -include $(CONFIG) - -VERSION=make/version.mk -include $(VERSION) - -MAJOR_MINOR=$(shell echo $(BASE_VERSION) | sed 's/.*\(^[0-9][0-9]*\.[0-9][0-9]*\).*/\1/g' ) - -# System specific stuff -include src/config2.mk - -ifdef NVCC -ifeq ($(shell test $(CUDA_MAJOR) -ge 9; echo $$?),0) - $(warning Compiling with Cuda9 or higher) - XGB_CUDA ?= -DGPU_COMPUTE_VER="35;52;60;61;70" -else - $(warning Compiling with Cuda8 or lower) - # >=52 required for kmeans for larger data of size rows/32>2^16 - XGB_CUDA ?= -DGPU_COMPUTE_VER="35;52;60;61" -endif -endif - -# Location of local directory with dependencies -DEPS_DIR = deps - -# Detect OS -OS := $(shell uname) -## Python has crazy ideas about os names -ifeq ($(OS), Darwin) - PY_OS ?= "macosx" -else - PY_OS ?= $(OS) -endif - -# see if have ccache for faster compile times if no changes to file -theccache=$(shell echo `which ccache`) -ifeq ($(theccache),) - theccacheclean= -else - theccacheclean=$(theccache) -C -endif - -RANDOM := $(shell bash -c 'echo $$RANDOM') -LOGEXT=$(RANDOM)$(shell date +'_%Y.%m.%d-%H:%M:%S') -NUMPROCS := $(shell cat /proc/cpuinfo|grep processor|wc -l) - -# -# Docker image tagging -# -DOCKER_VERSION_TAG ?= "latest" - -# -# Setup S3 access credentials -# -S3_CMD_LINE := aws s3 - -# -# BUILD_INFO setup -# -H2O4GPU_COMMIT ?= $(shell git rev-parse HEAD) -H2O4GPU_BUILD_DATE := $(shell date) -H2O4GPU_BUILD ?= "LOCAL BUILD @ $(shell git rev-parse --short HEAD) build at $(H2O4GPU_BUILD_DATE)" -H2O4GPU_SUFFIX ?= "+local_$(shell git describe --always --dirty)" +include make/Makefile_header.mk +include make/version.mk +include make/config.mk help: + $(call inform, " -------- Test data sync ---------") + $(call inform, "make sync_open_data Downloads the test data.") + $(call inform, "make sync_small_data Downloads the small test data.") $(call inform, " -------- Build and Install ---------") $(call inform, "make clean Clean all build files.") $(call inform, "make fullinstall") $(call inform, "make fullinstall Clean everything then compile and install everything (for cuda9 with nccl in xgboost).") - $(call inform, "make cpu-fullinstall Clean everything then compile and isntall everything only with CPU") $(call inform, "make build Just Build the whole project.") $(call inform, " -------- Test ---------") $(call inform, "make test Run tests.") @@ -97,6 +28,12 @@ help: $(call inform, "Example Pycharm environment flags: PYTHONPATH=/home/jon/h2o4gpu/src/interface_py:/home/jon/h2o4gpu;PYTHONUNBUFFERED=1;LD_LIBRARY_PATH=/opt/clang+llvm-4.0.0-x86_64-linux-gnu-ubuntu-16.04//lib/:/home/jon/lib:/opt/rstudio-1.0.136/bin/:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64::/home/jon/lib/:$LD_LIBRARY_PATH;LLVM4=/opt/clang+llvm-4.0.0-x86_64-linux-gnu-ubuntu-16.04/") $(call inform, "Example Pycharm working directory: /home/jon/h2o4gpu/") +default: fullinstall + +######################################### +# DATA TARGETS +######################################### + sync_small_data: @echo "---- Synchronizing test data ----" mkdir -p $(DATA_DIR) @@ -111,8 +48,31 @@ sync_open_data: mkdir -p $(OPEN_DATA_DIR) $(S3_CMD_LINE) sync --no-sign-request "$(OPEN_DATA_BUCKET)" "$(OPEN_DATA_DIR)" -default: fullinstall +######################################### +# DEPENDENCY MANAGEMENT TARGETS +######################################### + 
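[Editor's note: illustrative sketch only, not part of the patch. The data-sync targets and the relocated `default: fullinstall` above are typically driven as below; `S3_CMD_LINE`, `DATA_DIR`, `OPEN_DATA_BUCKET` and `OPEN_DATA_DIR` are assumed to be provided by the included `make/config.mk`.]
```
# Hypothetical invocation of the reorganized targets
make sync_open_data    # aws s3 sync --no-sign-request of the public test data into $(OPEN_DATA_DIR)
make sync_small_data   # same idea for the smaller private test data set
make fullinstall       # clean, fetch dependencies, build and install everything
```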
+alldeps-install: deps_install fullinstall-xgboost libsklearn + +alldeps: deps_fetch alldeps-install + +deps_fetch: + @echo "---- Fetch dependencies ---- " + bash scripts/gitshallow_submodules.sh + git submodule update +deps_install: + @echo "---- Install dependencies ----" + #-xargs -a requirements.txt -n 1 -P 1 $(PYTHON) -m pip install + easy_install pip + easy_install setuptools + cat src/interface_py/requirements_buildonly.txt src/interface_py/requirements_runtime.txt > requirements.txt + $(PYTHON) -m pip install -r requirements.txt + rm -rf requirements.txt + bash scripts/install_r_deps.sh + +######################################### +# SUBMODULE BUILD TARGETS ######################################### update_submodule: @@ -129,196 +89,146 @@ cpp: py: apply-sklearn_simple build/VERSION.txt $(MAKE) -j all -C src/interface_py -pylint: - $(MAKE) pylint -C src/interface_py +.PHONY: xgboost +xgboost: + @echo "----- Building XGboost target $(XGBOOST_TARGET) -----" + cd xgboost ; make -f Makefile2 $(XGBOOST_TARGET) -fullpy: apply-sklearn_simple pylint +fullinstall-xgboost: xgboost install_xgboost -pyinstall: - $(MAKE) -j install -C src/interface_py +######################################### +# SOURCE QUALITY CHECK TARGETS +######################################### +pylint: + $(MAKE) pylint -C src/interface_py -############################################## +######################################### +# PROJECT BUILD TARGETS +######################################### -alldeps-nccl-cuda8: deps_fetch alldeps_install-nccl-cuda8 -alldeps-nonccl-cuda8: deps_fetch alldeps_install-nonccl-cuda8 -alldeps-nccl-cuda9: deps_fetch alldeps_install-nccl-cuda9 -alldeps-nonccl-cuda9: deps_fetch alldeps_install-nonccl-cuda9 -alldeps-cpuonly: deps_fetch alldeps_install-cpuonly +build: update_submodule build_quick -clean: cleanbuild deps_clean xgboost_clean py3nvml_clean - -rm -rf ./build - -rm -rf ./results/ ./tmp/ +build_quick: cpp py -cleanbuild: cleanpy +build_py: update_submodule clean_py py # avoid cpp -cleanpy: - $(MAKE) -j clean -C src/interface_py +######################################### +# INSTALL TARGETS +######################################### -xgboost_clean: - -pip uninstall -y xgboost - rm -rf xgboost/build/ +install_xgboost: + @echo "----- pip install xgboost built locally -----" + cd xgboost/python-package/dist && $(PYTHON) -m pip install xgboost-0.71-py3-none-any.whl --target ../ -buildquick: cpp py -build: update_submodule buildquick -buildnocpp: update_submodule cleanpy py # avoid cpp +install_py: + $(MAKE) -j install -C src/interface_py -install: pyinstall +install: install_py -### for direct building of xgboost -# https://xgboost.readthedocs.io/en/latest/build.html -libxgboost-nccl-local: - cd xgboost ; make -f Makefile2 libxgboost -libxgboost-nonccl-local: - cd xgboost ; make -f Makefile2 libxgboost2 -libxgboost-cpu-local: - cd xgboost ; make -f Makefile2 libxgboost-cpu +######################################### +# CLEANING TARGETS +######################################### -apply-xgboost-nccl-local: libxgboost-nccl-local pipxgboost -apply-xgboost-nonccl-local: libxgboost-nonccl-local pipxgboost -apply-xgboost-cpu-local: libxgboost-cpu-local pipxgboost +clean: clean_py3nvml clean_xgboost clean_deps clean_py clean_cpp + -rm -rf ./build + -rm -rf ./results/ ./tmp/ -pipxgboost: - @echo "----- pip install xgboost built locally -----" - cd xgboost/python-package/dist && pip install *.whl --upgrade --target ../ +clean_cpp: + rm -rf src/interface_c/_ch2o4gpu_*pu.so + rm -rf 
src/interface_py/h2o4gpu/libs/ch2o4gpu_*pu.py -alldeps-nccl-local: deps_fetch alldeps-install-nccl-local -alldeps-nonccl-local: deps_fetch alldeps-install-nonccl-local -alldeps-cpu-local: deps_fetch alldeps-install-cpu-local +clean_py: + $(MAKE) -j clean -C src/interface_py + +clean_xgboost: + -$(PYTHON) -m pip uninstall -y xgboost + rm -rf xgboost/build/ -# lib for sklearn because don't want to fully apply yet -alldeps-install-nccl-local: deps_install apply-xgboost-nccl-local apply_py3nvml libsklearn -alldeps-install-nonccl-local: deps_install apply-xgboost-nonccl-local apply_py3nvml libsklearn -alldeps-install-cpu-local: deps_install apply-xgboost-cpu-local apply_py3nvml libsklearn -alldeps_install-cpuonly: deps_install apply-xgboost-cpu-local apply_py3nvml libsklearn install_daal_x86_64 +clean_py3nvml: + -$(PYTHON) -m pip uninstall -y py3nvml -##### dependencies -deps_clean: - @echo "----- Cleaning deps -----" +clean_deps: + @echo "----- Cleaning dependencies -----" rm -rf "$(DEPS_DIR)" # sometimes --upgrade leaves extra packages around - cat requirements_buildonly.txt requirements_runtime.txt requirements_runtime_demos.txt > requirements.txt + cat src/interface_py/requirements_buildonly.txt src/interface_py/requirements_runtime.txt src/interface_py/requirements_runtime_demos.txt > requirements.txt sed 's/==.*//g' requirements.txt|grep -v "#" > requirements_plain.txt - -xargs -a requirements_plain.txt -n 1 -P $(NUMPROCS) pip uninstall -y + -xargs -a requirements_plain.txt -n 1 -P $(NUMPROCS) $(PYTHON) -m pip uninstall -y rm -rf requirements_plain.txt requirements.txt -deps_fetch: - @echo "---- Fetch dependencies ---- " - bash scripts/gitshallow_submodules.sh - git submodule update - -deps_install: - @echo "---- Install dependencies ----" - #-xargs -a requirements.txt -n 1 -P 1 pip install --upgrade - easy_install pip - easy_install setuptools - cat requirements_buildonly.txt requirements_runtime.txt > requirements.txt - pip install -r requirements.txt --upgrade - rm -rf requirements.txt - bash scripts/install_r_deps.sh - # issue with their package, have to do this here (still fails sometimes, so remove) -# pip install sphinxcontrib-osexample - -# lib for sklearn because don't want to fully apply yet -alldeps_install-nccl-cuda8: deps_install apply-xgboost-nccl-cuda8 apply_py3nvml libsklearn -alldeps_install-nonccl-cuda8: deps_install apply-xgboost-nonccl-cuda8 apply_py3nvml libsklearn -alldeps_install-nccl-cuda9: deps_install apply-xgboost-nccl-cuda9 apply_py3nvml libsklearn -alldeps_install-nonccl-cuda9: deps_install apply-xgboost-nonccl-cuda9 apply_py3nvml libsklearn - -fullinstall: fullinstall-nccl-cuda9 -fullinstalllocal: fullinstall-nccl-local -cpu-fullinstall: fullinstall-cpuonly - -fullinstall-nccl-local: clean alldeps-nccl-local build install - mkdir -p src/interface_py/dist-nccl-local/ && mv src/interface_py/dist/*.whl src/interface_py/dist-nccl-local/ -fullinstall-nonccl-local: clean alldeps-nonccl-local build install - mkdir -p src/interface_py/dist-nonccl-local/ && mv src/interface_py/dist/*.whl src/interface_py/dist-nonccl-local/ - -fullinstall-nccl-cuda8: clean alldeps-nccl-cuda8 build install - mkdir -p src/interface_py/dist1/ && mv src/interface_py/dist/*.whl src/interface_py/dist1/ +######################################### +# FULL BUILD AND INSTALL TARGETS +######################################### -fullinstall-nonccl-cuda8: clean alldeps-nonccl-cuda8 build install - mkdir -p src/interface_py/dist2/ && mv src/interface_py/dist/*.whl src/interface_py/dist2/ +fullinstall: 
clean alldeps build install + mkdir -p src/interface_py/$(DIST_DIR)/$(PLATFORM)/ && mv src/interface_py/dist/*.whl src/interface_py/$(DIST_DIR)/$(PLATFORM)/ -fullinstall-nccl-cuda9: clean alldeps-nccl-cuda9 build install - mkdir -p src/interface_py/dist4/ && mv src/interface_py/dist/*.whl src/interface_py/dist4/ +buildinstall: alldeps build install + mkdir -p src/interface_py/$(DIST_DIR)/$(PLATFORM)/ && mv src/interface_py/dist/*.whl src/interface_py/$(DIST_DIR)/$(PLATFORM)/ -fullinstall-nonccl-cuda9: clean alldeps-nonccl-cuda9 build install - mkdir -p src/interface_py/dist3/ && mv src/interface_py/dist/*.whl src/interface_py/dist3/ +######################################### +# DOCKER TARGETS +######################################### -fullinstall-cpuonly: clean alldeps-cpuonly build install - mkdir -p src/interface_py/dist-cpuonly-local/ && mv src/interface_py/dist/*.whl src/interface_py/dist-cpuonly-local/ +DOCKER_CUDA_VERSION?=9.0 -#################################################### -# Docker stuff +ifeq (${DOCKER_CUDA_VERSION},8.0) + DOCKER_CUDNN_VERSION?=5 +else + DOCKER_CUDNN_VERSION?=7 +endif -# default for docker is nccl-cuda9 -docker-build: docker-build-nccl-cuda9 -docker-runtime: docker-runtime-nccl-cuda9 -docker-runtests: docker-runtests-nccl-cuda9 -get_docker: get_docker-nccl-cuda9 -load_docker: docker-runtime-nccl-cuda9-load -run_in_docker: run_in_docker-nccl-cuda9 +centos7_cuda80_in_docker: + $(MAKE) DOCKER_CUDA_VERSION=8.0 docker-build +centos7_cuda90_in_docker: + $(MAKE) DOCKER_CUDA_VERSION=9.0 docker-build -############### CUDA9 +centos7_cuda92_in_docker: + $(MAKE) DOCKER_CUDA_VERSION=9.2 docker-build -docker-build-nccl-cuda9: - @echo "+-- Building Wheel in Docker (-nccl-cuda9) --+" - rm -rf src/interface_py/dist/*.whl ; rm -rf src/interface_py/dist4/*.whl - export CONTAINER_NAME="localmake-build" ;\ +docker-build: + @echo "+-- Building Wheel in Docker --+" + export CONTAINER_NAME="local-make-build-cuda$(DOCKER_CUDA_VERSION)" ;\ export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda9" ;\ - export dockerimage="nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" ;\ - export H2O4GPU_BUILD="" ;\ - export H2O4GPU_SUFFIX="" ;\ - export makeopts="" ;\ - export dist="dist4" ;\ + export extratag="-cuda$(DOCKER_CUDA_VERSION)" ;\ + export dockerimage="nvidia/cuda${DOCKER_ARCH}:$(DOCKER_CUDA_VERSION)-cudnn$(DOCKER_CUDNN_VERSION)-devel-centos7" ;\ bash scripts/make-docker-devel.sh -docker-runtime-nccl-cuda9: +docker-runtime: @echo "+--Building Runtime Docker Image Part 2 (-nccl-cuda9) --+" - export CONTAINER_NAME="localmake-runtime" ;\ + export CONTAINER_NAME="local-make-runtime-cuda$(DOCKER_CUDA_VERSION)" ;\ export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda9" ;\ - export encodedFullVersionTag=$(BASE_VERSION) ;\ + export extratag="-cuda$(DOCKER_CUDA_VERSION)" ;\ export fullVersionTag=$(BASE_VERSION) ;\ - export buckettype="releases/bleeding-edge" ;\ - export dockerimage="nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" ;\ + export dockerimage="nvidia/cuda${DOCKER_ARCH}:$(DOCKER_CUDA_VERSION)-cudnn$(DOCKER_CUDNN_VERSION)-runtime-centos7" ;\ bash scripts/make-docker-runtime.sh -.PHONY: docker-runtime-nccl-cuda9-run - -docker-runtime-nccl-cuda9-run: - @echo "+-Running Docker Runtime Image (-nccl-cuda9) --+" - export CONTAINER_NAME="localmake-runtime-run" ;\ - export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda9" ;\ - export encodedFullVersionTag=$(BASE_VERSION) ;\ +docker-runtime-run: + @echo "+-Running Docker Runtime Image (-cuda9) --+" export 
fullVersionTag=$(BASE_VERSION) ;\ - export buckettype="releases/bleeding-edge" ;\ - export dockerimage="nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" ;\ - nvidia-docker run --init --rm --name $${CONTAINER_NAME} -d -t -u `id -u`:`id -g` --entrypoint=bash opsh2oai/h2o4gpu-$${versionTag}$${extratag}-runtime:latest + nvidia-docker run --init --rm --name "localmake-runtime-run" -d -t -u `id -u`:`id -g` --entrypoint=bash opsh2oai/h2o4gpu-$(BASE_VERSION)-cuda$(DOCKER_CUDA_VERSION)-runtime:latest -docker-runtests-nccl-cuda9: +docker-runtests: @echo "+-- Run tests in docker (-nccl-cuda9) --+" export CONTAINER_NAME="localmake-runtests" ;\ - export extratag="-nccl-cuda9" ;\ - export dockerimage="nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" ;\ - export dist="dist4" ;\ + export extratag="-cuda$(DOCKER_CUDA_VERSION)" ;\ + export dockerimage="nvidia/cuda${DOCKER_ARCH}:$(DOCKER_CUDA_VERSION)-cudnn$(DOCKER_CUDNN_VERSION)-devel-centos7" ;\ export target="dotest" ;\ bash scripts/make-docker-runtests.sh -get_docker-nccl-cuda9: - wget https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/$(MAJOR_MINOR)-nccl-cuda9/h2o4gpu-$(BASE_VERSION)-nccl-cuda9-runtime.tar.bz2 +get_docker: + wget https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/$(MAJOR_MINOR)-cuda$(DOCKER_CUDA_VERSION)/h2o4gpu-$(BASE_VERSION)-cuda$(DOCKER_CUDA_VERSION)-runtime.tar.bz2 -docker-runtime-nccl-cuda9-load: - pbzip2 -dc h2o4gpu-$(BASE_VERSION)-nccl-cuda9-runtime.tar.bz2 | nvidia-docker load +docker-runtime-load: + pbzip2 -dc h2o4gpu-$(BASE_VERSION)-cuda$(DOCKER_CUDA_VERSION)-runtime.tar.bz2 | nvidia-docker load -run_in_docker-nccl-cuda9: - -mkdir -p log ; nvidia-docker run --name localhost --rm -p 8888:8888 -u `id -u`:`id -g` -v `pwd`/log:/log --entrypoint=./run.sh opsh2oai/h2o4gpu-$(BASE_VERSION)-nccl-cuda9-runtime & +run_in_docker: + -mkdir -p log ; nvidia-docker run --name localhost --rm -p 8888:8888 -u `id -u`:`id -g` -v `pwd`/log:/log --entrypoint=./run.sh opsh2oai/h2o4gpu-$(BASE_VERSION)-cuda$(DOCKER_CUDA_VERSION)-runtime & -find log -name jupyter* -type f -printf '%T@ %p\n' | sort -k1 -n | awk '{print $2}' | tail -1 | xargs cat | grep token | grep http | grep -v NotebookApp +.PHONY: docker-build docker-runtime docker-runtime-run docker-runtests get_docker docker-runtime-load run_in_docker ############### CPU docker-build-cpu: @@ -327,7 +237,7 @@ docker-build-cpu: export CONTAINER_NAME="localmake-build" ;\ export versionTag=$(BASE_VERSION) ;\ export extratag="-cpu" ;\ - export dockerimage="ubuntu:16.04" ;\ + export dockerimage="centos:6" ;\ export H2O4GPU_BUILD="" ;\ export H2O4GPU_SUFFIX="" ;\ export makeopts="" ;\ @@ -342,7 +252,7 @@ docker-runtime-cpu: export encodedFullVersionTag=$(BASE_VERSION) ;\ export fullVersionTag=$(BASE_VERSION) ;\ export buckettype="releases/bleeding-edge" ;\ - export dockerimage="ubuntu:16.04" ;\ + export dockerimage="centos:6" ;\ bash scripts/make-docker-runtime.sh docker-runtime-cpu-run: @@ -353,14 +263,14 @@ docker-runtime-cpu-run: export encodedFullVersionTag=$(BASE_VERSION) ;\ export fullVersionTag=$(BASE_VERSION) ;\ export buckettype="releases/bleeding-edge" ;\ - export dockerimage="ubuntu:16.04" ;\ + export dockerimage="centos:6" ;\ docker run --init --rm --name $${CONTAINER_NAME} -d -t -u `id -u`:`id -g` --entrypoint=bash opsh2oai/h2o4gpu-$${versionTag}$${extratag}-runtime:latest docker-runtests-cpu: @echo "+-- Run tests in docker (-nccl-cuda9) --+" export CONTAINER_NAME="localmake-runtests" ;\ export extratag="-cpu" ;\ - export
dockerimage="ubuntu:16.04" ;\ + export dockerimage="centos:6" ;\ export dist="dist4" ;\ export target="dotest" ;\ bash scripts/make-docker-runtests.sh @@ -376,76 +286,9 @@ run_in_docker-cpu: -find log -name jupyter* -type f -printf '%T@ %p\n' | sort -k1 -n | awk '{print $2}' | tail -1 | xargs cat | grep token | grep http | grep -v NotebookApp -######### CUDA8 (copy/paste above, and then replace cuda9 -> cuda8 and cuda:9.0-cudnn7 -> cuda:8.0-cudnn5 and dist4->dist1) - -docker-build-nccl-cuda8: - @echo "+-- Building Wheel in Docker (-nccl-cuda8) --+" - rm -rf src/interface_py/dist/*.whl - export CONTAINER_NAME="localmake-build" ;\ - export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda8" ;\ - export dockerimage="nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" ;\ - export H2O4GPU_BUILD="" ;\ - export H2O4GPU_SUFFIX="" ;\ - export makeopts="" ;\ - export dist="dist1" ;\ - bash scripts/make-docker-devel.sh - -docker-runtime-nccl-cuda8: - @echo "+--Building Runtime Docker Image Part 2 (-nccl-cuda8) --+" - export CONTAINER_NAME="localmake-runtime" ;\ - export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda8" ;\ - export encodedFullVersionTag=$(BASE_VERSION) ;\ - export fullVersionTag=$(BASE_VERSION) ;\ - export buckettype="releases/bleeding-edge" ;\ - export dockerimage="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" ;\ - bash scripts/make-docker-runtime.sh - -docker-runtime-nccl-cuda8-load: - pbzip2 -dc h2o4gpu-$(BASE_VERSION)-nccl-cuda8-runtime.tar.bz2 | nvidia-docker load - -.PHONY: docker-runtime-nccl-cuda8-run - -docker-runtime-nccl-cuda8-run: - @echo "+-Running Docker Runtime Image (-nccl-cuda8) --+" - export CONTAINER_NAME="localmake-runtime-run" ;\ - export versionTag=$(BASE_VERSION) ;\ - export extratag="-nccl-cuda8" ;\ - export encodedFullVersionTag=$(BASE_VERSION) ;\ - export fullVersionTag=$(BASE_VERSION) ;\ - export buckettype="releases/bleeding-edge" ;\ - export dockerimage="nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" ;\ - nvidia-docker run --init --rm --name $${CONTAINER_NAME} -d -t -u `id -u`:`id -g` --entrypoint=bash opsh2oai/h2o4gpu-$${versionTag}$${extratag}-runtime:latest - -docker-runtests-nccl-cuda8: - @echo "+-- Run tests in docker (-nccl-cuda8) --+" - export CONTAINER_NAME="localmake-runtests" ;\ - export extratag="-nccl-cuda8" ;\ - export dockerimage="nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" ;\ - export dist="dist1" ;\ - export target="dotest" ;\ - bash scripts/make-docker-runtests.sh - -get_docker-nccl-cuda8: - wget https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/$(MAJOR_MINOR)-nccl-cuda8/h2o4gpu-$(BASE_VERSION)-nccl-cuda8-runtime.tar.bz2 - -run_in_docker-nccl-cuda8: - mkdir -p log ; nvidia-docker run --name localhost --rm -p 8888:8888 -u `id -u`:`id -g` -v `pwd`/log:/log --entrypoint=./run.sh opsh2oai/h2o4gpu-$(BASE_VERSION)-nccl-cuda8-runtime & - find log -name jupyter* | xargs cat | grep token | grep http | grep -v NotebookApp - -# uses https://github.com/Azure/fast_retraining -testxgboost: # liblightgbm (assumes one installs lightgdm yourself or run make liblightgbm) - bash testsxgboost/runtestxgboost.sh - bash testsxgboost/extracttestxgboost.sh - bash tests_open/showresults.sh # same for all tests - -# install daal -install_daal_x86_64: - @echo "----- Install Daal Python library -----" - bash scripts/daal/install_daal_locally.sh - -################ +######################################### +# TARGETS INSTALLING LIBRARIES +######################################### # 
http://developer2.download.nvidia.com/compute/cuda/9.0/secure/rc/docs/sidebar/CUDA_Quick_Start_Guide.pdf?_ZyOB0PlGZzBUluXp3FtoWC-LMsTsc5H6SxIaU0i9pGNyWzZCgE-mhnAg2m66Nc3WMDvxWvvQWsXGMqr1hUliGOZvoothMTVnDe12dQQgxwS4Asjoz8XiOvPYOjV6yVQtkFhvDztUlJbNSD4srPWUU2-XegCRFII8_FIpxXERaWV libcuda9: @@ -467,42 +310,16 @@ libnccl2: sudo apt-key add /var/nccl-repo-2.0.5-ga-cuda9.0/7fa2af80.pub sudo apt install libnccl2 libnccl-dev -apply-xgboost-nccl-cuda8: apply-xgboost-nccl-local #pipxgboost-nccl-cuda8 -apply-xgboost-nonccl-cuda8: apply-xgboost-nonccl-local #pipxgboost-nonccl-cuda8 -apply-xgboost-nccl-cuda9: apply-xgboost-nccl-local #pipxgboost-nccl-cuda9 -apply-xgboost-nonccl-cuda9: apply-xgboost-nonccl-local #pipxgboost-nonccl-cuda9 - -pipxgboost-nccl-cuda8: pipxgboost - @echo "----- pip install xgboost-nccl-cuda8 from S3 -----" - mkdir -p xgboost/python-package/dist ; cd xgboost/python-package/dist && pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/bleeding-edge/ai/h2o/xgboost/0.7-nccl-cuda8/xgboost-0.71-py3-none-any.whl --upgrade --target ../ -pipxgboost-nonccl-cuda8: pipxgboost - @echo "----- pip install xgboost-nonccl-cuda8 from S3 -----" - mkdir -p xgboost/python-package/dist ; cd xgboost/python-package/dist && pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/bleeding-edge/ai/h2o/xgboost/0.7-nonccl-cuda8/xgboost-0.71-py3-none-any.whl --upgrade --target ../ -pipxgboost-nccl-cuda9: pipxgboost - @echo "----- pip install xgboost-nccl-cuda9 from S3 -----" - mkdir -p xgboost/python-package/dist ; cd xgboost/python-package/dist && pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/bleeding-edge/ai/h2o/xgboost/0.7-nccl-cuda9/xgboost-0.71-py3-none-any.whl --upgrade --target ../ -pipxgboost-nonccl-cuda9: pipxgboost - @echo "----- pip install xgboost-nonccl-cuda9 from S3 -----" - mkdir -p xgboost/python-package/dist ; cd xgboost/python-package/dist && pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/bleeding-edge/ai/h2o/xgboost/0.7-nonccl-cuda9/xgboost-0.71-py3-none-any.whl --upgrade --target ../ - - -py3nvml_clean: - -pip uninstall -y py3nvml - -apply_py3nvml: - mkdir -p py3nvml ; cd py3nvml # ; pip install -e git+https://github.com/fbcotter/py3nvml#egg=py3nvml --upgrade --root=. - - liblightgbm: # only done if user directly requests, never an explicit dependency echo "See https://github.com/Microsoft/LightGBM/wiki/Installation-Guide#with-gpu-support for details" echo "sudo apt-get install libboost-dev libboost-system-dev libboost-filesystem-dev cmake" rm -rf LightGBM ; result=`git clone --recursive https://github.com/Microsoft/LightGBM` - cd LightGBM && mkdir build ; cd build && cmake .. -DUSE_GPU=1 -DOpenCL_LIBRARY=$(CUDA_HOME)/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=$(CUDA_HOME)/include/ && make -j && cd ../python-package ; python setup.py install --precompile --gpu && cd ../ && pip install arff tqdm keras runipy h5py --upgrade + cd LightGBM && mkdir build ; cd build && cmake .. 
-DUSE_GPU=1 -DOpenCL_LIBRARY=$(CUDA_HOME)/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=$(CUDA_HOME)/include/ && make -j && cd ../python-package ; $(PYTHON) setup.py install --precompile --gpu && cd ../ && $(PYTHON) -m pip install arff tqdm keras runipy h5py libsklearn: # assume already submodule gets sklearn @echo "----- Make sklearn wheel -----" bash scripts/prepare_sklearn.sh # repeated calls don't hurt - rm -rf sklearn && mkdir -p sklearn && cd scikit-learn && python setup.py sdist bdist_wheel + rm -rf sklearn && mkdir -p sklearn && cd scikit-learn && $(PYTHON) setup.py sdist bdist_wheel apply-sklearn: libsklearn apply-sklearn_simple @@ -524,36 +341,14 @@ apply-sklearn_link: apply-sklearn_initmerge: bash ./scripts/apply_sklearn_initmerge.sh -#################### Jenkins specific - -######### h2o.ai systems -# for nccl cuda8 build -fullinstalljenkins-nccl-cuda8: mrproper fullinstall-nccl-cuda8 -fullinstalljenkins-nonccl-cuda8: mrproper fullinstall-nonccl-cuda8 -fullinstalljenkins-nccl-cuda9: mrproper fullinstall-nccl-cuda9 -fullinstalljenkins-nonccl-cuda9: mrproper fullinstall-nonccl-cuda9 -fullinstalljenkins-cpu: mrproper fullinstall-cpu-local - -# for nccl cuda9 build benchmark -fullinstalljenkins-nccl-cuda9-benchmark: mrproper clean alldeps-nccl-cuda9 build install - mkdir -p src/interface_py/dist6/ && mv src/interface_py/dist/*.whl src/interface_py/dist6/ - -########## AWS -# for nccl cuda9 build aws build/test -fullinstalljenkins-nccl-cuda9-aws1: mrproper clean alldeps-nccl-cuda9 build install - mkdir -p src/interface_py/dist5/ && mv src/interface_py/dist/*.whl src/interface_py/dist5/ - -# for nccl cuda9 build benchmark on aws1 -fullinstalljenkins-nccl-cuda9-aws1-benchmark: mrproper clean alldeps-nccl-cuda9 build install - mkdir -p src/interface_py/dist7/ && mv src/interface_py/dist/*.whl src/interface_py/dist7/ - .PHONY: mrproper mrproper: clean @echo "----- Cleaning properly -----" git clean -f -d -x - -################## +######################################### +# TEST TARGETS +######################################### #WIP dotestdemos: @@ -562,7 +357,7 @@ dotestdemos: bash scripts/convert_ipynb2py.sh # can't do -n auto due to limits on GPU memory #pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml examples/py 2> ./tmp/h2o4gpu-examplespy.$(LOGEXT).log - -pip install pytest-ipynb # can't put in requirements since problem with jenkins and runipy + -$(PYTHON) -m pip install pytest-ipynb # can't put in requirements since problem with jenkins and runipy py.test -v -s examples/py 2> ./tmp/h2o4gpu-examplespy.$(LOGEXT).log @@ -570,7 +365,7 @@ dotest: rm -rf ./tmp/ mkdir -p ./tmp/ # can't do -n auto due to limits on GPU memory - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml tests_open 2> ./tmp/h2o4gpu-test.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml tests/python/open_data 2> ./tmp/h2o4gpu-test.$(LOGEXT).log # Test R package when appropriate bash scripts/test_r_pkg.sh @@ -578,86 +373,88 @@ dotestfast: rm -rf ./tmp/ mkdir -p ./tmp/ # can't do -n auto due to limits on GPU memory - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast1.xml tests_open/glm/test_glm_simple.py 2> ./tmp/h2o4gpu-testfast1.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace 
--junit-xml=build/test-reports/h2o4gpu-testfast2.xml tests_open/gbm/test_xgb_sklearn_wrapper.py 2> ./tmp/h2o4gpu-testfast2.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast3.xml tests_open/svd/test_tsvd.py 2> ./tmp/h2o4gpu-testfast3.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast4.xml tests_open/kmeans/test_kmeans.py 2> ./tmp/h2o4gpu-testfast4.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast1.xml tests/python/open_data/glm/test_glm_simple.py 2> ./tmp/h2o4gpu-testfast1.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast2.xml tests/python/open_data/gbm/test_xgb_sklearn_wrapper.py 2> ./tmp/h2o4gpu-testfast2.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast3.xml tests/python/open_data/svd/test_tsvd.py 2> ./tmp/h2o4gpu-testfast3.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast4.xml tests/python/open_data/kmeans/test_kmeans.py 2> ./tmp/h2o4gpu-testfast4.$(LOGEXT).log dotestfast_nonccl: rm -rf ./tmp/ mkdir -p ./tmp/ # can't do -n auto due to limits on GPU memory - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast1.xml tests_open/glm/test_glm_simple.py 2> ./tmp/h2o4gpu-testfast1.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast3.xml tests_open/svd/test_tsvd.py 2> ./tmp/h2o4gpu-testfast3.$(LOGEXT).log - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast4.xml tests_open/kmeans/test_kmeans.py 2> ./tmp/h2o4gpu-testfast4.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast1.xml tests/python/open_data/glm/test_glm_simple.py 2> ./tmp/h2o4gpu-testfast1.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast3.xml tests/python/open_data/svd/test_tsvd.py 2> ./tmp/h2o4gpu-testfast3.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testfast4.xml tests/python/open_data/kmeans/test_kmeans.py 2> ./tmp/h2o4gpu-testfast4.$(LOGEXT).log dotestsmall: rm -rf ./tmp/ rm -rf build/test-reports 2>/dev/null mkdir -p ./tmp/ # can't do -n auto due to limits on GPU memory - pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testsmall.xml tests_small 2> ./tmp/h2o4gpu-testsmall.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 3 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testsmall.xml tests/python/small 2> ./tmp/h2o4gpu-testsmall.$(LOGEXT).log dotestbig: mkdir -p ./tmp/ - pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testbig.xml tests_big 2> ./tmp/h2o4gpu-testbig.$(LOGEXT).log + pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testbig.xml tests/python/big 2> ./tmp/h2o4gpu-testbig.$(LOGEXT).log -##################### +######################################### 
+# BENCHMARKING TARGETS +######################################### dotestperf: mkdir -p ./tmp/ - -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml tests_open 2> ./tmp/h2o4gpu-testperf.$(LOGEXT).log - bash tests_open/showresults.sh &> ./tmp/h2o4gpu-testperf-results.$(LOGEXT).log + -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-test.xml tests/python/open_data 2> ./tmp/h2o4gpu-testperf.$(LOGEXT).log + bash tests/python/open_data/showresults.sh &> ./tmp/h2o4gpu-testperf-results.$(LOGEXT).log dotestsmallperf: mkdir -p ./tmp/ - -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testsmallperf.xml tests_small 2> ./tmp/h2o4gpu-testsmallperf.$(LOGEXT).log - bash tests_open/showresults.sh &> ./tmp/h2o4gpu-testsmallperf-results.$(LOGEXT).log + -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testsmallperf.xml tests/python/small 2> ./tmp/h2o4gpu-testsmallperf.$(LOGEXT).log + bash tests/python/open_data/showresults.sh &> ./tmp/h2o4gpu-testsmallperf-results.$(LOGEXT).log dotestbigperf: mkdir -p ./tmp/ - -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testbigperf.xml tests_big 2> ./tmp/h2o4gpu-testbigperf.$(LOGEXT).log - bash tests_open/showresults.sh &> ./tmp/h2o4gpu-testbigperf-results.$(LOGEXT).log # still just references results directory in base path + -CHECKPERFORMANCE=1 DISABLEPYTEST=1 pytest -s --verbose --durations=10 -n 1 --fulltrace --full-trace --junit-xml=build/test-reports/h2o4gpu-testbigperf.xml tests/python/big 2> ./tmp/h2o4gpu-testbigperf.$(LOGEXT).log + bash tests/python/open_data/showresults.sh &> ./tmp/h2o4gpu-testbigperf-results.$(LOGEXT).log # still just references results directory in base path ######################### use python instead of pytest (required in some cases if pytest leads to hang) dotestperfpython: mkdir -p ./tmp/ - -bash tests_open/getresults.sh $(LOGEXT) - bash tests_open/showresults.sh + -bash tests/python/open_data/getresults.sh $(LOGEXT) + bash tests/python/open_data/showresults.sh dotestbigperfpython: mkdir -p ./tmp/ -bash testsbig/getresultsbig.sh $(LOGEXT) - bash tests_open/showresults.sh # still just references results directory in base path + bash tests/python/open_data/showresults.sh # still just references results directory in base path ################### H2O.ai public tests for pass/fail testdemos: dotestdemos -test: buildquick dotest +test: build_quick dotest testquick: dotest ################ H2O.ai public tests for performance -testperf: buildquick dotestperf # faster if also run sync_open_data before doing this test +testperf: build_quick dotestperf # faster if also run sync_open_data before doing this test ################### H2O.ai private tests for pass/fail -testsmall: buildquick sync_open_data sync_other_data dotestsmall +testsmall: build_quick sync_open_data sync_other_data dotestsmall testsmallquick: dotestsmall -testbig: buildquick sync_open_data sync_other_data dotestbig +testbig: build_quick sync_open_data sync_other_data dotestbig testbigquick: dotestbig ################ H2O.ai private tests for performance -testsmallperf: buildquick sync_open_data sync_other_data dotestsmallperf 
+testsmallperf: build_quick sync_open_data sync_other_data dotestsmallperf -testbigperf: buildquick sync_open_data sync_other_data dotestbigperf +testbigperf: build_quick sync_open_data sync_other_data dotestbigperf testsmallperfquick: dotestsmallperf @@ -671,7 +468,9 @@ test_cpp: clean_test_cpp: $(MAKE) -j clean_cpp_tests -C src/ -#################### Build info +######################################### +# BUILD INFO TARGETS +######################################### # Generate local build info src/interface_py/h2o4gpu/BUILD_INFO.txt: @@ -689,7 +488,7 @@ src/interface_py/h2o4gpu/BUILD_INFO.txt: build/VERSION.txt: src/interface_py/h2o4gpu/BUILD_INFO.txt @mkdir -p build - cd src/interface_py/; python setup.py --version > ../../build/VERSION.txt + cd src/interface_py/; $(PYTHON) setup.py --version > ../../build/VERSION.txt .PHONY: base_version base_version: @@ -703,137 +502,5 @@ endif Jenkinsfiles: bash scripts/make_jenkinsfiles.sh -#---------------------------------------------------------------------- -# CentOS 7 build API BEGIN -# -# Summary -# -# command: make centos7_cuda8_in_docker -# output: dist/x86_64-centos7-cuda8/h2o4gpu*.whl -# -# command: make centos7_cuda9_in_docker -# output: dist/x86_64-centos7-cuda9/h2o4gpu*.whl -# -# Details -# -# This is only supported in a docker environment. -# -# The 'centos7' make target does the actual work. -# -# The 'centos7_cudaN_in_docker' make target sets up the docker environment -# and then invokes the work inside that environment. -# -# The build output is put in the 'dist' directory in h2o4gpu level. -#---------------------------------------------------------------------- - -DIST_DIR = dist - -ARCH := $(shell arch) -PLATFORM = $(ARCH)-centos7-cuda$(MY_CUDA_VERSION) - -CONTAINER_NAME_SUFFIX ?= -$(USER) -CONTAINER_NAME ?= opsh2oai/dai-h2o4gpu$(CONTAINER_NAME_SUFFIX) - -PROJECT_VERSION := $(BASE_VERSION) -BRANCH_NAME ?= $(shell git rev-parse --abbrev-ref HEAD) -BRANCH_NAME_SUFFIX = +$(BRANCH_NAME) -BUILD_NUM ?= local -BUILD_NUM_SUFFIX = .$(BUILD_NUM) -VERSION = $(PROJECT_VERSION)$(BRANCH_NAME_SUFFIX)$(BUILD_NUM_SUFFIX) -CONTAINER_TAG := $(shell echo $(VERSION) | sed 's/+/-/g') - -CONTAINER_NAME_TAG = $(CONTAINER_NAME):$(CONTAINER_TAG) - -ARCH_SUBST = undefined -FROM_SUBST = undefined -ifeq ($(ARCH),x86_64) - FROM_SUBST = nvidia\/cuda:$(MY_CUDA_VERSION)-cudnn$(MY_CUDNN_VERSION)-devel-centos7 - ARCH_SUBST = $(ARCH) -endif -ifeq ($(ARCH),ppc64le) - FROM_SUBST = nvidia\/cuda-ppc64le:$(MY_CUDA_VERSION)-cudnn$(MY_CUDNN_VERSION)-devel-centos7 - ARCH_SUBST = $(ARCH) -endif - -fullinstalljenkins-nonccl-cuda8-centos: mrproper centos7_in_docker - -Dockerfile-build-centos7.$(PLATFORM): Dockerfile-build-centos7.in - cat $< | sed 's/FROM_SUBST/$(FROM_SUBST)/'g | sed 's/ARCH_SUBST/$(ARCH_SUBST)/g' | sed 's/MY_CUDA_VERSION_SUBST/$(MY_CUDA_VERSION)/g' > $@ - -centos7_cuda8_in_docker: MY_CUDA_VERSION=8.0 -centos7_cuda8_in_docker: MY_CUDNN_VERSION=5 -centos7_cuda8_in_docker: - $(MAKE) MY_CUDA_VERSION=$(MY_CUDA_VERSION) MY_CUDNN_VERSION=$(MY_CUDNN_VERSION) centos7_in_docker_impl - -centos7_cuda9_in_docker: MY_CUDA_VERSION=9.0 -centos7_cuda9_in_docker: MY_CUDNN_VERSION=7 -centos7_cuda9_in_docker: - $(MAKE) MY_CUDA_VERSION=$(MY_CUDA_VERSION) MY_CUDNN_VERSION=$(MY_CUDNN_VERSION) centos7_in_docker_impl - -centos7_cuda91_in_docker: MY_CUDA_VERSION=9.1 -centos7_cuda91_in_docker: MY_CUDNN_VERSION=7 -centos7_cuda91_in_docker: - $(MAKE) MY_CUDA_VERSION=$(MY_CUDA_VERSION) MY_CUDNN_VERSION=$(MY_CUDNN_VERSION) centos7_in_docker_impl - -centos7_in_docker_impl: 
Dockerfile-build-centos7.$(PLATFORM) - mkdir -p $(DIST_DIR)/$(PLATFORM) - docker build \ - -t $(CONTAINER_NAME_TAG) \ - -f Dockerfile-build-centos7.$(PLATFORM) \ - . - docker run \ - --rm \ - --init \ - -u `id -u`:`id -g` \ - -v `pwd`:/dot \ - -w /dot \ - --entrypoint /bin/bash \ - -e "MY_CUDA_VERSION=$(MY_CUDA_VERSION)" \ - -e "MY_CUDNN_VERSION=$(MY_CUDNN_VERSION)" \ - $(CONTAINER_NAME_TAG) \ - -c 'make centos7' - echo $(VERSION) > $(DIST_DIR)/$(PLATFORM)/VERSION.txt - -centos7_setup: - rm -fr /tmp/build - cp -a /dot/. /tmp/build - sed -i 's/cmake/# cmake/' /tmp/build/requirements_buildonly.txt - -centos7_build: - (cd /tmp/build && \ - IFLAGS="-I/usr/include/openblas" \ - OPENBLAS_PREFIX="open" \ - USEPARALLEL=0 \ - $(MAKE) \ - deps_fetch \ - apply-xgboost-nonccl-local \ - apply_py3nvml \ - libsklearn \ - build) - mkdir -p dist/$(PLATFORM) - cp /tmp/build/src/interface_py/dist/h2o4gpu*.whl dist/$(PLATFORM) - chmod -R o+rwx dist/$(PLATFORM) - -centos7: - $(MAKE) centos7_setup - $(MAKE) centos7_build - -# Note: We don't actually need to run mrproper in docker (as root) because -# the build step runs as the user. But keep the API for consistency. -mrproper_in_docker: - git clean -f -d -x - -printvars: MY_CUDA_VERSION=8.0 -printvars: MY_CUDNN_VERSION=5 -printvars: - @echo $(PLATFORM) - @echo $(PROJECT_VERSION) - @echo $(VERSION) - @echo $(CONTAINER_TAG) - -#---------------------------------------------------------------------- -# CentOS 7 build API END -#---------------------------------------------------------------------- - .PHONY: ALWAYS_REBUILD .ALWAYS_REBUILD: diff --git a/README.md b/README.md index ca54df121..b3a0265cb 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,12 @@ Daal library added for CPU, currently supported only x86_64 architecture. ## Requirements -* PC with Ubuntu 16.04+ w/ numpy 1.14.x +* PC running Linux with glibc 2.17+ * Install CUDA with bundled display drivers ( [CUDA 8](https://developer.nvidia.com/cuda-downloads) or - [CUDA 9.0](https://developer.nvidia.com/cuda-release-candidate-download) ) - -NOTE: CUDA9.1 is not currently compatible with H2O4GPU. + [CUDA 9](https://developer.nvidia.com/cuda-release-candidate-download) ) When installing, choose to link the cuda install to /usr/local/cuda . Ensure to reboot after installing the new nvidia drivers.
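[Editor's note: a quick sanity check for the CUDA prerequisite described above; not part of the diff, and the paths are assumptions based on the `/usr/local/cuda` symlink the text asks for.]
```
# Verify the toolkit symlink and the display driver after the reboot
ls -l /usr/local/cuda              # should point at the installed toolkit, e.g. cuda-9.0
/usr/local/cuda/bin/nvcc --version
nvidia-smi                         # driver loaded and GPUs visible
```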
@@ -53,24 +51,18 @@ sudo apt-get -y install libcurl4-openssl-dev libssl-dev libxml2-dev Download the Python wheel file (For Python 3.6 on linux_x86_64): * Stable: - * [CUDA8 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) - * [CUDA8 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nonccl-cuda8/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) - * [CUDA9 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nccl-cuda9/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) - * [CUDA9 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nonccl-cuda9/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) + * [CUDA8](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) + * [CUDA9](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/stable/ai/h2o/h2o4gpu/0.2-nccl-cuda9/h2o4gpu-0.2.0-cp36-cp36m-linux_x86_64.whl) * Bleeding edge (changes with every successful master branch build): - * [CUDA8 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) - * [CUDA8 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nonccl-cuda8/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) - * [CUDA9 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda9/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) - * [CUDA9 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nonccl-cuda9/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) + * [CUDA8](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) + * [CUDA9.0](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda90/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) + * [CUDA9.2](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda92/h2o4gpu-0.2.0.9999-cp36-cp36m-linux_x86_64.whl) * [For Conda (unsupported and untested by H2O.ai)] ``` pip install --extra-index-url https://pypi.anaconda.org/gpuopenanalytics/simple h2o4gpu ``` -The "nccl" (NCCL) versions give support to multi-GPU in xgboost and in other algorithms. The "nonccl" versions are provided -in case of system instability in production environments due to NCCL. - -Start a fresh pyenv or virtualenv session. + Start a fresh pyenv or virtualenv session. Install the Python wheel file. NOTE: If you don't use a fresh environment, this will overwrite your py3nvml and xgboost installations to use our validated @@ -131,9 +123,9 @@ predictions <- model %>% predict(x) ## Next Steps -For more examples using Python API, please check out our [Jupyter notebook demos](https://github.com/h2oai/h2o4gpu/tree/master/examples/py/demos). To run the demos using a local wheel run, at least download `requirements_runtime_demos.txt` from the Github repo and do: +For more examples using Python API, please check out our [Jupyter notebook demos](https://github.com/h2oai/h2o4gpu/tree/master/examples/py/demos). 
To run the demos using a local wheel run, at least download `src/interface_py/requirements_runtime_demos.txt` from the Github repo and do: ``` -pip install -r requirements_runtime_demos.txt +pip install -r src/interface_py/requirements_runtime_demos.txt ``` and then run the jupyter notebook demos. @@ -151,10 +143,8 @@ Download the Docker file (for linux_x86_64): * Bleeding edge (changes with every successful master branch build): * [CUDA8 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda8/h2o4gpu-0.2.0.9999-nccl-cuda8-runtime.tar.bz2) - * [CUDA8 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nonccl-cuda8/h2o4gpu-nonccl-cuda8-0.2.0.9999-runtime.tar.bz2) * [CUDA9 nccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nccl-cuda9/h2o4gpu-0.2.0.9999-nccl-cuda9-runtime.tar.bz2) - * [CUDA9 nonccl](https://s3.amazonaws.com/h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/h2o4gpu/0.2-nonccl-cuda9/h2o4gpu-0.2.0.9999-nonccl-cuda9-runtime.tar.bz2) - + Load and run docker file (e.g. for bleeding-edge of nccl-cuda9): ``` pbzip2 -dc h2o4gpu-0.2.0.9999-nccl-cuda9-runtime.tar.bz2 | nvidia-docker load diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 81bf319f1..08aab0721 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -46,7 +46,7 @@ To see if things are missing. 3) Run `ldd --version`, we currently require version `2.23` or higher. If your system is running a lower version please update if possible or build the project yourself on your machine. -4) Make sure you are running CUDA 8.x or CUDA 9.0, we currently don't support CUDA 9.1. +4) Make sure you are running CUDA 8.x or CUDA 9.x. 5) If compiled with icc (default if present) and have conda, need to do: diff --git a/Jenkinsfile-nccl-cuda9-aws1 b/ci/Jenkinsfile-ppc64le-cuda8 similarity index 76% rename from Jenkinsfile-nccl-cuda9-aws1 rename to ci/Jenkinsfile-ppc64le-cuda8 index c17682df0..194bf1b3b 100644 --- a/Jenkinsfile-nccl-cuda9-aws1 +++ b/ci/Jenkinsfile-ppc64le-cuda8 @@ -15,34 +15,21 @@ import ai.h2o.ci.BuildInfo def commitMessage = '' def h2o4gpuUtils = null -def dist = "dist5" -def BUILDTYPE = "nccl-cuda9-aws1" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9-aws1" -def linuxwheel = "linux_whl5" +def platform = "ppc64le-centos7-cuda8.0" +def BUILDTYPE = "cuda8" +def cuda = "nvidia/cuda-ppc64le:8.0-cudnn5-devel-centos7" +def cudart = "nvidia/cuda-ppc64le:8.0-cudnn5-runtime-centos7" +def extratag = "-cuda8" +def linuxwheel = "ppc64le-centos7-cuda8.whl" def testtype = "dotest" -def labelbuild = "ec2P32xlarge" -def labeltest = "ec2P32xlarge" -def labelruntime = "ec2P32xlarge" +def labelbuild = "ibm-power || ibm-power-gpu" +def labeltest = "ibm-power || ibm-power-gpu" +def labelruntime = "ibm-power || ibm-power-gpu" def doingbenchmark = "0" -def dobenchmark = "1" -def doruntime = "0" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", 
"nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/jenkins/smalldata:/smalldata -v /home/jenkins/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) def benchmark_commit_trigger pipeline { @@ -64,12 +51,7 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda9-aws1") { + stage("Git clone on Linux ppc64le-cuda8") { agent { label "${labelbuild}" @@ -88,25 +70,24 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux nccl-cuda9-aws1") { - + stage("Build on Centos7 ppc64le-cuda8") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -123,8 +104,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -141,14 +122,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda9-aws1") { + stage("Test | Lint | S3up on Centos7 ppc64le-cuda8") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -156,39 +137,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ 
|| true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") + //h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux nccl-cuda9-aws1") { + stage("Build/Publish Runtime Docker Centos7 ppc64le-cuda8") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -208,19 +196,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -232,14 +220,14 @@ pipeline { } } - stage("Benchmarking Linux nccl-cuda9-aws1") { + stage("Benchmarking Linux ppc64le-cuda8") { agent { label 'master' } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/ci/Jenkinsfile-ppc64le-cuda9 b/ci/Jenkinsfile-ppc64le-cuda9 new file mode 100644 index 000000000..ad7dc05b4 --- /dev/null +++ b/ci/Jenkinsfile-ppc64le-cuda9 @@ -0,0 +1,268 @@ +#!/usr/bin/groovy + +//################ FILE IS AUTO-GENERATED from .base files +//################ DO NOT MODIFY +//################ See scripts/make_jenkinsfiles.sh + +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "ppc64le-centos7-cuda9.0" +def BUILDTYPE = "cuda9" +def cuda = "nvidia/cuda-ppc64le:9.0-cudnn7-devel-centos7" +def cudart = "nvidia/cuda-ppc64le:9.0-cudnn7-runtime-centos7" +def extratag = "-cuda9" +def linuxwheel = "ppc64le-centos7-cuda9.whl" +def testtype = "dotest" +def labelbuild = "ibm-power || 
ibm-power-gpu" +def labeltest = "ibm-power || ibm-power-gpu" +def labelruntime = "ibm-power || ibm-power-gpu" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/jenkins/smalldata:/smalldata -v /home/jenkins/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) +def benchmark_commit_trigger + +pipeline { + agent none + + // Setup job options + options { + ansiColor('xterm') + timestamps() + timeout(time: 300, unit: 'MINUTES') + buildDiscarder(logRotator(numToKeepStr: '10')) + disableConcurrentBuilds() + skipDefaultCheckout() + } + + environment { + MAKE_OPTS = "-s CI=1" // -s: silent mode + BUILD_TYPE = "${BUILDTYPE}" + } + + stages { + stage("Git clone on Linux ppc64le-cuda9") { + + agent { + label "${labelbuild}" + } + steps { + dumpInfo 'Linux Build Info' + // Do checkout + retryWithTimeout(200 /* seconds */, 3 /* retries */) { + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) + } + script { + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) + commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() + echo "Commit Message: ${commitMessage}" + benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) + echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" + } + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" + } + } + stage("Build on Centos7 ppc64le-cuda9") { + agent { + label "${labelbuild}" + } + when { + expression { + unstash "jenkinsfiles" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() + } + } + steps { + // Do checkout + retryWithTimeout(200 /* seconds */, 3 /* retries */) { + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) + } + script { + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") + + buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) + + script { + // Load the version file content + buildInfo.get().setVersion(utilsLib.getCommandOutput("cat build/VERSION.txt")) + utilsLib.setCurrentBuildName(buildInfo.get().getVersion()) + utilsLib.appendBuildDescription("""|Authors: ${buildInfo.get().getAuthorNames().join(" ")} + |Git SHA: ${buildInfo.get().getGitSha().substring(0, 8)} + |""".stripMargin("|")) + } + + } + } + } + + stage("Test | Lint | S3up on Centos7 ppc64le-cuda9") { + agent { + label "${labeltest}" + } + when { + expression { + unstash "jenkinsfiles" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) + } + } + steps { + dumpInfo 'Linux Test Info' + // Get source code (should put tests into wheel, then wouldn't have 
to checkout) + retryWithTimeout(200 /* seconds */, 3 /* retries */) { + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) + } + script { + unstash 'version_info' + sh """ + echo "Before Stashed wheel file:" + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true + """ + unstash "${linuxwheel}" + sh """ + echo "After Stashed wheel file:" + ls -l src/interface_py/dist/${platform}/ || true + """ + //h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") + } + retryWithTimeout(500 /* seconds */, 5 /* retries */) { + withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { + script { + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") + } + } + } + } + } + stage("Build/Publish Runtime Docker Centos7 ppc64le-cuda9") { + agent { + label "${labelruntime}" + } + when { + expression { + unstash "jenkinsfiles" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() + } + } + steps { + dumpInfo 'Linux Build Info' + // Do checkout + retryWithTimeout(200 /* seconds */, 3 /* retries */) { + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) + } + script { + sh """ + echo "Before Stashed wheel file:" + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true + """ + unstash "${linuxwheel}" + sh """ + echo "After Stashed wheel file:" + ls -l src/interface_py/dist/${platform} || true + """ + unstash 'version_info' + sh 'echo "Stashed version file:" && ls -l build/' + } + script { + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") + } + retryWithTimeout(1000 /* seconds */, 5 /* retries */) { + withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { + script { + h2o4gpuUtils.publishRuntimeToS3(buildInfo.get(), "${extratag}") + } + } + } + } + } + + stage("Benchmarking Linux ppc64le-cuda9") { + agent { + label 'master' + } + when { + expression { + unstash "jenkinsfiles" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" + return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") + } + } + steps { + script { + utilsLib.appendBuildDescription("BENCH \u2713") + } + + echo banner("Triggering downstream jobs h2o4gpu${extratag}-benchmark : RUNTIME_ID=${buildInfo.get().getVersion()}") + build job: "/h2o4gpu${extratag}-benchmark/${env.BRANCH_NAME}", parameters: [[$class: 'StringParameterValue', name: 'RUNTIME_ID', value: buildInfo.get().getVersion()]], propagate: false, wait: false, quietPeriod: 60 + } + } + + } // end over stages + post { + failure { + node('linux') { + 
script { + if(env.BRANCH_NAME == "master") { + emailext( + to: "mateusz@h2o.ai, jmckinney@h2o.ai", + subject: "BUILD FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'", + body: '''${JELLY_SCRIPT, template="html_gmail"}''', + attachLog: true, + compressLog: true, + recipientProviders: [ + [$class: 'DevelopersRecipientProvider'], + ] + ) + } + } + } + } + } +} + + diff --git a/Jenkinsfile-cpu b/ci/Jenkinsfile-x86_64-cuda8 similarity index 78% rename from Jenkinsfile-cpu rename to ci/Jenkinsfile-x86_64-cuda8 index e467db57b..c2ab847d6 100644 --- a/Jenkinsfile-cpu +++ b/ci/Jenkinsfile-x86_64-cuda8 @@ -15,32 +15,21 @@ import ai.h2o.ci.BuildInfo def commitMessage = '' def h2o4gpuUtils = null -def dist = "dist8" -def BUILDTYPE = "cpu" -def cuda = "ubuntu:16.04" -def extratag = "-cpu" -def linuxwheel = "linux_whl2" -def testtype = "dotestfast_nonccl" -def labelbuild = "docker && linux" -def labeltest = "docker" -def labelruntime = "docker" +def platform = "x86_64-centos7-cuda8.0" +def BUILDTYPE = "cuda8" +def cuda = "nvidia/cuda:8.0-cudnn5-devel-centos7" +def cudart = "nvidia/cuda:8.0-cudnn5-runtime-centos7" +def extratag = "-cuda8" +def linuxwheel = "x86_64-centos7-cuda8.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" +def labelruntime = "nvidia-docker" def doingbenchmark = "0" -def dobenchmark = "1" +def dobenchmark = "0" def doruntime = "1" -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) def benchmark_commit_trigger pipeline { @@ -62,12 +51,7 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux cpu") { + stage("Git clone on Linux x86_64-cuda8") { agent { label "${labelbuild}" @@ -86,25 +70,24 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux cpu") { - + stage("Build on Centos7 
x86_64-cuda8") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -121,8 +104,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -139,14 +122,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux cpu") { + stage("Test | Lint | S3up on Centos7 x86_64-cuda8") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -154,39 +137,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") + h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux cpu") { + stage("Build/Publish Runtime Docker Centos7 x86_64-cuda8") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -206,19 +196,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - 
h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -230,14 +220,14 @@ pipeline { } } - stage("Benchmarking Linux cpu") { + stage("Benchmarking Linux x86_64-cuda8") { agent { label 'master' } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/Jenkinsfile-nccl-cuda9 b/ci/Jenkinsfile-x86_64-cuda9 similarity index 77% rename from Jenkinsfile-nccl-cuda9 rename to ci/Jenkinsfile-x86_64-cuda9 index 4d53ff2f3..ff00fa44f 100644 --- a/Jenkinsfile-nccl-cuda9 +++ b/ci/Jenkinsfile-x86_64-cuda9 @@ -15,34 +15,21 @@ import ai.h2o.ci.BuildInfo def commitMessage = '' def h2o4gpuUtils = null -def dist = "dist4" -def BUILDTYPE = "nccl-cuda9" -def cuda = "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04" -def cudart = "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04" -def extratag = "-nccl-cuda9" -def linuxwheel = "linux_whl4" +def platform = "x86_64-centos7-cuda9.0" +def BUILDTYPE = "cuda9" +def cuda = "nvidia/cuda:9.0-cudnn7-devel-centos7" +def cudart = "nvidia/cuda:9.0-cudnn7-runtime-centos7" +def extratag = "-cuda9" +def linuxwheel = "x86_64-centos7-cuda9.whl" def testtype = "dotest" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" def labelruntime = "nvidia-docker" def doingbenchmark = "0" -def dobenchmark = "1" +def dobenchmark = "0" def doruntime = "1" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) def benchmark_commit_trigger pipeline { @@ -64,12 +51,7 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda9") { + stage("Git clone on Linux x86_64-cuda9") { agent { label "${labelbuild}" @@ -88,25 +70,24 @@ pipeline { 
userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux nccl-cuda9") { - + stage("Build on Centos7 x86_64-cuda9") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -123,8 +104,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -141,14 +122,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda9") { + stage("Test | Lint | S3up on Centos7 x86_64-cuda9") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -156,39 +137,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") + h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux nccl-cuda9") { + stage("Build/Publish Runtime Docker Centos7 x86_64-cuda9") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - 
h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -208,19 +196,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -232,14 +220,14 @@ pipeline { } } - stage("Benchmarking Linux nccl-cuda9") { + stage("Benchmarking Linux x86_64-cuda9") { agent { label 'master' } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/Jenkinsfile-nccl-cuda8 b/ci/Jenkinsfile-x86_64-cuda92 similarity index 77% rename from Jenkinsfile-nccl-cuda8 rename to ci/Jenkinsfile-x86_64-cuda92 index 61db36def..4dab0d862 100644 --- a/Jenkinsfile-nccl-cuda8 +++ b/ci/Jenkinsfile-x86_64-cuda92 @@ -15,34 +15,21 @@ import ai.h2o.ci.BuildInfo def commitMessage = '' def h2o4gpuUtils = null -def dist = "dist1" -def BUILDTYPE = "nccl-cuda8" -def cuda = "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04" -def cudart = "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -def extratag = "-nccl-cuda8" -def linuxwheel = "linux_whl1" -def testtype = "dotestfast" -def labelbuild = "nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" -def labeltest = "gpu && nvidia-docker && (mr-dl11 || mr-dl16 || mr-dl10)" +def platform = "x86_64-centos7-cuda9.2" +def BUILDTYPE = "cuda92" +def cuda = "nvidia/cuda:9.2-cudnn7-devel-centos7" +def cudart = "nvidia/cuda:9.2-cudnn7-runtime-centos7" +def extratag = "-cuda92" +def linuxwheel = "x86_64-centos7-cuda92.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" def labelruntime = "nvidia-docker" def doingbenchmark = "0" def dobenchmark = "0" def doruntime = "1" - -//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names) -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = 
["dist1","dist2","dist3","dist4"] - +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" +//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage names) def benchmark_commit_trigger pipeline { @@ -64,12 +51,7 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// - stage("Git clone on Linux nccl-cuda8") { + stage("Git clone on Linux x86_64-cuda92") { agent { label "${labelbuild}" @@ -88,25 +70,24 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux nccl-cuda8") { - + stage("Build on Centos7 x86_64-cuda92") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -123,8 +104,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -141,14 +122,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux nccl-cuda8") { + stage("Test | Lint | S3up on Centos7 x86_64-cuda92") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -156,39 +137,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", 
"${extratag}", "${dist}", "${testtype}") + h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux nccl-cuda8") { + stage("Build/Publish Runtime Docker Centos7 x86_64-cuda92") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -208,19 +196,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -232,14 +220,14 @@ pipeline { } } - stage("Benchmarking Linux nccl-cuda8") { + stage("Benchmarking Linux x86_64-cuda92") { agent { label 'master' } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/Jenkinsfile.utils2 b/ci/Jenkinsfile.template similarity index 80% rename from Jenkinsfile.utils2 rename to ci/Jenkinsfile.template index 3bce21ade..376d437d5 100644 --- a/Jenkinsfile.utils2 +++ b/ci/Jenkinsfile.template @@ -1,16 +1,3 @@ -// Just Notes: -// -//def jobnums = [0 , 1 , 2 , 3] -//def tags = ["nccl" , "nonccl" , "nccl" , "nonccl"] -//def cudatags = ["cuda8", "cuda8" , "cuda9" , "cuda9"] -//def dobuilds = [1, 0, 0, 0] -//def dofulltests = [1, 0, 0, 0] -//def dopytests = [1, 0, 0, 0] -//def doruntimes = [1, 1, 1, 1] -//def dockerimagesbuild = ["nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04"] -//def dockerimagesruntime = ["nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04", "nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04"] -//def dists = ["dist1","dist2","dist3","dist4"] - def benchmark_commit_trigger pipeline { @@ -32,11 +19,6 @@ pipeline { } stages { - ///////////////////////////////////////////////////////////////////// - // - // - // Avoid mr-dl8 and mr-dl10 for build for now due to permission denied issue - ///////////////////////////////////////////////////////////////////// 
stage("Git clone on Linux") { agent { @@ -56,25 +38,24 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) commitMessage = sh(script: 'git log -1 --pretty=%B | tr "\n" " "', returnStdout: true).trim() echo "Commit Message: ${commitMessage}" benchmark_commit_trigger = ("${commitMessage}" ==~ /.*trigger_benchmark.*/) echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" } - stash includes: "Jenkinsfile*", name: "jenkinsfiles" + stash includes: "ci/Jenkinsfile*", name: "jenkinsfiles" } } - stage("Build Wheel on Linux") { - + stage("Build on Centos7") { agent { label "${labelbuild}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doBuild() || h2o4gpuUtils.doTests() } } @@ -91,8 +72,8 @@ pipeline { userRemoteConfigs : scm.userRemoteConfigs]) } script { - h2o4gpuUtils = load "Jenkinsfile.utils" - h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${dist}", "${linuxwheel}") + h2o4gpuUtils = load "ci/Jenkinsfile.utils" + h2o4gpuUtils.buildOnLinux("${cuda}", "${extratag}", "${platform}", "${linuxwheel}") buildInfo("h2o4gpu", h2o4gpuUtils.isRelease()) @@ -109,14 +90,14 @@ pipeline { } } - stage("Test Wheel & Pylint & S3up on Linux") { + stage("Test | Lint | S3up on Centos7") { agent { label "${labeltest}" } when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doingbenchmark}" == "1" || h2o4gpuUtils.doTests() && (h2o4gpuUtils.rerun_disabled(commitMessage) || !h2o4gpuUtils.wasStageSuccessful("Fast Test Wheel & Pylint & S3up on Linux")) } } @@ -124,39 +105,46 @@ pipeline { dumpInfo 'Linux Test Info' // Get source code (should put tests into wheel, then wouldn't have to checkout) retryWithTimeout(200 /* seconds */, 3 /* retries */) { - checkout scm + deleteDir() + checkout([ + $class : 'GitSCM', + branches : scm.branches, + doGenerateSubmoduleConfigurations: false, + extensions : scm.extensions + [[$class: 'SubmoduleOption', disableSubmodules: true, recursiveSubmodules: false, reference: '', trackingSubmodules: false, shallow: true]], + submoduleCfg : [], + userRemoteConfigs : scm.userRemoteConfigs]) } script { unstash 'version_info' sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true """ - h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${dist}", "${testtype}") + h2o4gpuUtils.runTests(buildInfo.get(), "${cuda}", "${extratag}", "${platform}", "${testtype}", "${data_dirs}") } retryWithTimeout(500 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { script { - h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${dist}") + h2o4gpuUtils.publishToS3(buildInfo.get(), "${extratag}" , "${platform}") } } } } } - stage("Build/Publish Runtime Docker Linux") { + stage("Build/Publish Runtime Docker Centos7") { agent { label "${labelruntime}" } when { expression { unstash "jenkinsfiles" - 
h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" return "${doruntime}" == "1" && h2o4gpuUtils.doRuntime() } } @@ -176,19 +164,19 @@ pipeline { script { sh """ echo "Before Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true - rm -rf src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform}/ || true + rm -rf src/interface_py/dist/${platform}/ || true """ unstash "${linuxwheel}" sh """ echo "After Stashed wheel file:" - ls -l src/interface_py/${dist}/ || true + ls -l src/interface_py/dist/${platform} || true """ unstash 'version_info' sh 'echo "Stashed version file:" && ls -l build/' } script { - h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${dist}", "${extratag}") + h2o4gpuUtils.buildRuntime(buildInfo.get(), "${cudart}", "${platform}", "${extratag}", "${data_dirs}") } retryWithTimeout(1000 /* seconds */, 5 /* retries */) { withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { @@ -207,7 +195,7 @@ pipeline { when { expression { unstash "jenkinsfiles" - h2o4gpuUtils = load "Jenkinsfile.utils" + h2o4gpuUtils = load "ci/Jenkinsfile.utils" echo "benchmark_commit_trigger: ${benchmark_commit_trigger}" return "${doingbenchmark}" == "1" || (("${benchmark_commit_trigger}"=="true" || h2o4gpuUtils.doTriggerBenchmarksJob()) && "${dobenchmark}" == "1" && env.BRANCH_NAME == "master") } diff --git a/Jenkinsfile.utils b/ci/Jenkinsfile.utils similarity index 78% rename from Jenkinsfile.utils rename to ci/Jenkinsfile.utils index 8b6d7d1dd..e9f0044cf 100644 --- a/Jenkinsfile.utils +++ b/ci/Jenkinsfile.utils @@ -68,14 +68,16 @@ String changeId() { return "-master" } -void publishToS3(BuildInfo buildInfo, String extratag, String dist) { +void publishToS3(BuildInfo buildInfo, String extratag, String platform) { echo "Publishing artifact to S3" + def buildArch = platform.split('-')[0] + def versionTag = buildInfo.getVersion() def majorVersionTag = buildInfo.getMajorVersion() def artifactId = "h2o4gpu" - def artifact = "${artifactId}-${versionTag}-cp36-cp36m-linux_x86_64.whl" - def localArtifact = "src/interface_py/${dist}/${artifact}" + def artifact = "${artifactId}-${versionTag}-cp36-cp36m-linux_${buildArch}.whl" + def localArtifact = "src/interface_py/dist/${platform}/${artifact}" sh 'echo "S3 defs: $versionTag $artifactId $artifact $localArtifact" ' @@ -83,19 +85,34 @@ void publishToS3(BuildInfo buildInfo, String extratag, String dist) { def bucket = "s3://h2o-release/h2o4gpu/snapshots/ai/h2o/${artifactId}/${majorVersionTag}${extratag}/" sh "s3cmd put ${localArtifact} ${bucket}" sh "s3cmd setacl --acl-public ${bucket}${artifact}" + sh "s3cmd del ${bucket}${localArtifact}" if (isRelease()) { bucket = "s3://h2o-release/h2o4gpu/releases/stable/ai/h2o/${artifactId}/${majorVersionTag}${extratag}/" sh "s3cmd put ${localArtifact} ${bucket}" sh "s3cmd setacl --acl-public ${bucket}${artifact}" + + // Temporarily also push to a bucket containing build_id, in the long run only this upload should stay + // the above one should get deprecated + build_id_bucket = "s3://artifacts.h2o.ai/releases/ai/h2o/${artifactId}/${env.BRANCH_NAME}/${env.BUILD_ID}/${platform}${extratag}/" + sh "s3cmd put ${localArtifact} ${build_id_bucket}" + sh "s3cmd setacl --acl-public ${build_id_bucket}${artifact}" } if (isBleedingEdge()) { bucket = "s3://h2o-release/h2o4gpu/releases/bleeding-edge/ai/h2o/${artifactId}/${majorVersionTag}${extratag}/" def nonLocalVersionTag = versionTag.split('\\+')[0] - def 
bleedingEdgeArtifact = "${artifactId}-${nonLocalVersionTag}-cp36-cp36m-linux_x86_64.whl" + def bleedingEdgeArtifact = "${artifactId}-${nonLocalVersionTag}-cp36-cp36m-linux_${buildArch}.whl" + sh "s3cmd put ${localArtifact} ${bucket}${bleedingEdgeArtifact}" sh "s3cmd setacl --acl-public ${bucket}${bleedingEdgeArtifact}" + + // Temporarily also push to a bucket containing build_id, in the long run only this upload should stay + // the above one should get deprecated + build_id_bucket = "s3://artifacts.h2o.ai/snapshots/ai/h2o/${artifactId}/${versionTag}/${platform}${extratag}/" + sh "s3cmd put ${localArtifact} ${build_id_bucket}${bleedingEdgeArtifact}" + sh "s3cmd setacl --acl-public ${build_id_bucket}${bleedingEdgeArtifact}" + } } @@ -130,50 +147,62 @@ void publishRuntimeToS3(BuildInfo buildInfo,String extratag) { } } -void runTests(BuildInfo buildInfo, String dockerimage, String extratag, String dist, String target) { +void runTests(BuildInfo buildInfo, String dockerimage, String extratag, String platform, String target, String data_dirs) { echo "Running tests" try { sh """ - CONTAINER_NAME=${CONTAINER_NAME} extratag=${extratag} dockerimage=${dockerimage} target=${target} dist=${dist} ./scripts/make-docker-runtests.sh + DATA_DIRS="${data_dirs}" \ + CONTAINER_NAME=${CONTAINER_NAME} \ + extratag=${extratag} \ + dockerimage=${dockerimage} \ + target=${target} \ + platform=${platform} ./scripts/make-docker-runtests.sh """ currentBuild.result = "SUCCESS" } catch (error) { currentBuild.result = "FAILURE" throw error } finally { - sh "nvidia-docker stop ${CONTAINER_NAME} || true" + sh "docker stop ${CONTAINER_NAME} || true" // if snapshot and using buildID or hash in docker image, need to rm that container and image here. arch 'tmp/*.log' arch 'results/*.dat' - junit testResults: 'build/test-reports/*.xml', keepLongStdio: true, allowEmptyResults: false + junit testResults: 'build/test-reports/*.xml', keepLongStdio: true, allowEmptyResults: true } } -void buildOnLinux(String dockerimage, String extratag, String dist, String stashName) { +void buildOnLinux(String dockerimage, String extratag, String platform, String stashName) { echo "Building on linux" withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { try { sh """ - CONTAINER_NAME=${CONTAINER_NAME} extratag=${extratag} dockerimage=${dockerimage} H2O4GPU_BUILD=${env.BUILD_ID} H2O4GPU_SUFFIX=${isRelease() ? "" : "+" + ciVersionSuffix()} makeopts=${env.MAKE_OPTS} dist=${dist} ./scripts/make-docker-devel.sh + git clean -f -d -x + CONTAINER_NAME=${CONTAINER_NAME} \ + extratag=${extratag} \ + dockerimage=${dockerimage} \ + H2O4GPU_BUILD=${env.BUILD_ID} \ + H2O4GPU_SUFFIX=${isRelease() ? "" : "+" + ciVersionSuffix()} \ + makeopts=${env.MAKE_OPTS} \ + platform=${platform} ./scripts/make-docker-devel.sh """ - stash includes: "src/interface_py/${dist}/*h2o4gpu-*.whl", name: stashName + stash includes: "src/interface_py/dist/${platform}/*h2o4gpu-*.whl", name: stashName stash includes: 'build/VERSION.txt', name: 'version_info' sh "echo \"Archive artifacts\"" - arch "src/interface_py/${dist}/*h2o4gpu-*.whl" + arch "src/interface_py/dist/${platform}/*h2o4gpu-*.whl" currentBuild.result = "SUCCESS" } catch (error) { currentBuild.result = "FAILURE" throw error } finally { - sh "nvidia-docker stop ${CONTAINER_NAME} || true" + sh "docker stop ${CONTAINER_NAME} || true" // if snapshot and using buildID or hash in docker image, need to rm that container and image here. 
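For reference, the wheel name and upload path that the reworked `publishToS3` above derives from `${platform}` can be traced with a short shell sketch; the version tag and bucket values are illustrative, and the s3cmd calls mirror the ones in the function:
```
# How publishToS3 now builds the artifact path from the platform string
# (version tag and bucket below are illustrative examples).
platform="x86_64-centos7-cuda9.0"
buildArch="${platform%%-*}"            # same idea as platform.split('-')[0] -> x86_64
versionTag="0.2.0"
artifact="h2o4gpu-${versionTag}-cp36-cp36m-linux_${buildArch}.whl"
localArtifact="src/interface_py/dist/${platform}/${artifact}"

bucket="s3://h2o-release/h2o4gpu/snapshots/ai/h2o/h2o4gpu/0.2-cuda9/"   # ${majorVersionTag}${extratag}
s3cmd put "${localArtifact}" "${bucket}"
s3cmd setacl --acl-public "${bucket}${artifact}"
```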
} } } -void buildRuntime(BuildInfo buildInfo, String dockerimage, String dist, String extratag) { +void buildRuntime(BuildInfo buildInfo, String dockerimage, String platform, String extratag, String data_dirs) { echo "Building runtime" def buckettype = "snapshots" @@ -184,14 +213,21 @@ void buildRuntime(BuildInfo buildInfo, String dockerimage, String dist, String e withCredentials([[$class: 'AmazonWebServicesCredentialsBinding', credentialsId: "awsArtifactsUploader"]]) { try { sh """ - CONTAINER_NAME=${CONTAINER_NAME} versionTag=${versionTag} extratag=${extratag} encodedFullVersionTag=${encodedFullVersionTag} fullVersionTag=${fullVersionTag} dist=${dist} dockerimage=${dockerimage} ./scripts/make-docker-runtime.sh + DATA_DIRS="${data_dirs}" \ + CONTAINER_NAME=${CONTAINER_NAME} \ + versionTag=${versionTag} \ + extratag=${extratag} \ + encodedFullVersionTag=${encodedFullVersionTag} \ + fullVersionTag=${fullVersionTag} \ + platform=${platform} \ + dockerimage=${dockerimage} ./scripts/make-docker-runtime.sh """ currentBuild.result = "SUCCESS" } catch (error) { currentBuild.result = "FAILURE" throw error } finally { - sh "nvidia-docker stop ${CONTAINER_NAME} || true" + sh "docker stop ${CONTAINER_NAME} || true" // if snapshot and using buildID or hash in docker image, need to rm that container and image here. } } @@ -271,15 +307,15 @@ def doTests() { def changedFiles = buildInfo.get().getChangedFiles() if (changedFiles) { echo "Looking for 'tests' in ${changedFiles.join(',')}" - // Check if the code change touches tests_open + // Check if the code change touches tests/python/open_data def doTrigger1 = changedFiles.any { filepath -> - filepath.startsWith("tests_open") + filepath.startsWith("tests/python/open_data") } def doTrigger2 = changedFiles.any { filepath -> - filepath.startsWith("tests_big") + filepath.startsWith("tests/python/big") } def doTrigger3 = changedFiles.any { filepath -> - filepath.startsWith("tests_small") + filepath.startsWith("tests/python/small") } def doTrigger4 = changedFiles.any { filepath -> filepath.startsWith("data") @@ -303,16 +339,15 @@ def doTestperf() { def changedFiles = buildInfo.get().getChangedFiles() if (changedFiles) { - echo "Looking for 'testsxgboost' in ${changedFiles.join(',')}" - // Check if the code change touches tests_open + echo "Looking for 'tests/python/xgboost' in ${changedFiles.join(',')}" def doTrigger1 = changedFiles.any { filepath -> - filepath.startsWith("testsxgboost") + filepath.startsWith("tests/python/xgboost") } def doTrigger2 = changedFiles.any { filepath -> - filepath.startsWith("tests_big") + filepath.startsWith("tests/python/big") } def doTrigger3 = changedFiles.any { filepath -> - filepath.startsWith("tests_small") + filepath.startsWith("tests/python/small") } def doTrigger4 = doBuild() def doTrigger5 = doTests() @@ -329,15 +364,14 @@ def doRuntime() { def changedFiles = buildInfo.get().getChangedFiles() if (changedFiles) { echo "Looking for 'examples' in ${changedFiles.join(',')}" - // Check if the code change touches tests_open def doTrigger1 = changedFiles.any { filepath -> filepath.startsWith("examples") } def doTrigger2 = changedFiles.any { filepath -> - filepath.startsWith("tests_big") + filepath.startsWith("tests/python/big") } def doTrigger3 = changedFiles.any { filepath -> - filepath.startsWith("tests_small") + filepath.startsWith("tests/python/small") } def doTrigger4 = doBuild() echo "doRuntime() Triggers: ${doTrigger1} ${doTrigger2} ${doTrigger3} ${doTrigger4}" diff --git a/ci/base/Jenkinsfile-ppc64le-cuda8.base 
b/ci/base/Jenkinsfile-ppc64le-cuda8.base new file mode 100644 index 000000000..f3a193c8e --- /dev/null +++ b/ci/base/Jenkinsfile-ppc64le-cuda8.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "ppc64le-centos7-cuda8.0" +def BUILDTYPE = "cuda8" +def cuda = "nvidia/cuda-ppc64le:8.0-cudnn5-devel-centos7" +def cudart = "nvidia/cuda-ppc64le:8.0-cudnn5-runtime-centos7" +def extratag = "-cuda8" +def linuxwheel = "ppc64le-centos7-cuda8.whl" +def testtype = "dotest" +def labelbuild = "ibm-power || ibm-power-gpu" +def labeltest = "ibm-power || ibm-power-gpu" +def labelruntime = "ibm-power || ibm-power-gpu" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/jenkins/smalldata:/smalldata -v /home/jenkins/open_data:/open_data" diff --git a/ci/base/Jenkinsfile-ppc64le-cuda9.base b/ci/base/Jenkinsfile-ppc64le-cuda9.base new file mode 100644 index 000000000..623d2b89f --- /dev/null +++ b/ci/base/Jenkinsfile-ppc64le-cuda9.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "ppc64le-centos7-cuda9.0" +def BUILDTYPE = "cuda9" +def cuda = "nvidia/cuda-ppc64le:9.0-cudnn7-devel-centos7" +def cudart = "nvidia/cuda-ppc64le:9.0-cudnn7-runtime-centos7" +def extratag = "-cuda9" +def linuxwheel = "ppc64le-centos7-cuda9.whl" +def testtype = "dotest" +def labelbuild = "ibm-power || ibm-power-gpu" +def labeltest = "ibm-power || ibm-power-gpu" +def labelruntime = "ibm-power || ibm-power-gpu" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/jenkins/smalldata:/smalldata -v /home/jenkins/open_data:/open_data" diff --git a/ci/base/Jenkinsfile-x86_64-cuda8.base b/ci/base/Jenkinsfile-x86_64-cuda8.base new file mode 100644 index 000000000..516dc4ece --- /dev/null +++ b/ci/base/Jenkinsfile-x86_64-cuda8.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "x86_64-centos7-cuda8.0" +def BUILDTYPE = "cuda8" +def cuda = "nvidia/cuda:8.0-cudnn5-devel-centos7" +def cudart = "nvidia/cuda:8.0-cudnn5-runtime-centos7" +def extratag = "-cuda8" +def linuxwheel = "x86_64-centos7-cuda8.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" +def labelruntime = "nvidia-docker" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" diff --git a/ci/base/Jenkinsfile-x86_64-cuda9.base b/ci/base/Jenkinsfile-x86_64-cuda9.base new file mode 100644 index 000000000..5875eac99 --- /dev/null +++ b/ci/base/Jenkinsfile-x86_64-cuda9.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo 
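These `ci/base/*.base` files only define the per-platform variables; the shared pipeline body lives in `ci/Jenkinsfile.template`, and the checked-in `ci/Jenkinsfile-*` files are marked as auto-generated by `scripts/make_jenkinsfiles.sh` (the generator itself is not part of this diff). Conceptually the generation amounts to something like the sketch below; the exact mechanism, including how the auto-generated header and the platform-specific stage names get injected, is an assumption:
```
# Hypothetical sketch of what scripts/make_jenkinsfiles.sh might do; the real
# script (not shown in this diff) also adds the "FILE IS AUTO-GENERATED" header
# and rewrites the stage names per platform.
for base in ci/base/Jenkinsfile-*.base; do
    target="ci/$(basename "${base}" .base)"
    cat "${base}" ci/Jenkinsfile.template > "${target}"
done
```
The `data_dirs` strings defined in these files are ordinary `docker -v` volume mounts; they are passed through as `DATA_DIRS` to `scripts/make-docker-runtests.sh` and `scripts/make-docker-runtime.sh`, presumably so the test and runtime containers can mount the shared datasets.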
+ +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "x86_64-centos7-cuda9.0" +def BUILDTYPE = "cuda9" +def cuda = "nvidia/cuda:9.0-cudnn7-devel-centos7" +def cudart = "nvidia/cuda:9.0-cudnn7-runtime-centos7" +def extratag = "-cuda9" +def linuxwheel = "x86_64-centos7-cuda9.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" +def labelruntime = "nvidia-docker" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" diff --git a/ci/base/Jenkinsfile-x86_64-cuda92.base b/ci/base/Jenkinsfile-x86_64-cuda92.base new file mode 100644 index 000000000..5ad2fcb9c --- /dev/null +++ b/ci/base/Jenkinsfile-x86_64-cuda92.base @@ -0,0 +1,25 @@ +// TOOD: rename to @Library('h2o-jenkins-pipeline-lib') _ +@Library('test-shared-library') _ + +import ai.h2o.ci.Utils +import static ai.h2o.ci.Utils.banner +def utilsLib = new Utils() +import ai.h2o.ci.BuildInfo + +def commitMessage = '' +def h2o4gpuUtils = null + +def platform = "x86_64-centos7-cuda9.2" +def BUILDTYPE = "cuda92" +def cuda = "nvidia/cuda:9.2-cudnn7-devel-centos7" +def cudart = "nvidia/cuda:9.2-cudnn7-runtime-centos7" +def extratag = "-cuda92" +def linuxwheel = "x86_64-centos7-cuda92.whl" +def testtype = "dotest" +def labelbuild = "nvidia-docker" +def labeltest = "gpu && nvidia-docker" +def labelruntime = "nvidia-docker" +def doingbenchmark = "0" +def dobenchmark = "0" +def doruntime = "1" +def data_dirs = "-v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data" diff --git a/Makefile_header.mk b/make/Makefile_header.mk similarity index 100% rename from Makefile_header.mk rename to make/Makefile_header.mk diff --git a/make/config.mk b/make/config.mk index 706438995..dd3355373 100644 --- a/make/config.mk +++ b/make/config.mk @@ -1,13 +1,52 @@ # -# Build configuration +# BUILD CONFIGURATION VARIABLES +# + +# Set to 1 or ON to build with NVTX support +USENVTX=0 + +# By default 0 means Release, set to "Debug" if you want to compile sources with debug flags +CMAKE_BUILD_TYPE=0 + +$(warning USENVTX is $(USENVTX)) +$(warning CMAKE_BUILD_TYPE is $(CMAKE_BUILD_TYPE)) + +# +# PROJECT DEPENDENCY RELATED VARIABLES # -# Location of artifacts -# E.g. "s3://bucket/dirname" -ARTIFACTS_BUCKET = s3://artifacts.h2o.ai/releases # Location of local directory with dependencies DEPS_DIR = deps +# NCCL support in XGBoost. To turn off set USENCCL=0 during build +USENCCL=1 + +# By default build both CPU and GPU variant +USECUDA=1 + +ifeq ($(USECUDA), 0) + $(warning Building with only CPU support ON.) + XGBOOST_TARGET=libxgboost-cpu +else + ifeq ($(USENCCL), 0) + $(warning XGBoost NCCL support is OFF.) + XGBOOST_TARGET=libxgboost2 + else + $(warning XGBoost NCCL support is ON.) + XGBOOST_TARGET=libxgboost + endif + CUDA_LIB=$(CUDA_HOME)/lib64 + MAKEFILE_CUDA_VERSION ?= $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev) + CUDA_MAJOR_VERSION = $(shell echo $(MAKEFILE_CUDA_VERSION) | cut -d "." -f 1) +endif + +# PyDataTable version. Currently not used in the code. 
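The switches at the top of `make/config.mk` are plain make variables, so they can be overridden on the command line, and the CUDA version probe is an ordinary shell pipeline. A short sketch, assuming the usual `make fullinstall` entry point:
```
# Override the config.mk switches at build time (values as documented above);
# the `fullinstall` target name is assumed here.
make fullinstall USENCCL=0                         # XGBoost without NCCL
make fullinstall USECUDA=0                         # CPU-only build
make fullinstall CMAKE_BUILD_TYPE=Debug USENVTX=1  # debug build with nvToolsExt

# The same probe config.mk uses for MAKEFILE_CUDA_VERSION; e.g. a
# /usr/local/cuda/lib64/libcudart.so.9.0 symlink yields "9.0":
ls ${CUDA_HOME:-/usr/local/cuda}/lib64/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev
```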
+#PYDATATABLE_VERSION = 0.1.0+master.97 + +# +# TEST DATA VARIABLES +# + # Location of datasets SMALLDATA_BUCKET = s3://h2o-public-test-data/smalldata @@ -24,17 +63,58 @@ OPEN_DATA_BUCKET = s3://h2o-public-test-data/h2o4gpu/open_data OPEN_DATA_DIR = open_data # -# PyDataTable +# R PACKAGE CONFIGURATIONS # -#PYDATATABLE_VERSION = 0.1.0+master.97 +INSTALL_R = 1 +R_VERSION = 3.1.0 # -# XGBoost +# VARIABLES USED DURING BUILD - YOU PROBABLY DON'T WANT TO CHANGE THESE # -XGBOOST_VERSION = 0.6 -# -# R package Configurations -# -INSTALL_R = 1 -R_VERSION = 3.1.0 +# Build version +MAJOR_MINOR=$(shell echo $(BASE_VERSION) | sed 's/.*\(^[0-9][0-9]*\.[0-9][0-9]*\).*/\1/g' ) + +# OS info for Python +# Python has crazy ideas about os names +OS := $(shell uname) +ifeq ($(OS), Darwin) + PY_OS ?= "macosx" +else + PY_OS ?= $(OS) +endif + +PYTHON ?= python + +# UUID for logs +RANDOM := $(shell bash -c 'echo $$RANDOM') +LOGEXT=$(RANDOM)$(shell date +'_%Y.%m.%d-%H:%M:%S') + +# Utilize all procs in certain tasks +NUMPROCS := $(shell cat /proc/cpuinfo|grep processor|wc -l) + +# Docker image tagging +DOCKER_VERSION_TAG ?= "latest" + +# BUILD_INFO setup +H2O4GPU_COMMIT ?= $(shell git rev-parse HEAD) +H2O4GPU_BUILD_DATE := $(shell date) +H2O4GPU_BUILD ?= "LOCAL BUILD @ $(shell git rev-parse --short HEAD) build at $(H2O4GPU_BUILD_DATE)" +H2O4GPU_SUFFIX ?= "+local_$(shell git describe --always --dirty)" + +# Setup S3 access credentials +S3_CMD_LINE := aws s3 + +DIST_DIR = dist + +ARCH := $(shell arch) +ifdef CUDA_MAJOR_VERSION + PLATFORM = $(ARCH)-centos7-cuda$(MAKEFILE_CUDA_VERSION) +else + PLATFORM = $(ARCH)-centos7-cpu +endif + +DOCKER_ARCH= +ifeq (${ARCH}, ppc64le) + DOCKER_ARCH="-ppc64le" +endif diff --git a/scripts/daal/install_daal.sh b/scripts/daal/install_daal.sh index 8729bc79d..907c262e6 100644 --- a/scripts/daal/install_daal.sh +++ b/scripts/daal/install_daal.sh @@ -8,14 +8,20 @@ set -e _intel_dall_tar="https://s3.amazonaws.com/intel-daal/daal-linux_x86_64__cp36.tar.gz" -axel -a -n 20 $_intel_dall_tar && tar xzvf daal-linux_x86_64__cp36.tar.gz -C $HOME && -rm -rf daal-linux_x86_64__cp36.tar.gz && -eval "$(/root/.pyenv/bin/pyenv init -)" && -pip install $HOME/daal/pydaal-2018.0.1.20171012-cp36-none-linux_x86_64.whl && -ln -sf $HOME/daal/lib/libtbb.so.2 /usr/lib/libtbb.so.2 && -ln -sf $HOME/daal/lib/libtbb.so /usr/lib/libtbb.so && -ln -sf $HOME/daal/lib/libtbbmalloc.so.2 /usr/lib/libtbbmalloc.so.2 && -ln -sf $HOME/daal/lib/libtbbmalloc.so /usr/lib/libtbbmalloc.so && -ln -sf $HOME/daal/lib/libdaal_sequential.so /usr/lib/libdaal_sequential.so && -ln -sf $HOME/daal/lib/libdaal_core.so /usr/lib/libdaal_core.so && +if hash axel 2>/dev/null; then + axel -a -n 20 $_intel_dall_tar +else + wget $_intel_dall_tar +fi + +tar xzvf daal-linux_x86_64__cp36.tar.gz -C $HOME +rm -rf daal-linux_x86_64__cp36.tar.gz + +pip install $HOME/daal/pydaal-2018.0.1.20171012-cp36-none-linux_x86_64.whl +ln -sf $HOME/daal/lib/libtbb.so.2 /usr/lib/libtbb.so.2 +ln -sf $HOME/daal/lib/libtbb.so /usr/lib/libtbb.so +ln -sf $HOME/daal/lib/libtbbmalloc.so.2 /usr/lib/libtbbmalloc.so.2 +ln -sf $HOME/daal/lib/libtbbmalloc.so /usr/lib/libtbbmalloc.so +ln -sf $HOME/daal/lib/libdaal_sequential.so /usr/lib/libdaal_sequential.so +ln -sf $HOME/daal/lib/libdaal_core.so /usr/lib/libdaal_core.so ln -sf $HOME/daal/lib/libdaal_thread.so /usr/lib/libdaal_thread.so diff --git a/scripts/daal/install_daal_locally.sh b/scripts/daal/install_daal_locally.sh deleted file mode 100644 index 5f771ea61..000000000 --- a/scripts/daal/install_daal_locally.sh +++ /dev/null @@ 
-1,49 +0,0 @@ -#!/bin/bash -#=============================================================================== -# name: install_daal_locally.sh -# -# installs to the system intel daal libraries and pydaal (python version of daal) -#=============================================================================== -set -e - -_intel_dall_tar="https://s3.amazonaws.com/intel-daal/daal-linux_x86_64__cp36.tar.gz" - -function daal_downloaded { - if [ -f "$HOME/daal/pydaal-2018.0.1.20171012-cp36-none-linux_x86_64.whl" ]; then - echo "PyDAAL wheel already downloaded"; - else - echo "PyDAAL wheel must be downloaded, this may take a while."; - fi -} - -function pip_wheel { - echo "Installing PyDAAL ..." - pip install $HOME/daal/pydaal-2018.0.1.20171012-cp36-none-linux_x86_64.whl && - sudo ln -sf $HOME/daal/lib/libtbb.so.2 /usr/lib/libtbb.so.2 && - sudo ln -sf $HOME/daal/lib/libtbb.so /usr/lib/libtbb.so && - sudo ln -sf $HOME/daal/lib/libtbbmalloc.so.2 /usr/lib/libtbbmalloc.so.2 && - sudo ln -sf $HOME/daal/lib/libtbbmalloc.so /usr/lib/libtbbmalloc.so && - sudo ln -sf $HOME/daal/lib/libdaal_sequential.so /usr/lib/libdaal_sequential.so && - sudo ln -sf $HOME/daal/lib/libdaal_core.so /usr/lib/libdaal_core.so && - sudo ln -sf $HOME/daal/lib/libdaal_thread.so /usr/lib/libdaal_thread.so -} - -function install_daal { - echo "Unpacking PyDAAL wheel ..." - tar xzvf daal-linux_x86_64__cp36.tar.gz -C $HOME && - rm -rf daal-linux_x86_64__cp36.tar.gz && - eval "$(/root/.pyenv/bin/pyenv init -)" && - pip_wheel -} - -# detect if axel is installed -daal_downloaded -if [[ $? -ne 0 ]]; then - if hash axel 2>/dev/null; then - axel -a -n 20 $_intel_dall_tar && install_daal - else - wget $_intel_dall_tar && install_daal - fi -else - pip_wheel -fi diff --git a/scripts/data_prep.R b/scripts/data_prep.R index 1b0dfb1ed..70539cc92 100644 --- a/scripts/data_prep.R +++ b/scripts/data_prep.R @@ -5,19 +5,19 @@ #'@param save_csv_path Path to save processed data as a csv #'@param max_label_encoding_levels The maximum number of uniques required in a column to consider it a categorical variable. 
Default is 1000 prep_data <- function(data_table, response, save_csv_path = NULL, max_label_encoding_levels = 1000){ - + if (!is.data.table(data_table)) { stop ("Input data should be of type data.table") } - + if (is.character(response)) { print(paste0("Response is -> ",response)) } else { print(paste0("Response is -> ",colnames(data_table)[response])) } - + print(paste0("Number of columns: ", ncol(data_table))) - + print(paste0("Number of rows: ", nrow(data_table))) ## Label-encoding of categoricals (those cols with fewer than `label_encoding_levels` levels, but not constant) @@ -26,7 +26,7 @@ prep_data <- function(data_table, response, save_csv_path = NULL, max_label_enco for (ff in feature.names) { tt <- uniqueN(data_table[[ff]]) if (tt <= max_label_encoding_levels && tt > 1) { - data_table[, (ff):=factor(data_table[[ff]])] + data_table[, (ff):=factor(data_table[[ff]])] print(paste0(ff," has ",tt," levels")) } if (tt < 2) { @@ -34,37 +34,37 @@ prep_data <- function(data_table, response, save_csv_path = NULL, max_label_enco data_table[, (ff):=NULL] } } - + print(paste0("Number of columns after label encoding: ", ncol(data_table))) - + num_cols <- names(data_table)[which(sapply(data_table, is.numeric))] cat_cols <- names(data_table)[which(sapply(data_table, is.factor))] print(paste0("Number of numeric columns: ", length(num_cols))) print(paste0("Number of categorical columns: ", length(cat_cols))) - + ## impute missing values, drop near-const cols and standardize the data print("Imputing missing values using mean...") cols <- setdiff(num_cols,c(response)) for (c in cols) { data_table[!is.finite(data_table[[c]]), (c):=mean(data_table[[c]], na.rm=TRUE)] - if (!is.finite(sd(data_table[[c]])) || sd(data_table[[c]])<1e-4) + if (!is.finite(sd(data_table[[c]])) || sd(data_table[[c]])<1e-4) data_table[,(c):=NULL] else data_table[,(c):=scale(as.numeric(data_table[[c]]))] } print(paste0("Number of columns after mean imputation: ", ncol(data_table))) - + ## one-hot encode the categoricals print("One hot encoding data table categoricals only...") data_table2 <- as.data.table(model.matrix(data_table[[response]]~., data = data_table[,c(cat_cols), with=FALSE], sparse=FALSE))[,-1] print(paste0("Number of columns that have been one hot encoded: ", ncol(data_table2))) - + ## add back the numeric columns and assign back to data_table print("Add back numeric columns and assign to data table") data_table <- data_table2[,(num_cols):=data_table[,num_cols,with=FALSE]] - + print(paste0("Final dimensions of data table after pre processing: ", nrow(data_table), " by ", ncol(data_table))) - + ## check validity of data print(paste0("Number of NA's in final data table after pre processing: ", sum(sapply(data_table, is.na)))) print(paste0("Number of numeric's in final data table after pre processing: ", sum(sapply(data_table, is.numeric)))) @@ -73,12 +73,12 @@ prep_data <- function(data_table, response, save_csv_path = NULL, max_label_enco } else { print("Some entries are not finite in final data table after pre processing. 
Please inspect final data table") } - + ## save preprocessed file as CSV if (!is.null(save_csv_path)) { print(paste0("Saving processed data to ", save_csv_path)) fwrite(data_table, save_csv_path) } - + return(data_table) } diff --git a/scripts/g++_wrapper.sh b/scripts/g++_wrapper.sh deleted file mode 100755 index 2a3e4cd78..000000000 --- a/scripts/g++_wrapper.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -ARGS=() -for var in "$@"; do - [ "$var" != '-fno-plt' ] && [ "$var" != '-mtune=haswell' ] && ARGS+=("$var") -done -/usr/bin/g++ "${ARGS[@]}" - diff --git a/scripts/gcc_wrapper.sh b/scripts/gcc_wrapper.sh deleted file mode 100755 index de461e95d..000000000 --- a/scripts/gcc_wrapper.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -ARGS=() -for var in "$@"; do - [ "$var" != '-fno-plt' ] && [ "$var" != '-mtune=haswell' ] && ARGS+=("$var") -done -/usr/bin/gcc "${ARGS[@]}" - diff --git a/scripts/gitshallow_submodules.sh b/scripts/gitshallow_submodules.sh index 7bb25cedd..d2ecc68b4 100755 --- a/scripts/gitshallow_submodules.sh +++ b/scripts/gitshallow_submodules.sh @@ -1,15 +1,12 @@ - #!/bin/bash git submodule init for i in $(git submodule | awk '{print $2}'); do spath=$(git config -f .gitmodules --get submodule.$i.path) surl=$(git config -f .gitmodules --get submodule.$i.url) echo "submodule:" $i $spath $surl -# if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] || [ $spath == "scikit-learn || [ $spath == "xgboost" ] ; then - if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] || [ $spath == "scikit-learn" ] ; then # can't add xgboost because not pulling from master -# if [ $spath == "cub" ] || [ $spath == "nccl" ] || [ $spath == "py3nvml" ] ; then # can't add xgboost because not pulling from master - git submodule update --depth 1 $spath - else + if [ $spath == "xgboost" ] || [ $spath == "tests/googletest" ] ; then git submodule update $spath + else + git submodule update --depth 1 $spath fi done diff --git a/scripts/make-docker-devel.sh b/scripts/make-docker-devel.sh index ae28b6571..b0c7eade3 100755 --- a/scripts/make-docker-devel.sh +++ b/scripts/make-docker-devel.sh @@ -1,39 +1,33 @@ #!/bin/bash set -e -# split layer and version -IFS=':' read -ra LAYER_VERSION <<< "${dockerimage}" -layer=${LAYER_VERSION[0]} -version=${LAYER_VERSION[1]} - -if [ "$layer" == "ubuntu" ] -then - docker=docker -else - docker=nvidia-docker -fi +H2O4GPU_BUILD="${H2O4GPU_BUILD:-0}" +H2O4GPU_SUFFIX="${H2O4GPU_SUFFIX:-''}" +CONTAINER_NAME="${CONTAINER_NAME:-$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1)}" +makeopts="${makeopts:-}" +DOCKER_CLI='nvidia-docker' #--build-arg http_proxy=http://172.16.2.142:3128/ echo "Docker devel - BEGIN" -$docker build -t opsh2oai/h2o4gpu-buildversion${extratag}-build -f Dockerfile-build --rm=false --build-arg layer=$layer --build-arg version=$version . -#-u `id -u`:`id -g` -w `pwd` -v `pwd`:`pwd`:rw -$docker run --init --rm --name ${CONTAINER_NAME} -d -t -u root -v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-buildversion${extratag}-build +$DOCKER_CLI build -t opsh2oai/h2o4gpu-buildversion${extratag}-build -f Dockerfile-build --rm=false --build-arg docker_name=${dockerimage} . + +$DOCKER_CLI run --init --rm --name ${CONTAINER_NAME} -d -t -u root -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-buildversion${extratag}-build echo "Docker devel - Copying files" -$docker exec ${CONTAINER_NAME} bash -c 'mkdir -p repo ; cp -a /dot/. 
./repo' +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'mkdir -p repo ; cp -a /dot/. ./repo' -echo "setup pyenv, shallow clone, and make fullinstalljenkins with ${H2O4GPU_BUILD} and ${H2O4GPU_SUFFIX}" -$docker exec ${CONTAINER_NAME} bash -c "eval \"\$(/root/.pyenv/bin/pyenv init -)\" ; /root/.pyenv/bin/pyenv global 3.6.1; cd repo ; ./scripts/gitshallow_submodules.sh ; make ${makeopts} fullinstalljenkins${extratag} H2O4GPU_BUILD=${H2O4GPU_BUILD} H2O4GPU_SUFFIX=${H2O4GPU_SUFFIX}" +echo "shallow clone, and make buildinstall with ${H2O4GPU_BUILD} and ${H2O4GPU_SUFFIX}" +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c "cd repo ; make ${makeopts} buildinstall H2O4GPU_BUILD=${H2O4GPU_BUILD} H2O4GPU_SUFFIX=${H2O4GPU_SUFFIX}" echo "Docker devel - Clean local wheels and Copying wheel from docker" -rm -rf src/interface_py/${dist}/*.whl -$docker cp -a ${CONTAINER_NAME}:repo/src/interface_py/${dist} src/interface_py/ +rm -rf src/interface_py/dist/ +$DOCKER_CLI cp -a ${CONTAINER_NAME}:/root/repo/src/interface_py/dist src/interface_py/ echo "Docker devel - Copying VERSION.txt" -mkdir -p build ; $docker cp ${CONTAINER_NAME}:repo/build/VERSION.txt build/ +mkdir -p build ; $DOCKER_CLI cp ${CONTAINER_NAME}:/root/repo/build/VERSION.txt build/ echo "Docker devel - Stopping docker" -$docker stop ${CONTAINER_NAME} +$DOCKER_CLI stop ${CONTAINER_NAME} echo "Docker devel - END" diff --git a/scripts/make-docker-runtests.sh b/scripts/make-docker-runtests.sh index 2253dea96..344013067 100755 --- a/scripts/make-docker-runtests.sh +++ b/scripts/make-docker-runtests.sh @@ -2,55 +2,48 @@ # Requires one has already done(e.g.): make docker-build-nccl-cuda9 to get wheel built or wheel was unstashed on jenkins set -e -# split layer and version -IFS=':' read -ra LAYER_VERSION <<< "${dockerimage}" -layer=${LAYER_VERSION[0]} -version=${LAYER_VERSION[1]} - -if [ "$layer" == "ubuntu" ] -then - docker=docker -else - docker=nvidia-docker -fi +DOCKER_CLI='nvidia-docker' +H2O4GPU_BUILD="${H2O4GPU_BUILD:-0}" +DATA_DIRS="${DATA_DIRS:-}" echo "Docker devel test and pylint - BEGIN" # --build-arg http_proxy=http://172.16.2.142:3128/ -$docker build -t opsh2oai/h2o4gpu-buildversion${extratag}-build -f Dockerfile-build --rm=false --build-arg layer=$layer --build-arg version=$version . +$DOCKER_CLI build -t opsh2oai/h2o4gpu-buildversion${extratag}-build -f Dockerfile-runtime --rm=false --build-arg docker_name=${dockerimage} . + #-u `id -u`:`id -g` -w `pwd` -v `pwd`:`pwd`:rw -$docker run --init --rm --name ${CONTAINER_NAME} -d -t -u root -v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-buildversion${extratag}-build +$DOCKER_CLI run --init --rm --name ${CONTAINER_NAME} -d -t -u root ${DATA_DIRS} -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-buildversion${extratag}-build echo "Docker devel test and pylint - Copying files" -$docker exec ${CONTAINER_NAME} bash -c 'mkdir -p repo ; cp -a /dot/. ./repo' -$docker exec ${CONTAINER_NAME} bash -c 'cd ./repo ; ln -sf /data . || true ; ln -sf /open_data . || true' +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'mkdir -p repo ; cp -a /dot/. ./repo' +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd ./repo ; ln -sf /data . || true ; ln -sf /open_data . 
|| true' -echo "Docker devel test and pylint - setup pyenv, pip install wheel from ${dist}, make ${target}" +echo "Docker devel test and pylint - pip install wheel from dist/${platform}, make ${target}" # Don't use version in wheel name when find so local call to this script works without specific jenkins versions -# Just ensure clean ${dist}/*.whl before unstash in jenkins -$docker exec ${CONTAINER_NAME} bash -c 'export HOME=`pwd`; eval "$(/root/.pyenv/bin/pyenv init -)" ; /root/.pyenv/bin/pyenv global 3.6.1; cd repo ; pip install `find /dot/src/interface_py/'${dist}' -name "*h2o4gpu-*.whl"`; pip freeze ; make '${target} +# Just ensure clean dist/${platform}/*.whl before unstash in jenkins +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'export HOME=`pwd` ; cd repo ; pip install `find /dot/src/interface_py/dist/'${platform}' -name "*h2o4gpu-*.whl"`; pip freeze ; make '${target} { # try echo "Docker devel test and pylint - copy any dat results" rm -rf results ; mkdir -p results/ touch results/emptyresults.dat - nvidia-docker cp -a ${CONTAINER_NAME}:repo/results results/ + $DOCKER_CLI cp -a ${CONTAINER_NAME}:repo/results results/ } || { # catch echo "No results dat files" } echo "Docker devel test and pylint - copy build reports" rm -rf build/test-reports ; mkdir -p build/test-reports/ -$docker cp -a ${CONTAINER_NAME}:repo/build/test-reports build/ +$DOCKER_CLI cp -a ${CONTAINER_NAME}:repo/build/test-reports build/ echo "Docker devel test and pylint - copy logs for arch" rm -rf tmp ; mkdir -p tmp -$docker cp -a ${CONTAINER_NAME}:repo/tmp ./ +$DOCKER_CLI cp -a ${CONTAINER_NAME}:repo/tmp ./ echo "Docker devel test and pylint - pylint" -$docker exec ${CONTAINER_NAME} touch ./repo/src/interface_py/h2o4gpu/__init__.py -$docker exec ${CONTAINER_NAME} bash -c 'eval "$(/root/.pyenv/bin/pyenv init -)" ; /root/.pyenv/bin/pyenv global 3.6.1; cd repo ; make pylint' +$DOCKER_CLI exec ${CONTAINER_NAME} touch ./repo/src/interface_py/h2o4gpu/__init__.py +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd repo ; make pylint' echo "Docker devel test and pylint - stop" -$docker stop ${CONTAINER_NAME} +$DOCKER_CLI stop ${CONTAINER_NAME} diff --git a/scripts/make-docker-runtime.sh b/scripts/make-docker-runtime.sh index 6ec1ada59..b38e4aa18 100755 --- a/scripts/make-docker-runtime.sh +++ b/scripts/make-docker-runtime.sh @@ -1,61 +1,52 @@ #!/bin/bash set -e -# split layer and version -IFS=':' read -ra LAYER_VERSION <<< "${dockerimage}" -layer=${LAYER_VERSION[0]} -version=${LAYER_VERSION[1]} - -if [ "$layer" == "ubuntu" ] -then - docker=docker -else - docker=nvidia-docker -fi +DOCKER_CLI='nvidia-docker' +DATA_DIRS="${DATA_DIRS:-}" echo "Docker runtime - BEGIN" echo "Docker runtime - Build" # wheel=${encodedFullVersionTag}${extratag}/h2o4gpu-${encodedFullVersionTag}-cp36-cp36m-linux_x86_64.whl # use this if want to pull from s3 in Dockerfile-runtime # --build-arg http_proxy=http://172.16.2.142:3128/ -$docker build -t opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest -f Dockerfile-runtime --rm=false --build-arg layer=$layer --build-arg version=$version . +$DOCKER_CLI build -t opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest -f Dockerfile-runtime --rm=false --build-arg docker_name=${dockerimage} . 
# -u `id -u`:`id -g` -d -t -w `pwd` -v `pwd`:`pwd`:rw echo "Runtime Docker - Run" -$docker run --init --rm --name ${CONTAINER_NAME} -d -t -u root -v /home/0xdiag/h2o4gpu/data:/data -v /home/0xdiag/h2o4gpu/open_data:/open_data -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest +$DOCKER_CLI run --init --rm --name ${CONTAINER_NAME} -d -t -u root ${DATA_DIRS} -v `pwd`:/dot --entrypoint=bash opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest echo "Docker runtime - pip install h2o4gpu and pip freeze" -$docker exec ${CONTAINER_NAME} bash -c '. /h2o4gpu_env/bin/activate ; pip install `find /dot/src/interface_py/'${dist}' -name "*h2o4gpu-*.whl" | xargs ls -tr | tail -1` ; pip freeze' +$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'pip install `find /dot/src/interface_py/dist/'${platform}' -name "*h2o4gpu-*.whl" | xargs ls -tr | tail -1` ; pip freeze' { # try echo "Docker runtime - Getting Data" - #nvidia-docker exec ${CONTAINER_NAME} bash -c '. /h2o4gpu_env/bin/activate ; mkdir -p scripts ; rm -rf scripts/fcov_get.py ; echo "from sklearn.datasets import fetch_covtype" > ./scripts/fcov_get.py ; echo "cov = fetch_covtype()" >> ./scripts/fcov_get.py' - #nvidia-docker exec ${CONTAINER_NAME} bash -c '. /h2o4gpu_env/bin/activate ; cd /jupyter/ ; python ../scripts/fcov_get.py' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; mkdir -p ./scikit_learn_data/covertype ; cp /open_data/covertype/* ./scikit_learn_data/covertype' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; mkdir -p ./scikit_learn_data/lfw_home ; cp -af /open_data/lfw_home ./scikit_learn_data' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/creditcard.csv .' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/kmeans_data/h2o-logo.jpg' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/Temples-shrines-and-castles-in-Japan-social-media-image.jpg' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/china.jpg' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/ipums_1k.csv .' - $docker exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/ipums.feather .' - $docker exec -u root ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; chmod -R a+rwx .' + #$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'mkdir -p scripts ; rm -rf scripts/fcov_get.py ; echo "from sklearn.datasets import fetch_covtype" > ./scripts/fcov_get.py ; echo "cov = fetch_covtype()" >> ./scripts/fcov_get.py' + #$DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; python ../scripts/fcov_get.py' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; mkdir -p ./scikit_learn_data/covertype ; cp /open_data/covertype/* ./scikit_learn_data/covertype' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; mkdir -p ./scikit_learn_data/lfw_home ; cp -af /open_data/lfw_home ./scikit_learn_data' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/creditcard.csv .' 
+ $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/kmeans_data/h2o-logo.jpg' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/Temples-shrines-and-castles-in-Japan-social-media-image.jpg' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; wget https://s3.amazonaws.com/h2o-public-test-data/h2o4gpu/open_data/china.jpg' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/ipums_1k.csv .' + $DOCKER_CLI exec ${CONTAINER_NAME} bash -c 'cd /jupyter/demos ; cp /data/ipums.feather .' + $DOCKER_CLI exec -u root ${CONTAINER_NAME} bash -c 'cd /jupyter/ ; chmod -R a+rwx .' } || { # catch echo "Some Data Not Obtained" } -$docker commit ${CONTAINER_NAME} opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest +$DOCKER_CLI commit ${CONTAINER_NAME} opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime:latest echo "Docker runtime - stopping docker" -$docker stop ${CONTAINER_NAME} +$DOCKER_CLI stop ${CONTAINER_NAME} if [ -z `command -v pbzip2` ] then echo "Docker runtime - saving docker to local disk -- native system must have bzip2" - $docker save opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime | bzip2 > h2o4gpu-${fullVersionTag}${extratag}-runtime.tar.bz2 + $DOCKER_CLI save opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime | bzip2 > h2o4gpu-${fullVersionTag}${extratag}-runtime.tar.bz2 else echo "Docker runtime - saving docker to local disk -- native system must have pbzip2" - $docker save opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime | pbzip2 > h2o4gpu-${fullVersionTag}${extratag}-runtime.tar.bz2 + $DOCKER_CLI save opsh2oai/h2o4gpu-${versionTag}${extratag}-runtime | pbzip2 > h2o4gpu-${fullVersionTag}${extratag}-runtime.tar.bz2 fi echo "Docker runtime - END" diff --git a/scripts/make_jenkinsfiles.sh b/scripts/make_jenkinsfiles.sh index 3f78a67d7..ec4395e39 100755 --- a/scripts/make_jenkinsfiles.sh +++ b/scripts/make_jenkinsfiles.sh @@ -4,28 +4,31 @@ # jenkins that stage names have to be static text labels ## declare an array variable -declare -a arr=("nccl-cuda8" "nonccl-cuda9" "nccl-cuda9" "cpu" "nonccl-cuda9" "nccl-cuda9-aws1" "nccl-cuda9-benchmark" "nccl-cuda9-aws1-benchmark" "cpu") +declare -a arr=("x86_64-cuda8" "x86_64-cuda9" "x86_64-cuda92" "ppc64le-cuda8" "ppc64le-cuda9") ## now loop through the above array for i in "${arr[@]}" do echo "$i" - echo "#!/usr/bin/groovy" > Jenkinsfile-$i - echo "" >> Jenkinsfile-$i - echo "//################ FILE IS AUTO-GENERATED from .base files" >> Jenkinsfile-$i - echo "//################ DO NOT MODIFY" >> Jenkinsfile-$i - echo "//################ See scripts/make_jenkinsfiles.sh" >> Jenkinsfile-$i - echo "" >> Jenkinsfile-$i - - cat Jenkinsfile-$i.base >> Jenkinsfile-$i - echo "//################ BELOW IS COPY/PASTE of Jenkinsfile.utils2 (except stage names)" >> Jenkinsfile-$i - cat Jenkinsfile.utils2 >> Jenkinsfile-$i - sed -i 's/stage\(.*\)\"/stage\1 '$i'\"/g' Jenkinsfile-$i + echo "#!/usr/bin/groovy" > ci/Jenkinsfile-$i + echo "" >> ci/Jenkinsfile-$i + echo "//################ FILE IS AUTO-GENERATED from .base files" >> ci/Jenkinsfile-$i + echo "//################ DO NOT MODIFY" >> ci/Jenkinsfile-$i + echo "//################ See scripts/make_jenkinsfiles.sh" >> ci/Jenkinsfile-$i + echo "" >> ci/Jenkinsfile-$i + + cat ci/base/Jenkinsfile-$i.base >> ci/Jenkinsfile-$i + echo "//################ BELOW IS COPY/PASTE of ci/Jenkinsfile.template (except stage 
names)" >> ci/Jenkinsfile-$i + cat ci/Jenkinsfile.template >> ci/Jenkinsfile-$i + + sed -i .bck 's/stage\(.*\)\"/stage\1 '$i'\"/g' ci/Jenkinsfile-$i if [[ $i == *"benchmark"* ]]; then echo "More for benchmarks" - sed -i 's/dobenchmark = \"1\"/dobenchmark = \"0\"/g' Jenkinsfile-$i - sed -i 's/doruntime = \"1\"/doruntime = \"0\"/g' Jenkinsfile-$i + sed -i .bck 's/dobenchmark = \"1\"/dobenchmark = \"0\"/g' ci/Jenkinsfile-$i + sed -i .bck 's/doruntime = \"1\"/doruntime = \"0\"/g' ci/Jenkinsfile-$i fi - + + rm -rf ci/Jenkinsfile-$i.bck + done diff --git a/scripts/prepare_sklearn.sh b/scripts/prepare_sklearn.sh index 4d5a74c58..c76db3339 100755 --- a/scripts/prepare_sklearn.sh +++ b/scripts/prepare_sklearn.sh @@ -25,35 +25,40 @@ done #files=`find -type f | grep -v pycache` files=`find -type f | grep -v pycache | awk '{ print length($0) " " $0; }' | sort -n | cut -d ' ' -f 2-` -for fil in $files -do - echo "Edit contents of $fil" - if [[ "$fil" == *".git"* ]] +function modify_file() { + if [[ "$1" == *".git"* ]] then #echo "skip .git" true else - sed -i 's/sklearn/h2o4gpu/g' $fil - sed -i 's/scikit-learn/h2o4gpu/g' $fil + sed -i 's/sklearn/h2o4gpu/g' $1 + sed -i 's/scikit-learn/h2o4gpu/g' $1 # replace names - sed -i 's/\([^_a-zA-Z0-9]\?\)KMeans\([^_a-zA-Z0-9]\?\)/\1KMeansSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)Ridge\([^_a-zA-Z0-9]\?\)/\1RidgeSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)Lasso\([^_a-zA-Z0-9]\?\)/\1LassoSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)LogisticRegression\([^_a-zA-Z0-9]\?\)/\1LogisticRegressionSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)LinearRegression\([^_a-zA-Z0-9]\?\)/\1LinearRegressionSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)ElasticNet\([^_a-zA-Z0-9]\?\)/\1ElasticNetSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)GradientBoostingRegressor\([^_a-zA-Z0-9]\?\)/\1GradientBoostingRegressorSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)GradientBoostingClassifier\([^_a-zA-Z0-9]\?\)/\1GradientBoostingClassifierSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)RandomForestRegressor\([^_a-zA-Z0-9]\?\)/\1RandomForestRegressorSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)RandomForestClassifier\([^_a-zA-Z0-9]\?\)/\1RandomForestClassifierSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)TruncatedSVD\([^_a-zA-Z0-9]\?\)/\1TruncatedSVDSklearn\2/g' $fil - sed -i 's/\([^_a-zA-Z0-9]\?\)PCA\([^_a-zA-Z0-9]\?\)/\1PCASklearn\2/g' $fil + sed -i 's/\([^_a-zA-Z0-9]\?\)KMeans\([^_a-zA-Z0-9]\?\)/\1KMeansSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)Ridge\([^_a-zA-Z0-9]\?\)/\1RidgeSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)Lasso\([^_a-zA-Z0-9]\?\)/\1LassoSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)LogisticRegression\([^_a-zA-Z0-9]\?\)/\1LogisticRegressionSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)LinearRegression\([^_a-zA-Z0-9]\?\)/\1LinearRegressionSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)ElasticNet\([^_a-zA-Z0-9]\?\)/\1ElasticNetSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)GradientBoostingRegressor\([^_a-zA-Z0-9]\?\)/\1GradientBoostingRegressorSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)GradientBoostingClassifier\([^_a-zA-Z0-9]\?\)/\1GradientBoostingClassifierSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)RandomForestRegressor\([^_a-zA-Z0-9]\?\)/\1RandomForestRegressorSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)RandomForestClassifier\([^_a-zA-Z0-9]\?\)/\1RandomForestClassifierSklearn\2/g' $1 + sed -i 's/\([^_a-zA-Z0-9]\?\)TruncatedSVD\([^_a-zA-Z0-9]\?\)/\1TruncatedSVDSklearn\2/g' $1 + sed -i 
's/\([^_a-zA-Z0-9]\?\)PCA\([^_a-zA-Z0-9]\?\)/\1PCASklearn\2/g' $1 # avoid duplicate conversions - sed -i 's/Sklearn_Sklearn/Sklearn/g' $fil + sed -i 's/Sklearn_Sklearn/Sklearn/g' $1 # other replacements - sed -i "s/from \.\. import get_config as _get_config/import os\n_ASSUME_FINITE = bool(os.environ.get('SKLEARN_ASSUME_FINITE', False))\ndef _get_config\(\):\n return \{'assume_finite': _ASSUME_FINITE\}/g" $fil + sed -i "s/from \.\. import get_config as _get_config/import os\n_ASSUME_FINITE = bool(os.environ.get('SKLEARN_ASSUME_FINITE', False))\ndef _get_config\(\):\n return \{'assume_finite': _ASSUME_FINITE\}/g" $1 fi +} + +for fil in $files +do + modify_file $fil & done +wait + cd .. # inject h2o4gpu into scikit-learn diff --git a/run.sh b/scripts/run.sh similarity index 92% rename from run.sh rename to scripts/run.sh index 4bdc7983b..37c02cc81 100755 --- a/run.sh +++ b/scripts/run.sh @@ -5,8 +5,6 @@ set -e -source h2o4gpu_env/bin/activate - if [ "x$1" != "x" ]; then d=$1 cd $d diff --git a/src/config2.mk b/src/config2.mk deleted file mode 100644 index 48d1cdfdb..000000000 --- a/src/config2.mk +++ /dev/null @@ -1,37 +0,0 @@ -location = $(CURDIR)/$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -WHERE := $(location) -$(info ** -> $(WHERE)) -$(info ** ------------------------------------------------------------------ **) - -NVCC := $(shell command -v nvcc 2> /dev/null) - -#local settings -USENCCL=0 -USENVTX=0 -CMAKE_BUILD_TYPE=0 - -$(warning CMAKE_BUILD_TYPE is $(CMAKE_BUILD_TYPE)) -$(warning USENVTX is $(USENVTX)) -$(warning USENCCL is $(USENCCL)) - -# for R (rest can do both at same time) -#TARGET=gpulib -#$(warning R TARGET is $(TARGET)) - -ifdef NVCC -# CUDA Flags for XGBoost -CUDA_LIB=$(CUDA_HOME)/lib64 -CUDA_VERSION ?= $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev) -CUDA_MAJOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 1) -ifeq ($(shell test $(CUDA_MAJOR) -ge 9; echo $$?),0) - $(warning Compiling with Cuda9 or higher) - # >=52 required for kmeans for larger data of size rows/32>2^16 - XGB_CUDA ?= -DGPU_COMPUTE_VER="35;52;60;61;70" -else - $(warning Compiling with Cuda8 or lower) - # >=52 required for kmeans for larger data of size rows/32>2^16 - XGB_CUDA ?= -DGPU_COMPUTE_VER="35;52;60;61" -endif -else -$(warning No CUDA found.) 
-endif diff --git a/src/interface_py/Makefile b/src/interface_py/Makefile index dc33e1422..69ffcc8d6 100644 --- a/src/interface_py/Makefile +++ b/src/interface_py/Makefile @@ -6,10 +6,12 @@ $(info ** ------------------------------------------------------------------ **) SHELL := /bin/bash # force avoidance of dash as shell thepwd = "$(shell pwd)" +include ../../make/config.mk + default: all pylint: - $$SHELL test.sh + $$SHELL scripts/run-pylint.sh pyformat: @status=0; \ @@ -22,8 +24,8 @@ pyformat: done; \ prep: - PYVER=`python -c 'import sys; print(".".join(map(str, sys.version_info[:2])))' | sed 's/\.//g'` && sed -i 's/python-tag=.*/python-tag=py'$$PYVER'/g' setup.cfg - PYVER=`python -c 'import sys; print(".".join(map(str, sys.version_info[:2])))'` && sed -i 's/python_version==.*/python_version=='$$PYVER'/g' setup.cfg + PYVER=`$(PYTHON) -c 'import sys; print(".".join(map(str, sys.version_info[:2])))' | sed 's/\.//g'` && sed -i 's/python-tag=.*/python-tag=py'$$PYVER'/g' setup.cfg + PYVER=`$(PYTHON) -c 'import sys; print(".".join(map(str, sys.version_info[:2])))'` && sed -i 's/python_version==.*/python_version=='$$PYVER'/g' setup.cfg cd ../../ && bash scripts/apply_sklearn_initmerge.sh # so if change our init it gets updated all: prep @@ -32,7 +34,7 @@ all: prep ln -sf ../../xgboost/python-package/xgboost . ln -sf ../../py3nvml/py3nvml . - python setup.py sdist bdist_wheel + $(PYTHON) setup.py sdist bdist_wheel # update build with xgboost shared library mkdir -p build/lib/xgboost/ @@ -40,7 +42,7 @@ all: prep # Make wheel with other builds added rm -rf dist/*.whl - python setup.py sdist bdist_wheel + $(PYTHON) setup.py sdist bdist_wheel # for pycharm ln -sf $(thepwd)/../interface_c/_ch2o4gpu_cpu.so . @@ -48,14 +50,14 @@ all: prep install: - -cd ../.. && pip uninstall -y h2o4gpu - -cd ../.. && pip uninstall -y xgboost - -cd ../.. && pip uninstall -y py3nvml + -cd ../.. && $(PYTHON) -m pip uninstall -y h2o4gpu + -cd ../.. && $(PYTHON) -m pip uninstall -y xgboost + -cd ../.. 
&& $(PYTHON) -m pip uninstall -y py3nvml find -name *.pyc | xargs rm -rf find -name *__pycache__ | xargs rm -rf # something wrong with below - #python setup.py install - pip install dist/h2o4gpu*.whl --upgrade + #$(PYTHON) setup.py install + $(PYTHON) -m pip install dist/h2o4gpu*.whl --upgrade clean: rm -f h2o4gpu/BUILD_INFO.txt @@ -64,8 +66,7 @@ clean: # This *is* required rm -f h2o4gpu/__init__.py touch h2o4gpu/__init__.py - pip install numpy - python setup.py clean --all && rm -rf h2o4gpu.egg-info && rm -rf h2o4gpu/__pycache__/ && rm -rf dist/ + $(PYTHON) setup.py clean --all && rm -rf h2o4gpu.egg-info && rm -rf h2o4gpu/__pycache__/ && rm -rf dist/ cd h2o4gpu && find -L -name *.pyc | xargs rm -rf diff --git a/requirements_buildonly.txt b/src/interface_py/requirements_buildonly.txt similarity index 84% rename from requirements_buildonly.txt rename to src/interface_py/requirements_buildonly.txt index 053795b33..7a0db6a1f 100644 --- a/requirements_buildonly.txt +++ b/src/interface_py/requirements_buildonly.txt @@ -4,12 +4,7 @@ attrs==17.3.0 execnet==1.5.0 pluggy==0.6.0 py==1.5.2 -pytest==3.3.1 -pytest-forked==0.2 -pytest-xdist==1.20.1 -pytest-cov==2.4.0 six==1.11.0 -pylint==1.8.4 yapf==0.17.0 coverage==4.4.1 # docs @@ -18,7 +13,6 @@ sphinx_rtd_theme==0.2.4 pillow==4.2.1 # compile wheel wheel==0.31.0 -cmake==3.11.0 Cython==0.27.3 # for make testperf h2o==3.18.0.10 diff --git a/requirements_runtime.txt b/src/interface_py/requirements_runtime.txt similarity index 74% rename from requirements_runtime.txt rename to src/interface_py/requirements_runtime.txt index ea8f5e8a6..b4ecd4a38 100644 --- a/requirements_runtime.txt +++ b/src/interface_py/requirements_runtime.txt @@ -11,3 +11,8 @@ psutil==5.4.5 # below for xgboost scikit-learn==0.19.1 sklearn==0.0 +pytest==3.3.1 +pytest-forked==0.2 +pytest-xdist==1.20.1 +pytest-cov==2.4.0 +pylint==1.8.4 diff --git a/requirements_runtime_demos.txt b/src/interface_py/requirements_runtime_demos.txt similarity index 100% rename from requirements_runtime_demos.txt rename to src/interface_py/requirements_runtime_demos.txt diff --git a/src/interface_py/test.sh b/src/interface_py/scripts/run-pylint.sh similarity index 100% rename from src/interface_py/test.sh rename to src/interface_py/scripts/run-pylint.sh diff --git a/src/interface_py/setup.py b/src/interface_py/setup.py index 170bb6f11..4b63cba60 100644 --- a/src/interface_py/setup.py +++ b/src/interface_py/setup.py @@ -66,7 +66,7 @@ def run(self): # reqs is a list of requirement # e.g. ['django==1.5.1', 'mezzanine==1.4.6'] -with open("../../requirements_runtime.txt", "r") as fs: +with open("requirements_runtime.txt", "r") as fs: reqs = [r for r in fs.read().splitlines() if (len(r) > 0 and not r.startswith("#"))] def get_packages(directory): diff --git a/src/interface_r/vignettes/getting_started.Rmd b/src/interface_r/vignettes/getting_started.Rmd index 0ad0d29b1..b90797c0d 100644 --- a/src/interface_r/vignettes/getting_started.Rmd +++ b/src/interface_r/vignettes/getting_started.Rmd @@ -24,7 +24,7 @@ The R package makes use of RStudio's [reticulate](https://rstudio.github.io/reti ## Installation -There are a few [system requirements](https://github.com/h2oai/h2o4gpu#requirements), including Ubuntu 16.04+, Python >=3.6, R >=3.1, CUDA 8 or 9, and a machine with Nvidia GPUs. The code should still run if you have CPUs, but it will fall back to scikit-learn CPU based versions of the algorithms. 
+There are a few [system requirements](https://github.com/h2oai/h2o4gpu#requirements), including Linux with glibc 2.17+, Python >=3.6, R >=3.1, CUDA 8 or 9, and a machine with Nvidia GPUs. The code should still run if you have CPUs, but it will fall back to scikit-learn CPU based versions of the algorithms. The **h2o4gpu** Python module is a prerequisite for the R package. So first, follow the instructions [here](https://github.com/h2oai/h2o4gpu#user-installation) to install the **h2o4gpu** Python package (either at the system level or in a Python virtual envivonment). The easiest thing to do is to `pip install` the stable release `whl` file. To ensure compatibility, the Python package version number should match the R package version number. diff --git a/src/swig/ch2o4gpu_cpu.i b/src/swig/ch2o4gpu_cpu.i index 60a6e1c76..fbd838bb6 100644 --- a/src/swig/ch2o4gpu_cpu.i +++ b/src/swig/ch2o4gpu_cpu.i @@ -14,4 +14,4 @@ %include "solver/elastic_net.i" %include "solver/pogs.i" %include "matrix/matrix_dense.i" -%include "metrics.i" +%include "metrics.i" \ No newline at end of file diff --git a/tests_big/getresultsbig.sh b/tests/python/big/getresultsbig.sh similarity index 100% rename from tests_big/getresultsbig.sh rename to tests/python/big/getresultsbig.sh diff --git a/tests_big/test_glm_hyatt.py b/tests/python/big/test_glm_hyatt.py similarity index 100% rename from tests_big/test_glm_hyatt.py rename to tests/python/big/test_glm_hyatt.py diff --git a/tests_big/test_glm_ipums.py b/tests/python/big/test_glm_ipums.py similarity index 100% rename from tests_big/test_glm_ipums.py rename to tests/python/big/test_glm_ipums.py diff --git a/tests_open/daal/test_daal_normalization.py b/tests/python/open_data/daal/test_daal_normalization.py similarity index 100% rename from tests_open/daal/test_daal_normalization.py rename to tests/python/open_data/daal/test_daal_normalization.py diff --git a/tests_open/daal/test_daal_regression.py b/tests/python/open_data/daal/test_daal_regression.py similarity index 100% rename from tests_open/daal/test_daal_regression.py rename to tests/python/open_data/daal/test_daal_regression.py diff --git a/tests_open/daal/test_daal_ridge_regression.py b/tests/python/open_data/daal/test_daal_ridge_regression.py similarity index 100% rename from tests_open/daal/test_daal_ridge_regression.py rename to tests/python/open_data/daal/test_daal_ridge_regression.py diff --git a/tests_open/daal/test_daal_svd.py b/tests/python/open_data/daal/test_daal_svd.py similarity index 100% rename from tests_open/daal/test_daal_svd.py rename to tests/python/open_data/daal/test_daal_svd.py diff --git a/tests_open/gbm/model_saved.pkl b/tests/python/open_data/gbm/model_saved.pkl similarity index 100% rename from tests_open/gbm/model_saved.pkl rename to tests/python/open_data/gbm/model_saved.pkl diff --git a/tests_open/gbm/test_gpu_prediction_pickledmodel.py b/tests/python/open_data/gbm/test_gpu_prediction_pickledmodel.py similarity index 99% rename from tests_open/gbm/test_gpu_prediction_pickledmodel.py rename to tests/python/open_data/gbm/test_gpu_prediction_pickledmodel.py index f0c257551..7e15fba1b 100644 --- a/tests_open/gbm/test_gpu_prediction_pickledmodel.py +++ b/tests/python/open_data/gbm/test_gpu_prediction_pickledmodel.py @@ -227,7 +227,7 @@ def test_predict_sklearn_frompickle(self): Xtest = makeXtest() # load model - model = load_obj("./tests_open/gbm/model_saved.pkl") + model = load_obj("./tests/python/open_data/gbm/model_saved.pkl") # continue as before print("Before model.predict") diff --git 
a/tests_open/gbm/test_xgb_sklearn_wrapper.py b/tests/python/open_data/gbm/test_xgb_sklearn_wrapper.py similarity index 100% rename from tests_open/gbm/test_xgb_sklearn_wrapper.py rename to tests/python/open_data/gbm/test_xgb_sklearn_wrapper.py diff --git a/tests_open/gbm/test_xgboost.py b/tests/python/open_data/gbm/test_xgboost.py similarity index 100% rename from tests_open/gbm/test_xgboost.py rename to tests/python/open_data/gbm/test_xgboost.py diff --git a/tests_open/gbm/test_xgboost_dtinput.py b/tests/python/open_data/gbm/test_xgboost_dtinput.py similarity index 100% rename from tests_open/gbm/test_xgboost_dtinput.py rename to tests/python/open_data/gbm/test_xgboost_dtinput.py diff --git a/tests_open/getresults.sh b/tests/python/open_data/getresults.sh similarity index 100% rename from tests_open/getresults.sh rename to tests/python/open_data/getresults.sh diff --git a/tests_open/glm/test_elastic_net_ptr_driver.py b/tests/python/open_data/glm/test_elastic_net_ptr_driver.py similarity index 100% rename from tests_open/glm/test_elastic_net_ptr_driver.py rename to tests/python/open_data/glm/test_elastic_net_ptr_driver.py diff --git a/tests_open/glm/test_elastic_net_sklearn.py b/tests/python/open_data/glm/test_elastic_net_sklearn.py similarity index 100% rename from tests_open/glm/test_elastic_net_sklearn.py rename to tests/python/open_data/glm/test_elastic_net_sklearn.py diff --git a/tests_open/glm/test_elasticnet_sklearn_wrapper.py b/tests/python/open_data/glm/test_elasticnet_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_elasticnet_sklearn_wrapper.py rename to tests/python/open_data/glm/test_elasticnet_sklearn_wrapper.py diff --git a/tests_open/glm/test_glm_credit.py b/tests/python/open_data/glm/test_glm_credit.py similarity index 100% rename from tests_open/glm/test_glm_credit.py rename to tests/python/open_data/glm/test_glm_credit.py diff --git a/tests_open/glm/test_glm_np_input.py b/tests/python/open_data/glm/test_glm_np_input.py similarity index 100% rename from tests_open/glm/test_glm_np_input.py rename to tests/python/open_data/glm/test_glm_np_input.py diff --git a/tests_open/glm/test_glm_simple.py b/tests/python/open_data/glm/test_glm_simple.py similarity index 100% rename from tests_open/glm/test_glm_simple.py rename to tests/python/open_data/glm/test_glm_simple.py diff --git a/tests_open/glm/test_glm_sklearn.py b/tests/python/open_data/glm/test_glm_sklearn.py similarity index 100% rename from tests_open/glm/test_glm_sklearn.py rename to tests/python/open_data/glm/test_glm_sklearn.py diff --git a/tests_open/glm/test_lasso.py b/tests/python/open_data/glm/test_lasso.py similarity index 100% rename from tests_open/glm/test_lasso.py rename to tests/python/open_data/glm/test_lasso.py diff --git a/tests_open/glm/test_lasso_sklearn_wrapper.py b/tests/python/open_data/glm/test_lasso_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_lasso_sklearn_wrapper.py rename to tests/python/open_data/glm/test_lasso_sklearn_wrapper.py diff --git a/tests_open/glm/test_lasso_sparsity.py b/tests/python/open_data/glm/test_lasso_sparsity.py similarity index 100% rename from tests_open/glm/test_lasso_sparsity.py rename to tests/python/open_data/glm/test_lasso_sparsity.py diff --git a/tests_open/glm/test_logistic_credit.py b/tests/python/open_data/glm/test_logistic_credit.py similarity index 100% rename from tests_open/glm/test_logistic_credit.py rename to tests/python/open_data/glm/test_logistic_credit.py diff --git a/tests_open/glm/test_logistic_iris.py 
b/tests/python/open_data/glm/test_logistic_iris.py similarity index 100% rename from tests_open/glm/test_logistic_iris.py rename to tests/python/open_data/glm/test_logistic_iris.py diff --git a/tests_open/glm/test_logistic_sklearn_wrapper.py b/tests/python/open_data/glm/test_logistic_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_logistic_sklearn_wrapper.py rename to tests/python/open_data/glm/test_logistic_sklearn_wrapper.py diff --git a/tests_open/glm/test_memory_leak_check.py b/tests/python/open_data/glm/test_memory_leak_check.py similarity index 100% rename from tests_open/glm/test_memory_leak_check.py rename to tests/python/open_data/glm/test_memory_leak_check.py diff --git a/tests_open/glm/test_regression_sklearn_wrapper.py b/tests/python/open_data/glm/test_regression_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_regression_sklearn_wrapper.py rename to tests/python/open_data/glm/test_regression_sklearn_wrapper.py diff --git a/tests_open/glm/test_ridge.py b/tests/python/open_data/glm/test_ridge.py similarity index 100% rename from tests_open/glm/test_ridge.py rename to tests/python/open_data/glm/test_ridge.py diff --git a/tests_open/glm/test_ridge_sklearn_wrapper.py b/tests/python/open_data/glm/test_ridge_sklearn_wrapper.py similarity index 100% rename from tests_open/glm/test_ridge_sklearn_wrapper.py rename to tests/python/open_data/glm/test_ridge_sklearn_wrapper.py diff --git a/tests_open/kmeans/test_kmeans.py b/tests/python/open_data/kmeans/test_kmeans.py similarity index 100% rename from tests_open/kmeans/test_kmeans.py rename to tests/python/open_data/kmeans/test_kmeans.py diff --git a/tests_open/showresults.sh b/tests/python/open_data/showresults.sh similarity index 100% rename from tests_open/showresults.sh rename to tests/python/open_data/showresults.sh diff --git a/tests_open/svd/test_pca.py b/tests/python/open_data/svd/test_pca.py similarity index 100% rename from tests_open/svd/test_pca.py rename to tests/python/open_data/svd/test_pca.py diff --git a/tests_open/svd/test_tsvd.py b/tests/python/open_data/svd/test_tsvd.py similarity index 100% rename from tests_open/svd/test_tsvd.py rename to tests/python/open_data/svd/test_tsvd.py diff --git a/tests_open/svd/test_tsvd_bench.py b/tests/python/open_data/svd/test_tsvd_bench.py similarity index 100% rename from tests_open/svd/test_tsvd_bench.py rename to tests/python/open_data/svd/test_tsvd_bench.py diff --git a/tests_open/svd/test_tsvd_gpuid.py b/tests/python/open_data/svd/test_tsvd_gpuid.py similarity index 100% rename from tests_open/svd/test_tsvd_gpuid.py rename to tests/python/open_data/svd/test_tsvd_gpuid.py diff --git a/tests_open/svd/test_tsvd_power.py b/tests/python/open_data/svd/test_tsvd_power.py similarity index 100% rename from tests_open/svd/test_tsvd_power.py rename to tests/python/open_data/svd/test_tsvd_power.py diff --git a/tests_open/svd/test_tsvd_wrapper.py b/tests/python/open_data/svd/test_tsvd_wrapper.py similarity index 100% rename from tests_open/svd/test_tsvd_wrapper.py rename to tests/python/open_data/svd/test_tsvd_wrapper.py diff --git a/tests_open/svd/test_tsvd_wrapper_iris.py b/tests/python/open_data/svd/test_tsvd_wrapper_iris.py similarity index 100% rename from tests_open/svd/test_tsvd_wrapper_iris.py rename to tests/python/open_data/svd/test_tsvd_wrapper_iris.py diff --git a/tests_open/svd/test_tsvd_wrapper_options.py b/tests/python/open_data/svd/test_tsvd_wrapper_options.py similarity index 100% rename from 
tests_open/svd/test_tsvd_wrapper_options.py rename to tests/python/open_data/svd/test_tsvd_wrapper_options.py diff --git a/tests_open/svd/test_tsvd_x_transformed.py b/tests/python/open_data/svd/test_tsvd_x_transformed.py similarity index 100% rename from tests_open/svd/test_tsvd_x_transformed.py rename to tests/python/open_data/svd/test_tsvd_x_transformed.py diff --git a/tests_open/system/test_import.py b/tests/python/open_data/system/test_import.py similarity index 100% rename from tests_open/system/test_import.py rename to tests/python/open_data/system/test_import.py diff --git a/tests_open/system/test_metrics.py b/tests/python/open_data/system/test_metrics.py similarity index 100% rename from tests_open/system/test_metrics.py rename to tests/python/open_data/system/test_metrics.py diff --git a/tests_small/test-LinearModels.ipynb b/tests/python/small/test-LinearModels.ipynb similarity index 100% rename from tests_small/test-LinearModels.ipynb rename to tests/python/small/test-LinearModels.ipynb diff --git a/tests_small/test_glm_hyatt.py b/tests/python/small/test_glm_hyatt.py similarity index 100% rename from tests_small/test_glm_hyatt.py rename to tests/python/small/test_glm_hyatt.py diff --git a/tests_small/test_glm_ipums.py b/tests/python/small/test_glm_ipums.py similarity index 100% rename from tests_small/test_glm_ipums.py rename to tests/python/small/test_glm_ipums.py diff --git a/tests_small/test_glm_paribas.py b/tests/python/small/test_glm_paribas.py similarity index 100% rename from tests_small/test_glm_paribas.py rename to tests/python/small/test_glm_paribas.py diff --git a/testsxgboost/01_airline_GPU.py b/tests/python/xgboost/01_airline_GPU.py similarity index 100% rename from testsxgboost/01_airline_GPU.py rename to tests/python/xgboost/01_airline_GPU.py diff --git a/testsxgboost/03_football_GPU.py b/tests/python/xgboost/03_football_GPU.py similarity index 100% rename from testsxgboost/03_football_GPU.py rename to tests/python/xgboost/03_football_GPU.py diff --git a/testsxgboost/04_PlanetKaggle_GPU.py b/tests/python/xgboost/04_PlanetKaggle_GPU.py similarity index 100% rename from testsxgboost/04_PlanetKaggle_GPU.py rename to tests/python/xgboost/04_PlanetKaggle_GPU.py diff --git a/testsxgboost/05_FraudDetection_GPU.py b/tests/python/xgboost/05_FraudDetection_GPU.py similarity index 100% rename from testsxgboost/05_FraudDetection_GPU.py rename to tests/python/xgboost/05_FraudDetection_GPU.py diff --git a/testsxgboost/06_HIGGS_GPU.py b/tests/python/xgboost/06_HIGGS_GPU.py similarity index 100% rename from testsxgboost/06_HIGGS_GPU.py rename to tests/python/xgboost/06_HIGGS_GPU.py diff --git a/testsxgboost/extractjson.py b/tests/python/xgboost/extractjson.py similarity index 100% rename from testsxgboost/extractjson.py rename to tests/python/xgboost/extractjson.py diff --git a/tests/python/xgboost/extracttestxgboost.sh b/tests/python/xgboost/extracttestxgboost.sh new file mode 100644 index 000000000..1ac5253c3 --- /dev/null +++ b/tests/python/xgboost/extracttestxgboost.sh @@ -0,0 +1,19 @@ +# get path +MYPWD=`pwd` +echo "PWD is $MYPWD" +export RESULTS_DIR=$MYPWD/results + +# collect only required data +grep -B 2 -A 9 performance $RESULTS_DIR/football.txt > $RESULTS_DIR/football_acc_perf.json +grep -B 2 -A 10 performance $RESULTS_DIR/credit.txt > $RESULTS_DIR/credit_acc_perf.json # also has AUC +grep -B 2 -A 8 performance $RESULTS_DIR/airlines.txt > $RESULTS_DIR/airlines_acc_perf.json +grep -B 2 -A 8 performance $RESULTS_DIR/planet.txt > $RESULTS_DIR/planet_acc_perf.json +grep -B 2 
-A 8 performance $RESULTS_DIR/higgs.txt > $RESULTS_DIR/higgs_acc_perf.json + +# extract results out of the json +python tests/python/xgboost/extractjson.py test_gbm_football $RESULTS_DIR $RESULTS_DIR/football_acc_perf.json $RESULTS_DIR/test_gbm_football.error.dat $RESULTS_DIR/test_gbm_football.error.h2o.dat $RESULTS_DIR/test_gbm_football.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_football.time.dat $RESULTS_DIR/test_gbm_football.time.h2o.dat $RESULTS_DIR/test_gbm_football.time.h2o4gpu.dat +python tests/python/xgboost/extractjson.py test_gbm_credit $RESULTS_DIR $RESULTS_DIR/credit_acc_perf.json $RESULTS_DIR/test_gbm_credit.error.dat $RESULTS_DIR/test_gbm_credit.error.h2o.dat $RESULTS_DIR/test_gbm_credit.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_credit.time.dat $RESULTS_DIR/test_gbm_credit.time.h2o.dat $RESULTS_DIR/test_gbm_credit.time.h2o4gpu.dat +python tests/python/xgboost/extractjson.py test_gbm_airlines $RESULTS_DIR $RESULTS_DIR/airlines_acc_perf.json $RESULTS_DIR/test_gbm_airlines.error.dat $RESULTS_DIR/test_gbm_airlines.error.h2o.dat $RESULTS_DIR/test_gbm_airlines.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_airlines.time.dat $RESULTS_DIR/test_gbm_airlines.time.h2o.dat $RESULTS_DIR/test_gbm_airlines.time.h2o4gpu.dat +python tests/python/xgboost/extractjson.py test_gbm_planet $RESULTS_DIR $RESULTS_DIR/planet_acc_perf.json $RESULTS_DIR/test_gbm_planet.error.dat $RESULTS_DIR/test_gbm_planet.error.h2o.dat $RESULTS_DIR/test_gbm_planet.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_planet.time.dat $RESULTS_DIR/test_gbm_planet.time.h2o.dat $RESULTS_DIR/test_gbm_planet.time.h2o4gpu.dat +python tests/python/xgboost/extractjson.py test_gbm_higgs $RESULTS_DIR $RESULTS_DIR/higgs_acc_perf.json $RESULTS_DIR/test_gbm_higgs.error.dat $RESULTS_DIR/test_gbm_higgs.error.h2o.dat $RESULTS_DIR/test_gbm_higgs.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_higgs.time.dat $RESULTS_DIR/test_gbm_higgs.time.h2o.dat $RESULTS_DIR/test_gbm_higgs.time.h2o4gpu.dat + diff --git a/testsxgboost/libs/conversion.py b/tests/python/xgboost/libs/conversion.py similarity index 100% rename from testsxgboost/libs/conversion.py rename to tests/python/xgboost/libs/conversion.py diff --git a/testsxgboost/libs/football.py b/tests/python/xgboost/libs/football.py similarity index 100% rename from testsxgboost/libs/football.py rename to tests/python/xgboost/libs/football.py diff --git a/testsxgboost/libs/loaders.py b/tests/python/xgboost/libs/loaders.py similarity index 100% rename from testsxgboost/libs/loaders.py rename to tests/python/xgboost/libs/loaders.py diff --git a/testsxgboost/libs/metrics.py b/tests/python/xgboost/libs/metrics.py similarity index 100% rename from testsxgboost/libs/metrics.py rename to tests/python/xgboost/libs/metrics.py diff --git a/testsxgboost/libs/notebook_memory_management.py b/tests/python/xgboost/libs/notebook_memory_management.py similarity index 100% rename from testsxgboost/libs/notebook_memory_management.py rename to tests/python/xgboost/libs/notebook_memory_management.py diff --git a/testsxgboost/libs/planet_kaggle.py b/tests/python/xgboost/libs/planet_kaggle.py similarity index 100% rename from testsxgboost/libs/planet_kaggle.py rename to tests/python/xgboost/libs/planet_kaggle.py diff --git a/testsxgboost/libs/timer.py b/tests/python/xgboost/libs/timer.py similarity index 100% rename from testsxgboost/libs/timer.py rename to tests/python/xgboost/libs/timer.py diff --git a/testsxgboost/libs/utils.py b/tests/python/xgboost/libs/utils.py similarity index 100% rename from testsxgboost/libs/utils.py rename to 
tests/python/xgboost/libs/utils.py diff --git a/testsxgboost/runtestxgboost.sh b/tests/python/xgboost/runtestxgboost.sh similarity index 92% rename from testsxgboost/runtestxgboost.sh rename to tests/python/xgboost/runtestxgboost.sh index 68fc9e1c9..b1532042c 100755 --- a/testsxgboost/runtestxgboost.sh +++ b/tests/python/xgboost/runtestxgboost.sh @@ -17,7 +17,7 @@ then cd $MOUNT_POINT/football/ unzip -o soccer.zip cd ../../ -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 03_football_GPU.py &> $RESULTS_DIR/football.txt # py from export of ipynb removing inline commands cd $MYPWD @@ -27,7 +27,7 @@ if [ $runtests -eq 1 ] || [ $runtests -eq -1 ] then # run credit -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 05_FraudDetection_GPU.py &> $RESULTS_DIR/credit.txt # py from export of ipynb removing inline commands cd $MYPWD @@ -37,7 +37,7 @@ if [ $runtests -eq 2 ] || [ $runtests -eq -1 ] then # run airlines -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 01_airline_GPU.py &> $RESULTS_DIR/airlines.txt # py from export of ipynb removing inline commands cd $MYPWD @@ -62,7 +62,7 @@ rm -rf validate-jpg #mkdir -p validate-jpg #cp -a test-jpg/*.jpg validate-jpg/ cd ../../ -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 04_PlanetKaggle_GPU.py &> $RESULTS_DIR/planet.txt # py from export of ipynb removing inline commands cd $MYPWD @@ -72,7 +72,7 @@ if [ $runtests -eq 4 ] || [ $runtests -eq -1 ] then # run higgs -cd testsxgboost # for libs stuff +cd tests/python/xgboost # for libs stuff ipython 06_HIGGS_GPU.py &> $RESULTS_DIR/higgs.txt # py from export of ipynb removing inline commands cd $MYPWD diff --git a/testsxgboost/extracttestxgboost.sh b/testsxgboost/extracttestxgboost.sh deleted file mode 100644 index cc0a15b7c..000000000 --- a/testsxgboost/extracttestxgboost.sh +++ /dev/null @@ -1,19 +0,0 @@ -# get path -MYPWD=`pwd` -echo "PWD is $MYPWD" -export RESULTS_DIR=$MYPWD/results - -# collect only required data -grep -B 2 -A 9 performance $RESULTS_DIR/football.txt > $RESULTS_DIR/football_acc_perf.json -grep -B 2 -A 10 performance $RESULTS_DIR/credit.txt > $RESULTS_DIR/credit_acc_perf.json # also has AUC -grep -B 2 -A 8 performance $RESULTS_DIR/airlines.txt > $RESULTS_DIR/airlines_acc_perf.json -grep -B 2 -A 8 performance $RESULTS_DIR/planet.txt > $RESULTS_DIR/planet_acc_perf.json -grep -B 2 -A 8 performance $RESULTS_DIR/higgs.txt > $RESULTS_DIR/higgs_acc_perf.json - -# extract results out of the json -python testsxgboost/extractjson.py test_gbm_football $RESULTS_DIR $RESULTS_DIR/football_acc_perf.json $RESULTS_DIR/test_gbm_football.error.dat $RESULTS_DIR/test_gbm_football.error.h2o.dat $RESULTS_DIR/test_gbm_football.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_football.time.dat $RESULTS_DIR/test_gbm_football.time.h2o.dat $RESULTS_DIR/test_gbm_football.time.h2o4gpu.dat -python testsxgboost/extractjson.py test_gbm_credit $RESULTS_DIR $RESULTS_DIR/credit_acc_perf.json $RESULTS_DIR/test_gbm_credit.error.dat $RESULTS_DIR/test_gbm_credit.error.h2o.dat $RESULTS_DIR/test_gbm_credit.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_credit.time.dat $RESULTS_DIR/test_gbm_credit.time.h2o.dat $RESULTS_DIR/test_gbm_credit.time.h2o4gpu.dat -python testsxgboost/extractjson.py test_gbm_airlines $RESULTS_DIR $RESULTS_DIR/airlines_acc_perf.json $RESULTS_DIR/test_gbm_airlines.error.dat $RESULTS_DIR/test_gbm_airlines.error.h2o.dat $RESULTS_DIR/test_gbm_airlines.error.h2o4gpu.dat 
$RESULTS_DIR/test_gbm_airlines.time.dat $RESULTS_DIR/test_gbm_airlines.time.h2o.dat $RESULTS_DIR/test_gbm_airlines.time.h2o4gpu.dat -python testsxgboost/extractjson.py test_gbm_planet $RESULTS_DIR $RESULTS_DIR/planet_acc_perf.json $RESULTS_DIR/test_gbm_planet.error.dat $RESULTS_DIR/test_gbm_planet.error.h2o.dat $RESULTS_DIR/test_gbm_planet.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_planet.time.dat $RESULTS_DIR/test_gbm_planet.time.h2o.dat $RESULTS_DIR/test_gbm_planet.time.h2o4gpu.dat -python testsxgboost/extractjson.py test_gbm_higgs $RESULTS_DIR $RESULTS_DIR/higgs_acc_perf.json $RESULTS_DIR/test_gbm_higgs.error.dat $RESULTS_DIR/test_gbm_higgs.error.h2o.dat $RESULTS_DIR/test_gbm_higgs.error.h2o4gpu.dat $RESULTS_DIR/test_gbm_higgs.time.dat $RESULTS_DIR/test_gbm_higgs.time.h2o.dat $RESULTS_DIR/test_gbm_higgs.time.h2o4gpu.dat - diff --git a/testsxgboost/libs/__init__.py b/testsxgboost/libs/__init__.py deleted file mode 100755 index e69de29bb..000000000
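
Note on the relocated build configuration: make/config.mk now derives a per-platform string (e.g. x86_64-centos7-cuda9.0 or x86_64-centos7-cpu) from the machine architecture and the libcudart version found under $(CUDA_HOME)/lib64, and the CI scripts above install whichever h2o4gpu wheel sits under src/interface_py/dist/${platform}. The snippet below is a minimal standalone sketch of that resolution logic, written in bash for illustration only; it mirrors the Make and shell fragments in make/config.mk and scripts/make-docker-runtests.sh and assumes that CUDA, when present, lives under /usr/local/cuda.

#!/bin/bash
# Illustrative sketch only -- not part of the change set above.
# Mirrors the platform detection in make/config.mk and the wheel lookup in
# scripts/make-docker-runtests.sh / scripts/make-docker-runtime.sh.
# Assumption: when CUDA is installed it lives under /usr/local/cuda.

CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}"
ARCH=$(arch)

if ls "${CUDA_HOME}"/lib64/libcudart.so.* >/dev/null 2>&1; then
    # e.g. libcudart.so.9.0 -> "9.0", the same rev|cut|rev trick used for MAKEFILE_CUDA_VERSION
    CUDA_VERSION=$(ls "${CUDA_HOME}"/lib64/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev)
    PLATFORM="${ARCH}-centos7-cuda${CUDA_VERSION}"
else
    PLATFORM="${ARCH}-centos7-cpu"
fi
echo "platform: ${PLATFORM}"

# The CI scripts pick the newest wheel out of the per-platform dist directory:
find "src/interface_py/dist/${PLATFORM}" -name "*h2o4gpu-*.whl" | xargs ls -tr | tail -1

Keying the dist/ directory on this platform string appears to be what lets the single ci/Jenkinsfile.template be reused across the x86_64 cuda8/cuda9/cuda92 and ppc64le cuda8/cuda9 variants defined in the ci/base/*.base files earlier in this diff.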