Merge branch 'master' into add-openpose

frankier · Jun 25, 2020 · 90f0a1b
2 parents dd35761 + 2f0bd18
commit 90f0a1b
Show file tree

Hide file tree

Showing 721 changed files with 19,305 additions and 13,737 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -541,11 +541,11 @@ jobs:
         env: PROJ=caffe,openpose OS=macosx-x86_64
         install: true
         script: ./ci/install-travis.sh
-      - os: osx
-        osx_image: xcode9.3
-        env: PROJ=cuda OS=macosx-x86_64
-        install: true
-        script: ./ci/install-travis.sh
+#      - os: osx
+#        osx_image: xcode9.3
+#        env: PROJ=cuda OS=macosx-x86_64
+#        install: true
+#        script: ./ci/install-travis.sh
       - os: osx
         osx_image: xcode9.3
         env: PROJ=mxnet OS=macosx-x86_64
@@ -626,41 +626,41 @@ jobs:
         install: true
         script: ./ci/install-travis.sh
 
-      - os: osx
-        osx_image: xcode9.3
-        env: PROJ=opencv OS=macosx-x86_64 EXT=-gpu
-        install: true
-        script: ./ci/install-travis.sh
-      - os: osx
-        osx_image: xcode9.3
-        env: PROJ=caffe OS=macosx-x86_64 EXT=-gpu
-        install: true
-        script: ./ci/install-travis.sh
-      - os: osx
-        osx_image: xcode9.3
-        env: PROJ=mxnet OS=macosx-x86_64 EXT=-gpu
-        install: true
-        script: ./ci/install-travis.sh
-      - os: osx
-        osx_image: xcode9.3
-        env: PROJ=tensorflow OS=macosx-x86_64 EXT=-gpu
-        install: true
-        script: ./ci/install-travis.sh
+#      - os: osx
+#        osx_image: xcode9.3
+#        env: PROJ=opencv OS=macosx-x86_64 EXT=-gpu
+#        install: true
+#        script: ./ci/install-travis.sh
+#      - os: osx
+#        osx_image: xcode9.3
+#        env: PROJ=caffe OS=macosx-x86_64 EXT=-gpu
+#        install: true
+#        script: ./ci/install-travis.sh
+#      - os: osx
+#        osx_image: xcode9.3
+#        env: PROJ=mxnet OS=macosx-x86_64 EXT=-gpu
+#        install: true
+#        script: ./ci/install-travis.sh
+#      - os: osx
+#        osx_image: xcode9.3
+#        env: PROJ=tensorflow OS=macosx-x86_64 EXT=-gpu
+#        install: true
+#        script: ./ci/install-travis.sh
       - os: osx
         osx_image: xcode9.3
         env: PROJ=tensorflow OS=macosx-x86_64 EXT=-python
         install: true
         script: ./ci/install-travis.sh
-      - os: osx
-        osx_image: xcode9.3
-        env: PROJ=tensorflow OS=macosx-x86_64 EXT=-python-gpu
-        install: true
-        script: ./ci/install-travis.sh
-      - os: osx
-        osx_image: xcode9.3
-        env: PROJ=onnxruntime OS=macosx-x86_64 EXT=-gpu
-        install: true
-        script: ./ci/install-travis.sh
+#      - os: osx
+#        osx_image: xcode9.3
+#        env: PROJ=tensorflow OS=macosx-x86_64 EXT=-python-gpu
+#        install: true
+#        script: ./ci/install-travis.sh
+#      - os: osx
+#        osx_image: xcode9.3
+#        env: PROJ=onnxruntime OS=macosx-x86_64 EXT=-gpu
+#        install: true
+#        script: ./ci/install-travis.sh
 
       - stage: Redeploy
         os: linux

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,11 +1,12 @@
 
  * Add presets for OpenPose 1.6.0 ([pull #898](https://github.com/bytedeco/javacpp-presets/pull/898))
+ * Add comparison against MKL in `llvm/samples/polly/MatMulBenchmark.java`
  * Add `requires org.bytedeco.javacpp.${javacpp.platform.module}` to load `jnijavacpp` with JPMS ([pull #893](https://github.com/bytedeco/javacpp-presets/pull/893))
  * Bundle configuration files required by AOT compilation with GraalVM ([issue eclipse/deeplearning4j#7362](https://github.com/eclipse/deeplearning4j/issues/7362))
  * Add support for Windows to presets for Qt ([issue #862](https://github.com/bytedeco/javacpp-presets/issues/862))
  * Fix JPMS modules for CUDA, ARPACK-NG, GSL, SciPy, Gym, MXNet ([pull #880](https://github.com/bytedeco/javacpp-presets/pull/880) and [pull #881](https://github.com/bytedeco/javacpp-presets/pull/881))
  * Build OpenBLAS with a `TARGET` even for `DYNAMIC_ARCH` to avoid SIGILL ([issue eclipse/deeplearning4j#8747](https://github.com/eclipse/deeplearning4j/issues/8747))
- * Upgrade presets for FFmpeg 4.2.3, Arrow 0.17.1, Hyperscan 5.3.0, MKL-DNN 0.21.5, DNNL 1.4, NumPy 1.18.4, Gym 0.17.2, TensorFlow 1.15.3, ONNX 1.7.0 ([pull #882](https://github.com/bytedeco/javacpp-presets/pull/882)), ONNX Runtime 1.3.0 ([pull #887](https://github.com/bytedeco/javacpp-presets/pull/887)), Qt 5.15.0, Skia 1.68.3, and their dependencies
+ * Upgrade presets for FFmpeg 4.3 ([pull #891](https://github.com/bytedeco/javacpp-presets/pull/891)), Arrow 0.17.1, Hyperscan 5.3.0, MKL-DNN 0.21.5, DNNL 1.5, OpenBLAS 0.3.10, NumPy 1.19.0, SciPy 1.5.0, Gym 0.17.2, CUDA 11.0, cuDNN 8.0.0, NCCL 2.7.3, TensorFlow 1.15.3, TensorRT 7.1, ONNX 1.7.0 ([pull #882](https://github.com/bytedeco/javacpp-presets/pull/882)), ONNX Runtime 1.3.1 ([pull #887](https://github.com/bytedeco/javacpp-presets/pull/887)), Qt 5.15.0, Skia 1.68.3, and their dependencies
  * Add `FullOptimization.h` allowing users to fully optimize LLVM modules ([pull #869](https://github.com/bytedeco/javacpp-presets/pull/869))
 
 ### April 14, 2020 version 1.5.3

diff --git a/README.md b/README.md
@@ -107,7 +107,7 @@ The JavaCPP Presets depend on Maven, a powerful build system for Java, so before
 Each child module in turn relies by default on the included [`cppbuild.sh` scripts](#the-cppbuildsh-scripts), explained below, to install its corresponding native libraries in the `cppbuild` subdirectory. To use native libraries already installed somewhere else on the system, other installation directories than `cppbuild` can also be specified either in the `pom.xml` files or in the `.java` configuration files. The following versions are supported:
 
  * OpenCV 4.3.0  https://opencv.org/releases.html
- * FFmpeg 4.2.x  http://ffmpeg.org/download.html
+ * FFmpeg 4.3.x  http://ffmpeg.org/download.html
  * FlyCapture 2.13.x  https://www.flir.com/products/flycapture-sdk
  * Spinnaker 1.27.x https://www.flir.com/products/spinnaker-sdk
  * libdc1394 2.2.6  http://sourceforge.net/projects/libdc1394/files/
@@ -124,32 +124,32 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
  * Hyperscan 5.3.x  https://github.com/intel/hyperscan
  * MKL 2020.x  https://software.intel.com/intel-mkl
  * MKL-DNN 0.21.x  https://github.com/oneapi-src/oneDNN
- * DNNL 1.4.x  https://github.com/oneapi-src/oneDNN
- * OpenBLAS 0.3.9  http://www.openblas.net/
+ * DNNL 1.5.x  https://github.com/oneapi-src/oneDNN
+ * OpenBLAS 0.3.10  http://www.openblas.net/
  * ARPACK-NG 3.7.0  https://github.com/opencollab/arpack-ng
  * CMINPACK 1.3.6  https://github.com/devernay/cminpack
  * FFTW 3.3.8  http://www.fftw.org/download.html
  * GSL 2.6  http://www.gnu.org/software/gsl/#downloading
  * CPython 3.7.7  https://www.python.org/downloads/
- * NumPy 1.18.x  https://github.com/numpy/numpy
- * SciPy 1.4.x  https://github.com/scipy/scipy
+ * NumPy 1.19.x  https://github.com/numpy/numpy
+ * SciPy 1.5.x  https://github.com/scipy/scipy
  * Gym 0.17.x  https://github.com/openai/gym
  * LLVM 10.0.x  http://llvm.org/releases/download.html
  * libpostal 1.1-alpha  https://github.com/openvenues/libpostal
  * Leptonica 1.79.0  http://www.leptonica.org/download.html
  * Tesseract 4.1.1  https://github.com/tesseract-ocr/tesseract
  * Caffe 1.0  https://github.com/BVLC/caffe
  * OpenPose 1.6.0  https://github.com/CMU-Perceptual-Computing-Lab/openpose
- * CUDA 10.2  https://developer.nvidia.com/cuda-downloads
-   * cuDNN 7.6.x  https://developer.nvidia.com/cudnn
-   * NCCL 2.6.x  https://developer.nvidia.com/nccl
+ * CUDA 11.0  https://developer.nvidia.com/cuda-downloads
+   * cuDNN 8.0.x  https://developer.nvidia.com/cudnn
+   * NCCL 2.7.x  https://developer.nvidia.com/nccl
  * MXNet 1.6.0  https://github.com/apache/incubator-mxnet
  * TensorFlow 1.15.x  https://github.com/tensorflow/tensorflow
- * TensorRT 7.0  https://developer.nvidia.com/tensorrt
+ * TensorRT 7.x  https://developer.nvidia.com/tensorrt
  * The Arcade Learning Environment 0.6.x  https://github.com/mgbellemare/Arcade-Learning-Environment
  * ONNX 1.7.0  https://github.com/onnx/onnx
  * nGraph 0.26.0  https://github.com/NervanaSystems/ngraph
- * ONNX Runtime 1.3.0  https://github.com/microsoft/onnxruntime
+ * ONNX Runtime 1.3.x  https://github.com/microsoft/onnxruntime
  * LiquidFun  http://google.github.io/liquidfun/
  * Qt 5.15.x  https://download.qt.io/archive/qt/
  * Mono/Skia 1.68.x  https://github.com/mono/skia

diff --git a/arpack-ng/platform/pom.xml b/arpack-ng/platform/pom.xml
@@ -24,7 +24,7 @@
     <dependency>
       <groupId>org.bytedeco</groupId>
       <artifactId>openblas-platform</artifactId>
-      <version>0.3.9-${project.parent.version}</version>
+      <version>0.3.10-${project.parent.version}</version>
     </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>

diff --git a/arpack-ng/pom.xml b/arpack-ng/pom.xml
@@ -25,7 +25,7 @@
     <dependency>
       <groupId>org.bytedeco</groupId>
       <artifactId>openblas</artifactId>
-      <version>0.3.9-${project.parent.version}</version>
+      <version>0.3.10-${project.parent.version}</version>
     </dependency>
     <dependency>
       <groupId>org.bytedeco</groupId>
@@ -48,12 +48,12 @@
           <dependency>
             <groupId>org.bytedeco</groupId>
             <artifactId>openblas</artifactId>
-            <version>0.3.9-${project.parent.version}</version>
+            <version>0.3.10-${project.parent.version}</version>
           </dependency>
           <dependency>
             <groupId>org.bytedeco</groupId>
             <artifactId>openblas</artifactId>
-            <version>0.3.9-${project.parent.version}</version>
+            <version>0.3.10-${project.parent.version}</version>
             <classifier>${javacpp.platform}</classifier>
           </dependency>
         </dependencies>

diff --git a/caffe/README.md b/caffe/README.md
@@ -34,29 +34,29 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
     <modelVersion>4.0.0</modelVersion>
     <groupId>org.bytedeco.caffe</groupId>
     <artifactId>caffe</artifactId>
-    <version>1.5.3</version>
+    <version>1.5.4-SNAPSHOT</version>
     <properties>
         <exec.mainClass>caffe</exec.mainClass>
     </properties>
     <dependencies>
         <dependency>
             <groupId>org.bytedeco</groupId>
             <artifactId>caffe-platform</artifactId>
-            <version>1.0-1.5.3</version>
+            <version>1.0-1.5.4-SNAPSHOT</version>
         </dependency>
 
         <!-- Additional dependencies required to use CUDA and cuDNN -->
         <dependency>
             <groupId>org.bytedeco</groupId>
             <artifactId>caffe-platform-gpu</artifactId>
-            <version>1.0-1.5.3</version>
+            <version>1.0-1.5.4-SNAPSHOT</version>
         </dependency>
 
         <!-- Additional dependencies to use bundled CUDA and cuDNN -->
         <dependency>
             <groupId>org.bytedeco</groupId>
             <artifactId>cuda-platform-redist</artifactId>
-            <version>10.2-7.6-1.5.3</version>
+            <version>11.0-8.0-1.5.4-SNAPSHOT</version>
         </dependency>
 
     </dependencies>

diff --git a/caffe/caffe-cudnn8.patch b/caffe/caffe-cudnn8.patch
@@ -0,0 +1,143 @@
+diff -ruN caffe-1.0/include/caffe/layers/cudnn_conv_layer.hpp caffe-1.0-cudnn8/include/caffe/layers/cudnn_conv_layer.hpp
+--- caffe-1.0/include/caffe/layers/cudnn_conv_layer.hpp	2017-04-16 01:17:48.000000000 +0900
++++ caffe-1.0-cudnn8/include/caffe/layers/cudnn_conv_layer.hpp	2020-06-10 15:04:13.540649654 +0900
+@@ -48,9 +48,9 @@
+   cudaStream_t*  stream_;
+
+   // algorithms for forward and backwards convolutions
+-  cudnnConvolutionFwdAlgo_t *fwd_algo_;
+-  cudnnConvolutionBwdFilterAlgo_t *bwd_filter_algo_;
+-  cudnnConvolutionBwdDataAlgo_t *bwd_data_algo_;
++  cudnnConvolutionFwdAlgoPerf_t *fwd_algo_;
++  cudnnConvolutionBwdFilterAlgoPerf_t *bwd_filter_algo_;
++  cudnnConvolutionBwdDataAlgoPerf_t *bwd_data_algo_;
+
+   vector<cudnnTensorDescriptor_t> bottom_descs_, top_descs_;
+   cudnnTensorDescriptor_t    bias_desc_;
+diff -ruN caffe-1.0/src/caffe/layers/cudnn_conv_layer.cpp caffe-1.0-cudnn8/src/caffe/layers/cudnn_conv_layer.cpp
+--- caffe-1.0/src/caffe/layers/cudnn_conv_layer.cpp	2017-04-16 01:17:48.000000000 +0900
++++ caffe-1.0-cudnn8/src/caffe/layers/cudnn_conv_layer.cpp	2020-06-10 15:19:37.811734002 +0900
+@@ -23,9 +23,9 @@
+   handle_         = new cudnnHandle_t[this->group_ * CUDNN_STREAMS_PER_GROUP];
+
+   // Initialize algorithm arrays
+-  fwd_algo_       = new cudnnConvolutionFwdAlgo_t[bottom.size()];
+-  bwd_filter_algo_= new cudnnConvolutionBwdFilterAlgo_t[bottom.size()];
+-  bwd_data_algo_  = new cudnnConvolutionBwdDataAlgo_t[bottom.size()];
++  fwd_algo_       = new cudnnConvolutionFwdAlgoPerf_t[bottom.size()];
++  bwd_filter_algo_= new cudnnConvolutionBwdFilterAlgoPerf_t[bottom.size()];
++  bwd_data_algo_  = new cudnnConvolutionBwdDataAlgoPerf_t[bottom.size()];
+
+   // initialize size arrays
+   workspace_fwd_sizes_ = new size_t[bottom.size()];
+@@ -39,9 +39,9 @@
+
+   for (size_t i = 0; i < bottom.size(); ++i) {
+     // initialize all to default algorithms
+-    fwd_algo_[i] = (cudnnConvolutionFwdAlgo_t)0;
+-    bwd_filter_algo_[i] = (cudnnConvolutionBwdFilterAlgo_t)0;
+-    bwd_data_algo_[i] = (cudnnConvolutionBwdDataAlgo_t)0;
++    fwd_algo_[i].algo = (cudnnConvolutionFwdAlgo_t)0;
++    bwd_filter_algo_[i].algo = (cudnnConvolutionBwdFilterAlgo_t)0;
++    bwd_data_algo_[i].algo = (cudnnConvolutionBwdDataAlgo_t)0;
+     // default algorithms don't require workspace
+     workspace_fwd_sizes_[i] = 0;
+     workspace_bwd_data_sizes_[i] = 0;
+@@ -128,13 +128,13 @@
+         stride_h, stride_w);
+
+     // choose forward and backward algorithms + workspace(s)
+-    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[0],
++    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm_v7(handle_[0],
+       bottom_descs_[i],
+       filter_desc_,
+       conv_descs_[i],
+       top_descs_[i],
+-      CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
+-      workspace_limit_bytes,
++      1,
++      0,
+       &fwd_algo_[i]));
+
+     CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[0],
+@@ -142,30 +142,28 @@
+       filter_desc_,
+       conv_descs_[i],
+       top_descs_[i],
+-      fwd_algo_[i],
++      fwd_algo_[i].algo,
+       &(workspace_fwd_sizes_[i])));
+
+     // choose backward algorithm for filter
+-    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(handle_[0],
++    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm_v7(handle_[0],
+           bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
+-          CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
+-          workspace_limit_bytes, &bwd_filter_algo_[i]) );
++          1, 0, &bwd_filter_algo_[i]) );
+
+     // get workspace for backwards filter algorithm
+     CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_[0],
+           bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
+-          bwd_filter_algo_[i], &workspace_bwd_filter_sizes_[i]));
++          bwd_filter_algo_[i].algo, &workspace_bwd_filter_sizes_[i]));
+
+     // choose backward algo for data
+-    CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(handle_[0],
++    CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm_v7(handle_[0],
+           filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
+-          CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
+-        workspace_limit_bytes, &bwd_data_algo_[i]));
++          1, 0, &bwd_data_algo_[i]));
+
+     // get workspace size
+     CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(handle_[0],
+           filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
+-          bwd_data_algo_[i], &workspace_bwd_data_sizes_[i]) );
++          bwd_data_algo_[i].algo, &workspace_bwd_data_sizes_[i]) );
+   }
+
+   // reduce over all workspace sizes to get a maximum to allocate / reallocate
+@@ -204,9 +202,9 @@
+         workspace_fwd_sizes_[i] = 0;
+         workspace_bwd_filter_sizes_[i] = 0;
+         workspace_bwd_data_sizes_[i] = 0;
+-        fwd_algo_[i] = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
+-        bwd_filter_algo_[i] = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
+-        bwd_data_algo_[i] = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
++        fwd_algo_[i].algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
++        bwd_filter_algo_[i].algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
++        bwd_data_algo_[i].algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
+       }
+
+       // NULL out all workspace pointers
+diff -ruN caffe-1.0/src/caffe/layers/cudnn_conv_layer.cu caffe-1.0-cudnn8/src/caffe/layers/cudnn_conv_layer.cu
+--- caffe-1.0/src/caffe/layers/cudnn_conv_layer.cu	2017-04-16 01:17:48.000000000 +0900
++++ caffe-1.0-cudnn8/src/caffe/layers/cudnn_conv_layer.cu	2020-06-10 15:24:49.248840334 +0900
+@@ -23,7 +23,7 @@
+             bottom_descs_[i], bottom_data + bottom_offset_ * g,
+             filter_desc_, weight + this->weight_offset_ * g,
+             conv_descs_[i],
+-            fwd_algo_[i], workspace[g], workspace_fwd_sizes_[i],
++            fwd_algo_[i].algo, workspace[g], workspace_fwd_sizes_[i],
+             cudnn::dataType<Dtype>::zero,
+             top_descs_[i], top_data + top_offset_ * g));
+
+@@ -80,7 +80,7 @@
+               bottom_descs_[i], bottom_data + bottom_offset_ * g,
+               top_descs_[i],    top_diff + top_offset_ * g,
+               conv_descs_[i],
+-              bwd_filter_algo_[i], workspace[1*this->group_ + g],
++              bwd_filter_algo_[i].algo, workspace[1*this->group_ + g],
+               workspace_bwd_filter_sizes_[i],
+               cudnn::dataType<Dtype>::one,
+               filter_desc_, weight_diff + this->weight_offset_ * g));
+@@ -98,7 +98,7 @@
+               filter_desc_, weight + this->weight_offset_ * g,
+               top_descs_[i], top_diff + top_offset_ * g,
+               conv_descs_[i],
+-              bwd_data_algo_[i], workspace[2*this->group_ + g],
++              bwd_data_algo_[i].algo, workspace[2*this->group_ + g],
+               workspace_bwd_data_sizes_[i],
+               cudnn::dataType<Dtype>::zero,
+               bottom_descs_[i], bottom_diff + bottom_offset_ * g));
diff --git a/caffe/cppbuild.sh b/caffe/cppbuild.sh
@@ -161,15 +161,16 @@ cp SENet-master/src/caffe/layers/axpy_layer.* caffe-$CAFFE_VERSION/src/caffe/lay
 
 cd caffe-$CAFFE_VERSION
 patch -Np1 < ../../../caffe-nogpu.patch
+patch -Np1 < ../../../caffe-cudnn8.patch
 sedinplace 's/CV_LOAD_IMAGE_GRAYSCALE/cv::IMREAD_GRAYSCALE/g' src/caffe/util/io.cpp src/caffe/layers/window_data_layer.cpp
 sedinplace 's/CV_LOAD_IMAGE_COLOR/cv::IMREAD_COLOR/g' src/caffe/util/io.cpp src/caffe/layers/window_data_layer.cpp
 cp Makefile.config.example Makefile.config
 export PATH=../bin:$PATH
 export CXXFLAGS="-I../include -I$OPENCV_PATH/include -I$HDF5_PATH/include -std=c++11"
 export NVCCFLAGS="-I../include -I$OPENCV_PATH/include -I$HDF5_PATH/include $CUDAFLAGS -std=c++11"
 export LINKFLAGS="-L../lib -L$OPENCV_PATH -L$OPENCV_PATH/lib -L$HDF5_PATH -L$HDF5_PATH/lib"
-make -j $MAKEJ BLAS=$BLAS OPENCV_VERSION=3 DISTRIBUTE_DIR=.. CPU_ONLY=$CPU_ONLY CUDA_ARCH=-arch=sm_30 USE_CUDNN=$USE_CUDNN proto
-make -j $MAKEJ BLAS=$BLAS OPENCV_VERSION=3 DISTRIBUTE_DIR=.. CPU_ONLY=$CPU_ONLY CUDA_ARCH=-arch=sm_30 USE_CUDNN=$USE_CUDNN lib
+make -j $MAKEJ BLAS=$BLAS OPENCV_VERSION=3 DISTRIBUTE_DIR=.. CPU_ONLY=$CPU_ONLY CUDA_ARCH=-arch=sm_35 USE_CUDNN=$USE_CUDNN proto
+make -j $MAKEJ BLAS=$BLAS OPENCV_VERSION=3 DISTRIBUTE_DIR=.. CPU_ONLY=$CPU_ONLY CUDA_ARCH=-arch=sm_35 USE_CUDNN=$USE_CUDNN lib
 # Manual deploy to avoid Caffe's python build
 mkdir -p ../include/caffe/proto
 cp -a include/caffe/* ../include/caffe/