From ef549bf73f88588c5e0f5183ed77471b61ae0a0d Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 28 Mar 2018 18:39:24 +1100 Subject: [PATCH 1/8] Add additional output info to CudnnConvolutionHelper --- .../convolution/CudnnConvolutionHelper.java | 88 ++++++++++++++----- 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java index 80cc17ee31bb..71bf200f700b 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java @@ -42,7 +42,10 @@ import org.nd4j.linalg.jcublas.context.CudaContext; import org.nd4j.linalg.primitives.Pair; +import java.util.Arrays; + import static org.bytedeco.javacpp.cuda.CUstream_st; +import static org.bytedeco.javacpp.cuda.cudaGetErrorString; import static org.bytedeco.javacpp.cudnn.*; /** @@ -304,6 +307,8 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] //Same mode + dilation 3: cuDNN status = 9: CUDNN_STATUS_NOT_SUPPORTED return null; } + int code; + int miniBatch = input.size(0); int inH = input.size(2); int inW = input.size(3); @@ -337,12 +342,17 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] } else z = Nd4j.createUninitialized(new int[] {miniBatch, outDepth, outSize[0], outSize[1]}); - checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, - srcStride[0], srcStride[1], srcStride[2], srcStride[3])); - checkCudnn(cudnnSetFilter4dDescriptor(cudnnContext.filterDesc, dataType, TENSOR_FORMAT, outDepth, inDepth, kH, - kW)); - checkCudnn(cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], dilation[0], - dilation[1], CUDNN_CROSS_CORRELATION, dataType)); + code = cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, + srcStride[0], srcStride[1], srcStride[2], srcStride[3]); + checkCudnn(true, "cudnnSetTensor4dDescriptorEx", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + + code = cudnnSetFilter4dDescriptor(cudnnContext.filterDesc, dataType, TENSOR_FORMAT, outDepth, inDepth, kH, kW); + checkCudnn(true, "cudnnSetFilter4dDescriptor", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + + code = cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], dilation[0], + dilation[1], CUDNN_CROSS_CORRELATION, dataType); + checkCudnn(true, "cudnnSetConvolution2dDescriptor", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + // find dimension of convolution output // checkCudnn(cudnnGetConvolution2dForwardOutputDim(cudnnContext.convDesc, cudnnContext.srcTensorDesc, cudnnContext.filterDesc, n, c, h, w)); @@ -351,8 +361,10 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] int[] algo = new int[1]; int[] dstStride = z.stride(); - checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, outDepth, outSize[0], - outSize[1], dstStride[0], dstStride[1], dstStride[2], dstStride[3])); + code = cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, outDepth, 
outSize[0], + outSize[1], dstStride[0], dstStride[1], dstStride[2], dstStride[3]); + checkCudnn(true, "cudnnSetTensor4dDescriptorEx", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + if (mode == AlgoMode.USER_SPECIFIED && fwdAlgo != null) { switch (fwdAlgo) { case IMPLICIT_GEMM: @@ -386,11 +398,12 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] throw new IllegalArgumentException("Unknown FwdAlgo: " + fwdAlgo); } } else { - checkCudnn(cudnnGetConvolutionForwardAlgorithm(cudnnContext, cudnnContext.srcTensorDesc, - cudnnContext.filterDesc, cudnnContext.convDesc, - cudnnContext.dstTensorDesc, mode == AlgoMode.NO_WORKSPACE - ? CUDNN_CONVOLUTION_FWD_NO_WORKSPACE : CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, - 0, algo)); + code = cudnnGetConvolutionForwardAlgorithm(cudnnContext, cudnnContext.srcTensorDesc, + cudnnContext.filterDesc, cudnnContext.convDesc, + cudnnContext.dstTensorDesc, mode == AlgoMode.NO_WORKSPACE + ? CUDNN_CONVOLUTION_FWD_NO_WORKSPACE : CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, + 0, algo); + checkCudnn(true, "cudnnGetConvolutionForwardAlgorithm", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); } Allocator allocator = AtomicAllocator.getInstance(); @@ -400,21 +413,30 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] Pointer biasData = allocator.getPointer(bias, context); Pointer dstData = allocator.getPointer(z, context); - checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream()))); - checkCudnn(cudnnGetConvolutionForwardWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc, + code = cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())); + checkCudnn(true, "cudnnSetStream", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + + code = cudnnGetConvolutionForwardWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc, cudnnContext.filterDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo[0], - sizeInBytes)); + sizeInBytes); + checkCudnn(true, "cudnnGetConvolutionForwardWorkspaceSize", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + if (sizeInBytes.get(0) > workSpace.capacity()) { workSpace.deallocate(); workSpace = new DataCache(sizeInBytes.get(0)); } - checkCudnn(cudnnConvolutionForward(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData, + code = cudnnConvolutionForward(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData, cudnnContext.filterDesc, filterData, cudnnContext.convDesc, algo[0], workSpace, - workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData)); + workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData); + checkCudnn(true, "cudnnConvolutionForward", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); - checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.biasTensorDesc, TENSOR_FORMAT, dataType, 1, outDepth, 1, 1)); - checkCudnn(cudnnAddTensor(cudnnContext, alpha, cudnnContext.biasTensorDesc, biasData, alpha, - cudnnContext.dstTensorDesc, dstData)); + + code = cudnnSetTensor4dDescriptor(cudnnContext.biasTensorDesc, TENSOR_FORMAT, dataType, 1, outDepth, 1, 1); + checkCudnn(true, "cudnnSetTensor4dDescriptor", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + + code = cudnnAddTensor(cudnnContext, alpha, cudnnContext.biasTensorDesc, biasData, alpha, + cudnnContext.dstTensorDesc, 
dstData); + checkCudnn(true, "cudnnAddTensor", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); allocator.registerAction(context, z, input, weights, bias); @@ -424,6 +446,30 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] return z; } + private void checkCudnn(boolean forward, String step, int code, INDArray input, INDArray weights, INDArray bias, int[] kernel, int[] strides, int[] pad, + AlgoMode mode, FwdAlgo fwdAlgo, ConvolutionMode convolutionMode, int[] dilation){ + + if(code != CUDNN_STATUS_SUCCESS) { + StringBuilder sb = new StringBuilder(); + sb.append("CUDA error =").append(code).append(": ").append(cudaGetErrorString(code).getString()) + .append(" during ") + .append(forward ? "forward pass" : "backward pass") + .append(" - step ").append(step) + .append(": inputShape=").append(Arrays.toString(input.shape())) + .append(", weightsShape=").append(Arrays.toString(weights.shape())) + .append(", biasShape=").append(bias == null ? null : Arrays.toString(bias.shape())) + .append(", kernel=").append(Arrays.toString(kernel)) + .append(", stride=").append(Arrays.toString(strides)) + .append(", padding=").append(Arrays.toString(pad)) + .append(", dilation=").append(Arrays.toString(dilation)) + .append(", AlgoMode=").append(mode) + .append(", fwdAlgo=").append(fwdAlgo) + .append(", convolutionMode=").append(convolutionMode); + + throw new RuntimeException(sb.toString()); + } + } + @Override public INDArray activate(INDArray z, IActivation afn) { if (Nd4j.getExecutioner() instanceof GridExecutioner) From 98fd955d3f8a4a29b660cc169e97a60accb1cc1c Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 28 Mar 2018 19:16:12 +1100 Subject: [PATCH 2/8] More error output for CudnnConvolutionHelper backward pass --- .../convolution/CudnnConvolutionHelper.java | 122 +++++++++++------- 1 file changed, 77 insertions(+), 45 deletions(-) diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java index 71bf200f700b..b27bd073f607 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java @@ -129,6 +129,8 @@ public Pair backpropGradient(INDArray input, INDArray weight //Same mode + dilation 3: cuDNN status = 9: CUDNN_STATUS_NOT_SUPPORTED return null; } + int code; + int miniBatch = input.size(0); int inH = input.size(2); int inW = input.size(3); @@ -163,14 +165,18 @@ public Pair backpropGradient(INDArray input, INDArray weight if (Nd4j.getExecutioner() instanceof GridExecutioner) ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); - checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, - srcStride[0], srcStride[1], srcStride[2], srcStride[3])); - checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, outDepth, outH, outW, - deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3])); - checkCudnn(cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], dilation[0], - dilation[1], CUDNN_CROSS_CORRELATION, dataType)); - checkCudnn(cudnnSetFilter4dDescriptor(cudnnContext.filterDesc, dataType, TENSOR_FORMAT, outDepth, inDepth, kH, - kW)); + code = 
cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, + srcStride[0], srcStride[1], srcStride[2], srcStride[3]); + checkCudnn(false, "cudnnSetTensor4dDescriptorEx", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + code = cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, outDepth, outH, outW, + deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]); + checkCudnn(false, "cudnnSetTensor4dDescriptorEx", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + code = cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], dilation[0], + dilation[1], CUDNN_CROSS_CORRELATION, dataType); + checkCudnn(false, "cudnnSetConvolution2dDescriptor", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + code = cudnnSetFilter4dDescriptor(cudnnContext.filterDesc, dataType, TENSOR_FORMAT, outDepth, inDepth, kH, kW); + checkCudnn(false, "cudnnSetFilter4dDescriptor", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + if (mode == AlgoMode.USER_SPECIFIED && bwdFilterAlgo != null && bwdDataAlgo != null) { switch (bwdFilterAlgo) { case ALGO_0: @@ -227,16 +233,18 @@ public Pair backpropGradient(INDArray input, INDArray weight throw new IllegalArgumentException("Unknown BwdDataAlgo: " + bwdDataAlgo); } } else { - checkCudnn(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnContext, cudnnContext.srcTensorDesc, + code = cudnnGetConvolutionBackwardFilterAlgorithm(cudnnContext, cudnnContext.srcTensorDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc, mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE : CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, - 0, algo1)); - checkCudnn(cudnnGetConvolutionBackwardDataAlgorithm(cudnnContext, cudnnContext.filterDesc, + 0, algo1); + checkCudnn(false, "cudnnGetConvolutionBackwardFilterAlgorithm", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + code = cudnnGetConvolutionBackwardDataAlgorithm(cudnnContext, cudnnContext.filterDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.srcTensorDesc, mode == AlgoMode.NO_WORKSPACE ? 
CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE : CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, - 0, algo2)); + 0, algo2); + checkCudnn(false, "cudnnGetConvolutionBackwardDataAlgorithm", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); } INDArray epsNext; @@ -260,31 +268,46 @@ public Pair backpropGradient(INDArray input, INDArray weight Pointer deltaData = allocator.getPointer(delta, context); Pointer dstData = allocator.getPointer(epsNext, context); - checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream()))); - checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW, - dstStride[0], dstStride[1], dstStride[2], dstStride[3])); - checkCudnn(cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc, + code = cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())); + checkCudnn(false, "cudnnSetStream", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + + code = cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW, + dstStride[0], dstStride[1], dstStride[2], dstStride[3]); + checkCudnn(false, "cudnnSetTensor4dDescriptorEx", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + + code = cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc, algo1[0], - sizeInBytes)); + sizeInBytes); + checkCudnn(false, "cudnnGetConvolutionBackwardFilterWorkspaceSize", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + long sizeInBytes1 = sizeInBytes.get(0); - checkCudnn(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnContext, cudnnContext.filterDesc, + code = cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnContext, cudnnContext.filterDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo2[0], - sizeInBytes)); + sizeInBytes); + checkCudnn(false, "cudnnGetConvolutionBackwardDataWorkspaceSize", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + long sizeInBytes2 = sizeInBytes.get(0); if (sizeInBytes1 > workSpace.capacity() || sizeInBytes2 > workSpace.capacity()) { workSpace.deallocate(); workSpace = new DataCache(Math.max(sizeInBytes1, sizeInBytes2)); } - checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.biasTensorDesc, TENSOR_FORMAT, dataType, 1, outDepth, 1, 1)); - checkCudnn(cudnnConvolutionBackwardBias(cudnnContext, alpha, cudnnContext.deltaTensorDesc, deltaData, beta, - cudnnContext.biasTensorDesc, biasGradData)); - checkCudnn(cudnnConvolutionBackwardFilter(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData, + code = cudnnSetTensor4dDescriptor(cudnnContext.biasTensorDesc, TENSOR_FORMAT, dataType, 1, outDepth, 1, 1); + checkCudnn(false, "cudnnSetTensor4dDescriptor", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + + code = cudnnConvolutionBackwardBias(cudnnContext, alpha, cudnnContext.deltaTensorDesc, deltaData, beta, + cudnnContext.biasTensorDesc, biasGradData); + checkCudnn(false, "cudnnConvolutionBackwardBias", code, input, weights, null, delta, kernel, 
strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + + code = cudnnConvolutionBackwardFilter(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData, cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo1[0], workSpace, - workSpace.capacity(), beta, cudnnContext.filterDesc, filterGradData)); - checkCudnn(cudnnConvolutionBackwardData(cudnnContext, alpha, cudnnContext.filterDesc, filterData, + workSpace.capacity(), beta, cudnnContext.filterDesc, filterGradData); + checkCudnn(false, "cudnnConvolutionBackwardFilter", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); + + code = cudnnConvolutionBackwardData(cudnnContext, alpha, cudnnContext.filterDesc, filterData, cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo2[0], workSpace, - workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData)); + workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData); + checkCudnn(false, "cudnnConvolutionBackwardData", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation); allocator.getFlowController().registerActionAllWrite(context, input, weights, weightGradView, biasGradView, delta, epsNext); @@ -344,14 +367,14 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] code = cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]); - checkCudnn(true, "cudnnSetTensor4dDescriptorEx", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnSetTensor4dDescriptorEx", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); code = cudnnSetFilter4dDescriptor(cudnnContext.filterDesc, dataType, TENSOR_FORMAT, outDepth, inDepth, kH, kW); - checkCudnn(true, "cudnnSetFilter4dDescriptor", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnSetFilter4dDescriptor", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); code = cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], dilation[0], dilation[1], CUDNN_CROSS_CORRELATION, dataType); - checkCudnn(true, "cudnnSetConvolution2dDescriptor", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnSetConvolution2dDescriptor", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); // find dimension of convolution output @@ -363,7 +386,7 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] int[] dstStride = z.stride(); code = cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, outDepth, outSize[0], outSize[1], dstStride[0], dstStride[1], dstStride[2], dstStride[3]); - checkCudnn(true, "cudnnSetTensor4dDescriptorEx", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnSetTensor4dDescriptorEx", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); if (mode == AlgoMode.USER_SPECIFIED && fwdAlgo != null) { switch (fwdAlgo) { @@ -403,7 +426,7 @@ public INDArray 
preOutput(INDArray input, INDArray weights, INDArray bias, int[] cudnnContext.dstTensorDesc, mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_FWD_NO_WORKSPACE : CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, algo); - checkCudnn(true, "cudnnGetConvolutionForwardAlgorithm", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnGetConvolutionForwardAlgorithm", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); } Allocator allocator = AtomicAllocator.getInstance(); @@ -414,12 +437,12 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] Pointer dstData = allocator.getPointer(z, context); code = cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())); - checkCudnn(true, "cudnnSetStream", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnSetStream", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); code = cudnnGetConvolutionForwardWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc, cudnnContext.filterDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo[0], sizeInBytes); - checkCudnn(true, "cudnnGetConvolutionForwardWorkspaceSize", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnGetConvolutionForwardWorkspaceSize", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); if (sizeInBytes.get(0) > workSpace.capacity()) { workSpace.deallocate(); @@ -428,15 +451,15 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] code = cudnnConvolutionForward(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData, cudnnContext.filterDesc, filterData, cudnnContext.convDesc, algo[0], workSpace, workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData); - checkCudnn(true, "cudnnConvolutionForward", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnConvolutionForward", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); code = cudnnSetTensor4dDescriptor(cudnnContext.biasTensorDesc, TENSOR_FORMAT, dataType, 1, outDepth, 1, 1); - checkCudnn(true, "cudnnSetTensor4dDescriptor", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnSetTensor4dDescriptor", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); code = cudnnAddTensor(cudnnContext, alpha, cudnnContext.biasTensorDesc, biasData, alpha, cudnnContext.dstTensorDesc, dstData); - checkCudnn(true, "cudnnAddTensor", code, input, weights, bias, kernel, strides, pad, mode, fwdAlgo, convolutionMode, dilation); + checkCudnn(true, "cudnnAddTensor", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation); allocator.registerAction(context, z, input, weights, bias); @@ -446,25 +469,34 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] return z; } - private void checkCudnn(boolean forward, String step, int code, INDArray input, INDArray weights, INDArray bias, int[] kernel, int[] strides, int[] pad, - AlgoMode mode, FwdAlgo fwdAlgo, ConvolutionMode convolutionMode, int[] dilation){ + 
private void checkCudnn(boolean forward, String step, int code, INDArray input, INDArray weights, INDArray bias, INDArray delta, + int[] kernel, int[] strides, int[] pad, + AlgoMode mode, FwdAlgo fwdAlgo, BwdFilterAlgo bwdFilterAlgo, BwdDataAlgo bwdDataAlgo, ConvolutionMode convolutionMode, int[] dilation) { - if(code != CUDNN_STATUS_SUCCESS) { + if (code != CUDNN_STATUS_SUCCESS) { StringBuilder sb = new StringBuilder(); - sb.append("CUDA error =").append(code).append(": ").append(cudaGetErrorString(code).getString()) + sb.append("CuDNN error = ").append(code).append(": ").append(cudnnGetErrorString(code).getString()) .append(" during ") .append(forward ? "forward pass" : "backward pass") .append(" - step ").append(step) .append(": inputShape=").append(Arrays.toString(input.shape())) .append(", weightsShape=").append(Arrays.toString(weights.shape())) - .append(", biasShape=").append(bias == null ? null : Arrays.toString(bias.shape())) - .append(", kernel=").append(Arrays.toString(kernel)) + .append(", biasShape=").append(bias == null ? null : Arrays.toString(bias.shape())); + if (!forward) { + sb.append(", gradientShape=").append(Arrays.toString(delta.shape())); + } + sb.append(", kernel=").append(Arrays.toString(kernel)) .append(", stride=").append(Arrays.toString(strides)) .append(", padding=").append(Arrays.toString(pad)) .append(", dilation=").append(Arrays.toString(dilation)) - .append(", AlgoMode=").append(mode) - .append(", fwdAlgo=").append(fwdAlgo) - .append(", convolutionMode=").append(convolutionMode); + .append(", AlgoMode=").append(mode); + if (forward) { + sb.append(", fwdAlgo=").append(fwdAlgo); + } else { + sb.append(", bwdFilterAlgo=").append(bwdFilterAlgo) + .append(", bwdDataAlgo=").append(bwdDataAlgo); + } + sb.append(", convolutionMode=").append(convolutionMode); throw new RuntimeException(sb.toString()); } From ad2095bb34cf1a438389876699ac272d8789614f Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 28 Mar 2018 19:20:54 +1100 Subject: [PATCH 3/8] Temporarily add a copy of gradient check in deeplearning4j-cuda --- .../java/org/deeplearning4j/BaseDL4JTest.java | 25 + .../java/org/deeplearning4j/TestUtils.java | 102 ++ .../gradientcheck/CNNGradientCheckTest.java | 1188 +++++++++++++++++ 3 files changed, 1315 insertions(+) create mode 100644 deeplearning4j-cuda/src/test/java/org/deeplearning4j/BaseDL4JTest.java create mode 100644 deeplearning4j-cuda/src/test/java/org/deeplearning4j/TestUtils.java create mode 100644 deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/BaseDL4JTest.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/BaseDL4JTest.java new file mode 100644 index 000000000000..60c5c3c1a585 --- /dev/null +++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/BaseDL4JTest.java @@ -0,0 +1,25 @@ +package org.deeplearning4j; + +import org.junit.After; +import org.junit.Before; +import org.nd4j.linalg.api.ops.executioner.OpExecutioner; +import org.nd4j.linalg.factory.Nd4j; + +public class BaseDL4JTest { + + public OpExecutioner.ProfilingMode getProfilingMode(){ + return OpExecutioner.ProfilingMode.SCOPE_PANIC; + } + + @Before + public void beforeTest(){ + Nd4j.getExecutioner().setProfilingMode(getProfilingMode()); + } + + @After + public void afterTest(){ + //Attempt to keep workspaces isolated between tests + Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); + } + +} diff --git 
a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/TestUtils.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/TestUtils.java new file mode 100644 index 000000000000..e5d9477a2b32 --- /dev/null +++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/TestUtils.java @@ -0,0 +1,102 @@ +package org.deeplearning4j; + +import org.deeplearning4j.nn.graph.ComputationGraph; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.util.ModelSerializer; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution; +import org.nd4j.linalg.factory.Nd4j; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Random; + +import static org.junit.Assert.assertEquals; + +public class TestUtils { + + public static MultiLayerNetwork testModelSerialization(MultiLayerNetwork net){ + + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ModelSerializer.writeModel(net, baos, true); + byte[] bytes = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); + + assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); + assertEquals(net.params(), restored.params()); + + return restored; + } catch (IOException e){ + //Should never happen + throw new RuntimeException(e); + } + } + + public static ComputationGraph testModelSerialization(ComputationGraph net){ + + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ModelSerializer.writeModel(net, baos, true); + byte[] bytes = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true); + + assertEquals(net.getConfiguration(), restored.getConfiguration()); + assertEquals(net.params(), restored.params()); + + return restored; + } catch (IOException e){ + //Should never happen + throw new RuntimeException(e); + } + } + + public static INDArray randomOneHot(int examples, int nOut){ + return randomOneHot(examples, nOut, new Random(12345)); + } + + public static INDArray randomOneHot(int examples, int nOut, long rngSeed){ + return randomOneHot(examples, nOut, new Random(rngSeed)); + } + + public static INDArray randomOneHot(int examples, int nOut, Random rng){ + INDArray arr = Nd4j.create(examples, nOut); + for( int i=0; i (mb,4,2,2) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(2 * 2 * 4) + .nOut(nOut).build()) + .setInputType(InputType.convolutionalFlat(height, width, inputDepth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" + + afn; + + if (PRINT_RESULTS) { + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + + @Test + public void testCnnWithSpaceToBatch() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; + + int[] minibatchSizes = {2, 4}; + int width = 5; + int height = 5; + 
int inputDepth = 1; + + int[] kernel = {2, 2}; + int[] blocks = {1, 1}; + + String[] activations = {"sigmoid", "tanh"}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; + + for (String afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[]{i, i % nOut}, 1.0); + } + + MultiLayerConfiguration conf = + new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) + .dist(new NormalDistribution(0, 1)) + .list().layer(new ConvolutionLayer.Builder(kernel).nIn(inputDepth) + .nOut(3).build())//output: (5-2+0)/1+1 = 4 + .layer(new SpaceToBatchLayer.Builder(blocks).build()) //trivial space to batch + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(4 * 4 * 3) + .nOut(nOut).build()) + .setInputType(InputType.convolutionalFlat(height, width, inputDepth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" + + afn; + + if (PRINT_RESULTS) { + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + + + @Test + public void testCnnWithUpsampling() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; + + int[] minibatchSizes = {1, 3}; + int width = 5; + int height = 5; + int inputDepth = 1; + + int[] kernel = {2, 2}; + int[] stride = {1, 1}; + int[] padding = {0, 0}; + int size = 2; + + String[] activations = {"sigmoid", "tanh"}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; + + for (String afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[]{i, i % nOut}, 1.0); + } + + MultiLayerConfiguration conf = + new NeuralNetConfiguration.Builder() + .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) + .dist(new NormalDistribution(0, 1)) + .list().layer(new ConvolutionLayer.Builder(kernel, + stride, padding).nIn(inputDepth) + .nOut(3).build())//output: (5-2+0)/1+1 = 4 + .layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3 + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(8 * 8 * 3) + .nOut(4).build()) + .setInputType(InputType.convolutionalFlat(height, width, + inputDepth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "PoolingType=" + poolingType + ", minibatch=" + 
minibatchSize + ", activationFn=" + + afn; + + if (PRINT_RESULTS) { + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + + + @Test + public void testCnnWithSubsampling() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; + + int[] minibatchSizes = {1, 3}; + int width = 5; + int height = 5; + int inputDepth = 1; + + int[] kernel = {2, 2}; + int[] stride = {1, 1}; + int[] padding = {0, 0}; + int pnorm = 2; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[]{i, i % nOut}, 1.0); + } + + MultiLayerConfiguration conf = + new NeuralNetConfiguration.Builder().updater(new NoOp()) + .weightInit(WeightInit.DISTRIBUTION) + .dist(new NormalDistribution(0, 1)) + .list().layer(0, + new ConvolutionLayer.Builder(kernel, + stride, padding).nIn(inputDepth) + .nOut(3).build())//output: (5-2+0)/1+1 = 4 + .layer(1, new SubsamplingLayer.Builder(poolingType) + .kernelSize(kernel).stride(stride).padding(padding) + .pnorm(pnorm).build()) //output: (4-2+0)/1+1 =3 -> 3x3x3 + .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(3 * 3 * 3) + .nOut(4).build()) + .setInputType(InputType.convolutionalFlat(height, width, + inputDepth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" + + afn; + + if (PRINT_RESULTS) { + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + + @Test + public void testCnnWithSubsamplingV2() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; + + int[] minibatchSizes = {1, 3}; + int width = 5; + int height = 5; + int inputDepth = 1; + + int[] kernel = {2, 2}; + int[] stride = {1, 1}; + int[] padding = {0, 0}; + int pNorm = 3; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); + INDArray labels = 
Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[]{i, i % nOut}, 1.0); + } + + MultiLayerConfiguration conf = + new NeuralNetConfiguration.Builder().updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) + .dist(new NormalDistribution(0, 1)) + .list().layer(0, + new ConvolutionLayer.Builder(kernel, + stride, padding).nIn(inputDepth) + .nOut(3).build())//output: (5-2+0)/1+1 = 4 + .layer(1, new SubsamplingLayer.Builder(poolingType) + .kernelSize(kernel).stride(stride).padding(padding) + .pnorm(pNorm).build()) //output: (4-2+0)/1+1 =3 -> 3x3x3 + .layer(2, new ConvolutionLayer.Builder(kernel, stride, padding) + .nIn(3).nOut(2).build()) //Output: (3-2+0)/1+1 = 2 + .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(2 * 2 * 2) + .nOut(4).build()) + .setInputType(InputType.convolutionalFlat(height, width, + inputDepth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" + + afn; + System.out.println(msg); + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + + @Test + public void testCnnMultiLayer() { + int nOut = 2; + + int[] minibatchSizes = {1, 2, 5}; + int width = 5; + int height = 5; + int[] inputDepths = {1, 2, 4}; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[]{ + SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG}; + + Nd4j.getRandom().setSeed(12345); + + for (int inputDepth : inputDepths) { + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[]{i, i % nOut}, 1.0); + } + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp()) + .activation(afn) + .list() + .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1) + .padding(0, 0).nIn(inputDepth).nOut(2).build())//output: (5-2+0)/1+1 = 4 + .layer(1, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(2, 2) + .stride(1, 1).padding(0, 0).build()) //(4-2+0)/1+1 = 3 + .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(2, 2) + .stride(1, 1).padding(0, 0).build()) //(3-2+0)/1+1 = 2 + .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut) + .build()) + .setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build(); + + assertEquals(ConvolutionMode.Truncate, + ((ConvolutionLayer) conf.getConf(0).getLayer()).getConvolutionMode()); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + for (int i = 0; i < 4; i++) { + System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams()); + } + + String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" + + afn; + System.out.println(msg); + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, 
DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + } + + + @Test + public void testCnnSamePaddingMode() { + int nOut = 2; + + int[] minibatchSizes = {1, 3}; + int width = 5; + int[] heights = new int[]{4, 5, 6}; //Same padding mode: insensitive to exact input size... + int[] kernelSizes = new int[]{2, 3}; + int[] inputDepths = {1, 2, 4}; + + Nd4j.getRandom().setSeed(12345); + + for (int inputDepth : inputDepths) { + for (int minibatchSize : minibatchSizes) { + for (int height : heights) { + for (int k : kernelSizes) { + + INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[]{i, i % nOut}, 1.0); + } + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + .updater(new NoOp()) + .activation(Activation.TANH).convolutionMode(Same).list() + .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) + .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build()) + .layer(1, new SubsamplingLayer.Builder() + .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) + .stride(1, 1).padding(0, 0).build()) + .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k) + .stride(1, 1).padding(0, 0).build()) + .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(nOut).build()) + .setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + for (int i = 0; i < net.getLayers().length; i++) { + System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams()); + } + + String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height + + ", kernelSize=" + k; + System.out.println(msg); + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void testCnnSamePaddingModeStrided() { + int nOut = 2; + + int[] minibatchSizes = {1, 3}; + int width = 16; + int height = 16; + int[] kernelSizes = new int[]{2, 3}; + int[] strides = {1, 2, 3}; + int[] inputDepths = {1, 3}; + + Nd4j.getRandom().setSeed(12345); + + for (int inputDepth : inputDepths) { + for (int minibatchSize : minibatchSizes) { + for (int stride : strides) { + for (int k : kernelSizes) { + for (boolean convFirst : new boolean[]{true, false}) { + + INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[]{i, i % nOut}, 1.0); + } + + Layer convLayer = new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) + .stride(stride, stride).padding(0, 0).nIn(inputDepth).nOut(2).build(); + + Layer poolLayer = new SubsamplingLayer.Builder() + .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) + .stride(stride, stride).padding(0, 0).build(); + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + .updater(new NoOp()) + .activation(Activation.TANH).convolutionMode(Same).list() + .layer(0, convFirst ? 
convLayer : poolLayer) + .layer(1, convFirst ? poolLayer : convLayer) + .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(nOut).build()) + .setInputType(InputType.convolutionalFlat(height, width, inputDepth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + for (int i = 0; i < net.getLayers().length; i++) { + System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams()); + } + + String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height + + ", kernelSize=" + k + ", stride = " + stride + ", convLayer first = " + + convFirst; + System.out.println(msg); + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, + labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + } + } + + + @Test + public void testCnnZeroPaddingLayer() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; + + int[] minibatchSizes = {1, 3}; + int width = 6; + int height = 6; + int[] inputDepths = {1, 3}; + + int[] kernel = {2, 2}; + int[] stride = {1, 1}; + int[] padding = {0, 0}; + + int[][] zeroPadLayer = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}}; + + for (int inputDepth : inputDepths) { + for (int minibatchSize : minibatchSizes) { + INDArray input = Nd4j.rand(new int[]{minibatchSize, inputDepth, height, width}); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[]{i, i % nOut}, 1.0); + } + for (int[] zeroPad : zeroPadLayer) { + + MultiLayerConfiguration conf = + new NeuralNetConfiguration.Builder().updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) + .dist(new NormalDistribution(0, 1)).list() + .layer(0, new ConvolutionLayer.Builder(kernel, stride, padding) + .nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5 + .layer(1, new ZeroPaddingLayer.Builder(zeroPad).build()).layer(2, + new ConvolutionLayer.Builder(kernel, stride, + padding).nIn(3).nOut(3).build())//output: (6-2+0)/1+1 = 5 + .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(4).build()) + .setInputType(InputType.convolutional(height, width, inputDepth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + //Check zero padding activation shape + org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer zpl = + (org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer) net.getLayer(1); + int[] expShape = new int[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1], + width + zeroPad[2] + zeroPad[3]}; + INDArray out = zpl.activate(input); + assertArrayEquals(expShape, out.shape()); + + String msg = "minibatch=" + minibatchSize + ", depth=" + inputDepth + ", zeroPad = " + + Arrays.toString(zeroPad); + + if (PRINT_RESULTS) { + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + } + } + + @Test(expected = IllegalArgumentException.class) + public void testDeconvolution2DUnsupportedSameModeLayer() { + /* + * When setting 
border mode same on layer directly, we can catch + * it in the builder + */ + Deconvolution2D.Builder deconv = new Deconvolution2D.Builder(); + deconv.convolutionMode(Same); + } + + @Test(expected = IllegalArgumentException.class) + public void testDeconvolution2DUnsupportedSameModeNetwork() { + /* + * When convolution mode Same is set for the network and a deconvolution layer is added + * then only layer activation will fail. Suboptimal, but I don't think we want special + * logic for NNC in this case. + */ + NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345) + .updater(new NoOp()) + .activation(Activation.SIGMOID) + .convolutionMode(Same) + .list() + .layer(new Deconvolution2D.Builder().name("deconvolution") + .nIn(3).nOut(2).build()); + + MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(2).build()) + .setInputType(InputType.convolutionalFlat(7, 7, 3)).build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + net.getLayer(0).activate(Nd4j.rand(10, 7 * 7 * 3)); + } + + @Test + public void testDeconvolution2D() { + int nOut = 2; + + int[] minibatchSizes = new int[]{1, 3, 1, 3, 1, 3, 1, 3}; + int[] kernelSizes = new int[]{1, 1, 3, 3, 1, 1, 3, 3}; + int[] strides = {1, 1, 1, 1, 2, 2, 2, 2}; + int[] dilation = {1, 2, 2, 1, 1, 1, 2, 2}; + Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.TANH, Activation.TANH, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID}; + ConvolutionMode[] cModes = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate}; + int width = 7; + int height = 7; + int inputDepth = 3; + + Nd4j.getRandom().setSeed(12345); + + for (int i = 0; i < minibatchSizes.length; i++) { + int minibatchSize = minibatchSizes[i]; + int k = kernelSizes[i]; + int s = strides[i]; + int d = dilation[i]; + ConvolutionMode cm = cModes[i]; + Activation act = activations[i]; + + + int w = d * width; + int h = d * height; + + INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int j = 0; j < minibatchSize; j++) { + labels.putScalar(new int[]{j, j % nOut}, 1.0); + } + + NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345) + .updater(new NoOp()) + .activation(act) + .list() + .layer(new Deconvolution2D.Builder().name("deconvolution_2D_layer") + .kernelSize(k, k) + .stride(s, s) + .dilation(d, d) + .convolutionMode(cm) + .nIn(inputDepth).nOut(nOut).build()); + + MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(nOut).build()) + .setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + for (int j = 0; j < net.getLayers().length; j++) { + System.out.println("nParams, layer " + j + ": " + net.getLayer(j).numParams()); + } + + String msg = " - mb=" + minibatchSize + ", k=" + + k + ", s=" + s + ", d=" + d + ", cm=" + cm; + System.out.println(msg); + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); + } + } + + @Test + public void testSeparableConv2D() { + int nOut = 2; + + int[] 
minibatchSizes = new int[] {1, 3}; + int width = 8; + int height = 8; + int inputDepth = 3; + int[] kernelSizes = new int[]{2, 3}; + int[] strides = {1, 2}; + int[] dilation = {1, 2}; + ConvolutionMode[] cModes = new ConvolutionMode[]{ConvolutionMode.Truncate}; + + Nd4j.getRandom().setSeed(12345); + + int[] ks = new int[]{1,3,1,3,1,3,1,3}; + int[] ss = new int[]{1,1,2,2,1,1,2,2}; + int[] ds = new int[]{1,1,1,1,2,2,2,2}; + ConvolutionMode[] cms = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate}; + int[] mb = new int[]{1,1,3,3,3,1,3,3}; + + for( int t=0; t builder) { this.cudnnFwdAlgo = builder.cudnnFwdAlgo; this.cudnnBwdFilterAlgo = builder.cudnnBwdFilterAlgo; this.cudnnBwdDataAlgo = builder.cudnnBwdDataAlgo; + this.cudnnAllowFallback = builder.cudnnAllowFallback; initializeConstraints(builder); } @@ -316,6 +314,7 @@ protected static abstract class BaseConvBuilder> ex protected FwdAlgo cudnnFwdAlgo; protected BwdFilterAlgo cudnnBwdFilterAlgo; protected BwdDataAlgo cudnnBwdDataAlgo; + protected boolean cudnnAllowFallback = true; protected BaseConvBuilder(int[] kernelSize, int[] stride, int[] padding) { @@ -393,5 +392,18 @@ public T cudnnBwdDataMode(BwdDataAlgo cudnnBwdDataAlgo) { this.cudnnBwdDataAlgo = cudnnBwdDataAlgo; return (T) this; } + + /** + * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? + * If set to false, an exception in CuDNN will be propagated back to the user. If false, the build-in (non-CuDNN) + * implementation for ConvolutionLayer will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + * @return + */ + public T cudnnAllowFallback(boolean allowFallback){ + this.cudnnAllowFallback = allowFallback; + return (T) this; + } } } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index 850e73c3d10c..8d4dd9dd71d1 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -40,6 +40,7 @@ public class SubsamplingLayer extends Layer { protected int[] dilation = new int[]{1,1}; protected int pnorm; protected double eps; + protected boolean cudnnAllowFallback = true; public enum PoolingType { MAX, AVG, SUM, PNORM; @@ -75,6 +76,7 @@ protected SubsamplingLayer(BaseSubsamplingBuilder builder) { } this.pnorm = builder.pnorm; this.eps = builder.eps; + this.cudnnAllowFallback = builder.cudnnAllowFallback; } @Override @@ -313,6 +315,7 @@ protected static abstract class BaseSubsamplingBuilder backpropGradient(INDArray epsilon) { Pair p = preOutput4d(true, true); delta = afn.backprop(p.getFirst(), epsilon).getFirst(); //TODO handle activation function params - if (helper != null) { + if (helper != null && (helperCountFail == 0 || !layerConf().isCudnnAllowFallback())) { if(!hasBias()){ if(dummyBiasGrad == null){ @@ -180,9 +181,19 @@ public Pair backpropGradient(INDArray epsilon) { biasGradView = dummyBiasGrad; } - Pair ret = helper.backpropGradient(input, weights, delta, kernel, strides, pad, - biasGradView, weightGradView, afn, layerConf().getCudnnAlgoMode(), - layerConf().getCudnnBwdFilterAlgo(), layerConf().getCudnnBwdDataAlgo(), convolutionMode, dilation); + Pair ret = null; + try { + ret = helper.backpropGradient(input, weights, delta, kernel, strides, pad, + 
biasGradView, weightGradView, afn, layerConf().getCudnnAlgoMode(), + layerConf().getCudnnBwdFilterAlgo(), layerConf().getCudnnBwdDataAlgo(), convolutionMode, dilation); + } catch (Exception e){ + if(layerConf().isCudnnAllowFallback()){ + helperCountFail++; + log.warn("CuDNN execution failed - falling back on built-in implementation",e); + } else { + throw new RuntimeException(e); + } + } if (ret != null) { return ret; } @@ -331,7 +342,7 @@ protected Pair preOutput(boolean training, boolean forBackpr int outW = outSize[1]; - if (helper != null) { + if (helper != null && (helperCountFail == 0 || !layerConf().isCudnnAllowFallback())) { if (preOutput != null && forBackprop) { return new Pair<>(preOutput, null); } @@ -346,8 +357,18 @@ protected Pair preOutput(boolean training, boolean forBackpr bias = dummyBias; } - INDArray ret = helper.preOutput(input, weights, bias, kernel, strides, pad, layerConf().getCudnnAlgoMode(), - layerConf().getCudnnFwdAlgo(), convolutionMode, dilation); + INDArray ret = null; + try { + ret = helper.preOutput(input, weights, bias, kernel, strides, pad, layerConf().getCudnnAlgoMode(), + layerConf().getCudnnFwdAlgo(), convolutionMode, dilation); + } catch (Exception e){ + if(layerConf().isCudnnAllowFallback()){ + helperCountFail++; + log.warn("CuDNN execution failed - falling back on built-in implementation",e); + } else { + throw new RuntimeException(e); + } + } if (ret != null) { return new Pair<>(ret, null); } diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java index 0a700d9cd14c..b4d938b86a95 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java @@ -56,6 +56,7 @@ public class SubsamplingLayer extends AbstractLayer { protected SubsamplingHelper helper = null; + protected int helperCountFail = 0; protected ConvolutionMode convolutionMode; public SubsamplingLayer(NeuralNetConfiguration conf) { @@ -131,9 +132,19 @@ public Pair backpropGradient(INDArray epsilon) { int outW = outSize[1]; - if (helper != null) { - Pair ret = helper.backpropGradient(input, epsilon, kernel, strides, pad, - layerConf().getPoolingType(), convolutionMode, dilation); + if (helper != null && (helperCountFail == 0 || !layerConf().isCudnnAllowFallback())) { + Pair ret = null; + try{ + ret = helper.backpropGradient(input, epsilon, kernel, strides, pad, + layerConf().getPoolingType(), convolutionMode, dilation); + } catch (Exception e){ + if(layerConf().isCudnnAllowFallback()){ + helperCountFail++; + log.warn("CuDNN execution failed - falling back on built-in implementation",e); + } else { + throw new RuntimeException(e); + } + } if (ret != null) { return ret; } @@ -304,9 +315,19 @@ public INDArray activate(boolean training) { int outH = outSize[0]; int outW = outSize[1]; - if (helper != null) { - INDArray ret = helper.activate(input, training, kernel, strides, pad, layerConf().getPoolingType(), - convolutionMode, dilation); + if (helper != null && (helperCountFail == 0 || !layerConf().isCudnnAllowFallback())) { + INDArray ret = null; + try { + ret = helper.activate(input, training, kernel, strides, pad, layerConf().getPoolingType(), + convolutionMode, dilation); + } catch (Exception e){ + if(layerConf().isCudnnAllowFallback()){ + helperCountFail++; 
+ log.warn("CuDNN execution failed - falling back on built-in implementation",e); + } else { + throw new RuntimeException(e); + } + } if (ret != null) { return ret; } From 09f847ea54374f1cbd12514e3debdfffe9e1b169 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Wed, 28 Mar 2018 21:26:03 +1100 Subject: [PATCH 5/8] Disable fallback for cudnn gradient checks --- .../gradientcheck/CNNGradientCheckTest.java | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index d086bf3ae010..8e63d826418d 100644 --- a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -70,7 +70,9 @@ public void testGradientCNNMLN() { MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build()) + .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn) + .cudnnAllowFallback(false) + .build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)).pretrain(false).backprop(true); @@ -156,6 +158,7 @@ public void testGradientCNNL1L2MLN() { OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6) + .cudnnAllowFallback(false) .weightInit(WeightInit.XAVIER).activation(afn) .updater(new NoOp()).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3) @@ -240,6 +243,7 @@ public void testCnnWithSpaceToDepth() { .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) .list().layer(new ConvolutionLayer.Builder(kernel).nIn(inputDepth).hasBias(false) + .cudnnAllowFallback(false) .nOut(1).build()) //output: (5-2+0)/1+1 = 4 .layer(new SpaceToDepthLayer.Builder(blocks, SpaceToDepthLayer.DataFormat.NCHW) .build()) // (mb,1,4,4) -> (mb,4,2,2) @@ -302,6 +306,7 @@ public void testCnnWithSpaceToBatch() { .updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)) .list().layer(new ConvolutionLayer.Builder(kernel).nIn(inputDepth) + .cudnnAllowFallback(false) .nOut(3).build())//output: (5-2+0)/1+1 = 4 .layer(new SpaceToBatchLayer.Builder(blocks).build()) //trivial space to batch .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -368,6 +373,7 @@ public void testCnnWithUpsampling() { .dist(new NormalDistribution(0, 1)) .list().layer(new ConvolutionLayer.Builder(kernel, stride, padding).nIn(inputDepth) + .cudnnAllowFallback(false) .nOut(3).build())//output: (5-2+0)/1+1 = 4 .layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3 .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -437,8 +443,10 @@ public void testCnnWithSubsampling() { .list().layer(0, new ConvolutionLayer.Builder(kernel, stride, padding).nIn(inputDepth) + .cudnnAllowFallback(false) .nOut(3).build())//output: (5-2+0)/1+1 = 4 .layer(1, new SubsamplingLayer.Builder(poolingType) + .cudnnAllowFallback(false) .kernelSize(kernel).stride(stride).padding(padding) .pnorm(pnorm).build()) 
//output: (4-2+0)/1+1 =3 -> 3x3x3 .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -506,11 +514,14 @@ public void testCnnWithSubsamplingV2() { .list().layer(0, new ConvolutionLayer.Builder(kernel, stride, padding).nIn(inputDepth) + .cudnnAllowFallback(false) .nOut(3).build())//output: (5-2+0)/1+1 = 4 .layer(1, new SubsamplingLayer.Builder(poolingType) .kernelSize(kernel).stride(stride).padding(padding) + .cudnnAllowFallback(false) .pnorm(pNorm).build()) //output: (4-2+0)/1+1 =3 -> 3x3x3 .layer(2, new ConvolutionLayer.Builder(kernel, stride, padding) + .cudnnAllowFallback(false) .nIn(3).nOut(2).build()) //Output: (3-2+0)/1+1 = 2 .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(2 * 2 * 2) @@ -566,10 +577,13 @@ public void testCnnMultiLayer() { .activation(afn) .list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1) + .cudnnAllowFallback(false) .padding(0, 0).nIn(inputDepth).nOut(2).build())//output: (5-2+0)/1+1 = 4 .layer(1, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(2, 2) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).build()) //(4-2+0)/1+1 = 3 .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(2, 2) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).build()) //(3-2+0)/1+1 = 2 .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut) @@ -630,11 +644,14 @@ public void testCnnSamePaddingMode() { .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(Same).list() .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build()) .layer(1, new SubsamplingLayer.Builder() .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).build()) .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) @@ -689,10 +706,12 @@ public void testCnnSamePaddingModeStrided() { } Layer convLayer = new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) + .cudnnAllowFallback(false) .stride(stride, stride).padding(0, 0).nIn(inputDepth).nOut(2).build(); Layer poolLayer = new SubsamplingLayer.Builder() .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) + .cudnnAllowFallback(false) .stride(stride, stride).padding(0, 0).build(); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) @@ -761,10 +780,11 @@ public void testCnnZeroPaddingLayer() { new NeuralNetConfiguration.Builder().updater(new NoOp()).weightInit(WeightInit.DISTRIBUTION) .dist(new NormalDistribution(0, 1)).list() .layer(0, new ConvolutionLayer.Builder(kernel, stride, padding) + .cudnnAllowFallback(false) .nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5 .layer(1, new ZeroPaddingLayer.Builder(zeroPad).build()).layer(2, new ConvolutionLayer.Builder(kernel, stride, - padding).nIn(3).nOut(3).build())//output: (6-2+0)/1+1 = 5 + padding).nIn(3).nOut(3).cudnnAllowFallback(false).build())//output: (6-2+0)/1+1 = 5 .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(4).build()) .setInputType(InputType.convolutional(height, width, inputDepth)) @@ -874,6 +894,7 @@ public void 
testDeconvolution2D() { .activation(act) .list() .layer(new Deconvolution2D.Builder().name("deconvolution_2D_layer") + .cudnnAllowFallback(false) .kernelSize(k, k) .stride(s, s) .dilation(d, d) @@ -949,6 +970,7 @@ public void testSeparableConv2D() { .convolutionMode(cm) .list() .layer(new SeparableConvolution2D.Builder().name("Separable conv 2D layer") + .cudnnAllowFallback(false) .kernelSize(k, k) .stride(s, s) .dilation(d, d) @@ -1022,12 +1044,14 @@ public void testCnnDilated() { .kernelSize(k, k) .stride(s, s) .dilation(d, d) + .cudnnAllowFallback(false) .nIn(inputDepth).nOut(2).build()); if (subsampling) { b.layer(new SubsamplingLayer.Builder() .poolingType(SubsamplingLayer.PoolingType.MAX) .kernelSize(k, k) .stride(s, s) + .cudnnAllowFallback(false) .dilation(d, d) .build()); } else { @@ -1035,6 +1059,7 @@ public void testCnnDilated() { .kernelSize(k, k) .stride(s, s) .dilation(d, d) + .cudnnAllowFallback(false) .build()); } @@ -1094,9 +1119,10 @@ public void testCropping2DLayer() { .convolutionMode(ConvolutionMode.Same) .weightInit(new NormalDistribution(0, 1)).list() .layer(new ConvolutionLayer.Builder(kernel, stride, padding) + .cudnnAllowFallback(false) .nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5 .layer(new Cropping2D(crop)) - .layer(new ConvolutionLayer.Builder(kernel, stride,padding).nIn(3).nOut(3).build()) + .layer(new ConvolutionLayer.Builder(kernel, stride,padding).nIn(3).nOut(3).cudnnAllowFallback(false).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(4).build()) .setInputType(InputType.convolutional(height, width, inputDepth)) @@ -1157,11 +1183,14 @@ public void DEBUG_testCnnSamePaddingMode() { .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(Same).list() .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build()) .layer(1, new SubsamplingLayer.Builder() + .cudnnAllowFallback(false) .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) .stride(1, 1).padding(0, 0).build()) .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) @@ -1210,11 +1239,14 @@ public void DEBUG_testCnnSamePaddingMode2() { .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(Same).list() .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build()) .layer(1, new SubsamplingLayer.Builder() + .cudnnAllowFallback(false) .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) .stride(1, 1).padding(0, 0).build()) .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k) + .cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) From 04618b28d5f7a0636a2e312d43cbf928cc46c94b Mon Sep 17 00:00:00 2001 From: Alex Black Date: Thu, 29 Mar 2018 11:24:27 +1100 Subject: [PATCH 6/8] CuDNN: Implement manual padding for SAME mode in convolution helper --- .../convolution/CudnnConvolutionHelper.java | 148 +++++++++++++++--- 1 file changed, 127 insertions(+), 21 deletions(-) diff --git 
a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java index 2f423b690115..d0d11214e35f 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java @@ -17,6 +17,8 @@ */ package org.deeplearning4j.nn.layers.convolution; +import lombok.AllArgsConstructor; +import lombok.Data; import lombok.extern.slf4j.Slf4j; import org.bytedeco.javacpp.Pointer; import org.deeplearning4j.nn.conf.ConvolutionMode; @@ -39,6 +41,7 @@ import org.nd4j.linalg.api.ops.executioner.GridExecutioner; import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.jcublas.context.CudaContext; import org.nd4j.linalg.primitives.Pair; import org.nd4j.util.OneTimeLogger; @@ -46,8 +49,9 @@ import java.util.Arrays; import static org.bytedeco.javacpp.cuda.CUstream_st; -import static org.bytedeco.javacpp.cuda.cudaGetErrorString; import static org.bytedeco.javacpp.cudnn.*; +import static org.nd4j.linalg.indexing.NDArrayIndex.all; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; /** * cuDNN-based helper for the convolution layer. @@ -133,22 +137,28 @@ public Pair backpropGradient(INDArray input, INDArray weight int code; int miniBatch = input.size(0); - int inH = input.size(2); - int inW = input.size(3); +// int inH = input.size(2); +// int inW = input.size(3); int outDepth = weights.size(0); int inDepth = weights.size(1); int kH = weights.size(2); int kW = weights.size(3); - int[] outSize; - if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation - pad = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); - } else { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation); //Also performs validation - } +// int[] outSize; +// if (convolutionMode == ConvolutionMode.Same) { +// outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation +// pad = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); +// } else { +// outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation); //Also performs validation +// } + CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode); + input = args.getInput(); + int inH = input.size(2); + int inW = input.size(3); + int[] srcStride = input.stride(); + int[] outSize = args.getOutSize(); int outH = outSize[0]; int outW = outSize[1]; @@ -157,7 +167,6 @@ public Pair backpropGradient(INDArray input, INDArray weight delta = delta.dup(); } - int[] srcStride = input.stride(); int[] deltaStride = delta.stride(); int[] algo1 = new int[1]; int[] algo2 = new int[1]; @@ -320,6 +329,14 @@ public Pair backpropGradient(INDArray input, INDArray weight if (CudaEnvironment.getInstance().getConfiguration().isDebug()) context.syncOldStream(); + //Note that: if we had to manually pad for SAME mode, we have to 'undo' this manual padding for the epsilon + // we return... 
+ if(args.isManualPadBottom() || args.isManualPadRight()) { + epsNext = epsNext.get(all(), all(), + interval(0, epsNext.size(2) - (args.isManualPadBottom() ? 1 : 0)), + interval(0, epsNext.size(3) - (args.isManualPadRight() ? 1 : 0))); + } + return new Pair<>(retGradient, epsNext); } @@ -334,27 +351,59 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] int code; int miniBatch = input.size(0); - int inH = input.size(2); - int inW = input.size(3); + int outDepth = weights.size(0); int inDepth = weights.size(1); int kH = weights.size(2); int kW = weights.size(3); + CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode); + input = args.getInput(); + int inH = input.size(2); + int inW = input.size(3); int[] srcStride = input.stride(); + int[] outSize = args.getOutSize(); + +// int[] outSize; +// if (convolutionMode == ConvolutionMode.Same) { +// outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation +//// pad = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); +// pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); +// int[] padBottomRight = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); +// if(!Arrays.equals(pad, padBottomRight)){ +// System.out.println("*** MANUAL PADDING APPLIED ***"); +// /* +// CuDNN - even as of 7.1 (CUDA 9.1) still doesn't have support for proper SAME mode padding (i.e., asymmetric +// padding) - padding can *only* be specified as the same amount for both the top/bottom, and for left/right. +// In SAME mode padding, sometimes these are the same - but often they are not. +// Note that when they differ, the bottom or right padding will be exactly 1 more than the top or left padding. +// As per TF, we'll manually pad here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/conv_ops.cc#L571-L607 +// */ +// boolean manualPadBottom = (pad[0] != padBottomRight[0]); +// boolean manualPadRight = (pad[1] != padBottomRight[1]); +// +// //NCHW format +// int[] newShape = new int[]{input.size(0), input.size(1), +// input.size(2) + (manualPadBottom ? 1 : 0), +// input.size(3) + (manualPadRight ? 1 : 0)}; +// INDArray newInput = Nd4j.create(newShape); +// newInput.put(new INDArrayIndex[]{all(), all(), interval(0,input.size(2)), +// interval(0, input.size(3))}, input); +// input = newInput; +// inH = input.size(2); +// inW = input.size(3); +// //Now: we've manually applied the "extra" bottom/right padding only - if required. 
Consequently, we +// // now have the same amount of padding required for top/bottom, and left/right - which we'll let +// // CuDNN handle +// } +// } else { +// outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation); //Also performs validation +// } if (Nd4j.getExecutioner() instanceof GridExecutioner) ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); - int[] outSize; - if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation - pad = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); - } else { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation); //Also performs validation - } - INDArray z; @@ -562,4 +611,61 @@ public INDArray activate(INDArray z, IActivation afn) { return activation; } + protected CudnnForwardArgs getCudnnForwardArgs(INDArray input, int[] kernel, int[] strides, int[] padding, int[] dilation, + ConvolutionMode convolutionMode){ + INDArray origInput = input; + + int inH = input.size(2); + int inW = input.size(3); + + boolean manualPadBottom = false; + boolean manualPadRight = false; + + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation + padding = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); + int[] padBottomRight = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); + if(!Arrays.equals(padding, padBottomRight)){ + /* + CuDNN - even as of 7.1 (CUDA 9.1) still doesn't have support for proper SAME mode padding (i.e., asymmetric + padding) - padding can *only* be specified as the same amount for both the top/bottom, and for left/right. + In SAME mode padding, sometimes these are the same - but often they are not. + Note that when they differ, the bottom or right padding will be exactly 1 more than the top or left padding. + As per TF, we'll manually pad here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/conv_ops.cc#L571-L607 + */ + manualPadBottom = (padding[0] != padBottomRight[0]); + manualPadRight = (padding[1] != padBottomRight[1]); + + //NCHW format + int[] newShape = new int[]{input.size(0), input.size(1), + input.size(2) + (manualPadBottom ? 1 : 0), + input.size(3) + (manualPadRight ? 1 : 0)}; + INDArray newInput = Nd4j.create(newShape); + newInput.put(new INDArrayIndex[]{all(), all(), interval(0,input.size(2)), + interval(0, input.size(3))}, input); + input = newInput; + //Now: we've manually applied the "extra" bottom/right padding only - if required. 
Consequently, we + // now have the same amount of padding required for top/bottom, and left/right - which we'll let + // CuDNN handle + } + } else { + outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, padding, convolutionMode, dilation); //Also performs validation + } + + return new CudnnForwardArgs(manualPadBottom, manualPadRight, input, origInput, padding, outSize); + } + + + @AllArgsConstructor + @Data + private static class CudnnForwardArgs { + private boolean manualPadBottom; + private boolean manualPadRight; + private INDArray input; + private INDArray origInput; + private int[] padding; + private int[] outSize; + } + } From 58ea8bc37964f6181c5bd208cb29ddd6d6061f99 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Thu, 29 Mar 2018 11:58:15 +1100 Subject: [PATCH 7/8] View check for CudnnSubsamplingHelper --- .../subsampling/CudnnSubsamplingHelper.java | 5 +- .../gradientcheck/CNNGradientCheckTest.java | 112 ------------------ 2 files changed, 2 insertions(+), 115 deletions(-) diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java index 4c658c52730a..bcbf27fe31e6 100644 --- a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/CudnnSubsamplingHelper.java @@ -132,7 +132,6 @@ public Pair backpropGradient(INDArray input, INDArray epsilo //Epsilons in shape: [miniBatch, depth, outH, outW] //Epsilons out shape: [miniBatch, depth, inH, inW] - int poolingMode; switch (poolingType) { case AVG: @@ -145,9 +144,9 @@ public Pair backpropGradient(INDArray input, INDArray epsilo return null; } - if (!Shape.strideDescendingCAscendingF(epsilon)) { + if (!Shape.strideDescendingCAscendingF(epsilon) || epsilon.isView()) { // apparently not supported by cuDNN - epsilon = epsilon.dup(); + epsilon = epsilon.dup('c'); } int[] srcStride = input.stride(); diff --git a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index 8e63d826418d..6bdd50956b08 100644 --- a/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/deeplearning4j-cuda/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -1158,116 +1158,4 @@ public void testCropping2DLayer() { } } } - - - - @Test - public void DEBUG_testCnnSamePaddingMode() { - int nOut = 2; - - int minibatchSize = 1; - int height = 4; - int width = 5; - int k = 2; - int inputDepth = 1; - - Nd4j.getRandom().setSeed(12345); - - INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); - INDArray labels = Nd4j.zeros(minibatchSize, nOut); - for (int i = 0; i < minibatchSize; i++) { - labels.putScalar(new int[]{i, i % nOut}, 1.0); - } - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) - .updater(new NoOp()) - .activation(Activation.TANH).convolutionMode(Same).list() - .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) - .cudnnAllowFallback(false) - .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build()) - .layer(1, new SubsamplingLayer.Builder() - .cudnnAllowFallback(false) - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) - .stride(1, 
1).padding(0, 0).build()) - .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k) - .cudnnAllowFallback(false) - .stride(1, 1).padding(0, 0).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - for (int i = 0; i < net.getLayers().length; i++) { - System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams()); - } - - String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height - + ", kernelSize=" + k; - System.out.println(msg); - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(msg, gradOK); - - TestUtils.testModelSerialization(net); - } - - - - @Test - public void DEBUG_testCnnSamePaddingMode2() { - int nOut = 2; - - int minibatchSize = 1; - int height = 4; - int width = 5; - int k = 2; - int inputDepth = 1; - - Nd4j.getRandom().setSeed(12345); - - INDArray input = Nd4j.rand(new int[]{minibatchSize, inputDepth, height, width}); - INDArray labels = Nd4j.zeros(minibatchSize, nOut); - for (int i = 0; i < minibatchSize; i++) { - labels.putScalar(new int[]{i, i % nOut}, 1.0); - } - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) - .updater(new NoOp()) - .activation(Activation.TANH).convolutionMode(Same).list() - .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) - .cudnnAllowFallback(false) - .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build()) - .layer(1, new SubsamplingLayer.Builder() - .cudnnAllowFallback(false) - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) - .stride(1, 1).padding(0, 0).build()) - .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k) - .cudnnAllowFallback(false) - .stride(1, 1).padding(0, 0).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(height, width, inputDepth)).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - for (int i = 0; i < net.getLayers().length; i++) { - System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams()); - } - - String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height - + ", kernelSize=" + k; - System.out.println(msg); - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(msg, gradOK); - - TestUtils.testModelSerialization(net); - } } From 96650b66e511df027541a7aaf08f512f12ff64d7 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Thu, 29 Mar 2018 12:07:43 +1100 Subject: [PATCH 8/8] Final cleanup --- .../convolution/CudnnConvolutionHelper.java | 56 ++----------------- .../nn/conf/layers/ConvolutionLayer.java | 3 +- .../nn/conf/layers/SubsamplingLayer.java | 3 +- 3 files changed, 7 insertions(+), 55 deletions(-) diff --git a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java index d0d11214e35f..67c6b161ba98 100644 --- 
a/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java +++ b/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/convolution/CudnnConvolutionHelper.java @@ -137,22 +137,11 @@ public Pair backpropGradient(INDArray input, INDArray weight int code; int miniBatch = input.size(0); -// int inH = input.size(2); -// int inW = input.size(3); - int outDepth = weights.size(0); int inDepth = weights.size(1); int kH = weights.size(2); int kW = weights.size(3); -// int[] outSize; -// if (convolutionMode == ConvolutionMode.Same) { -// outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation -// pad = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); -// } else { -// outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation); //Also performs validation -// } - CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode); input = args.getInput(); int inH = input.size(2); @@ -330,7 +319,7 @@ public Pair backpropGradient(INDArray input, INDArray weight context.syncOldStream(); //Note that: if we had to manually pad for SAME mode, we have to 'undo' this manual padding for the epsilon - // we return... + // we return. The returned epsilon (i.e., dL/dIn array) has to be the same shape as the *original* input. if(args.isManualPadBottom() || args.isManualPadRight()) { epsNext = epsNext.get(all(), all(), interval(0, epsNext.size(2) - (args.isManualPadBottom() ? 1 : 0)), @@ -351,8 +340,6 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] int code; int miniBatch = input.size(0); - - int outDepth = weights.size(0); int inDepth = weights.size(1); int kH = weights.size(2); @@ -365,46 +352,9 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] int[] srcStride = input.stride(); int[] outSize = args.getOutSize(); -// int[] outSize; -// if (convolutionMode == ConvolutionMode.Same) { -// outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation -//// pad = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); -// pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); -// int[] padBottomRight = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); -// if(!Arrays.equals(pad, padBottomRight)){ -// System.out.println("*** MANUAL PADDING APPLIED ***"); -// /* -// CuDNN - even as of 7.1 (CUDA 9.1) still doesn't have support for proper SAME mode padding (i.e., asymmetric -// padding) - padding can *only* be specified as the same amount for both the top/bottom, and for left/right. -// In SAME mode padding, sometimes these are the same - but often they are not. -// Note that when they differ, the bottom or right padding will be exactly 1 more than the top or left padding. -// As per TF, we'll manually pad here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/conv_ops.cc#L571-L607 -// */ -// boolean manualPadBottom = (pad[0] != padBottomRight[0]); -// boolean manualPadRight = (pad[1] != padBottomRight[1]); -// -// //NCHW format -// int[] newShape = new int[]{input.size(0), input.size(1), -// input.size(2) + (manualPadBottom ? 
1 : 0), -// input.size(3) + (manualPadRight ? 1 : 0)}; -// INDArray newInput = Nd4j.create(newShape); -// newInput.put(new INDArrayIndex[]{all(), all(), interval(0,input.size(2)), -// interval(0, input.size(3))}, input); -// input = newInput; -// inH = input.size(2); -// inW = input.size(3); -// //Now: we've manually applied the "extra" bottom/right padding only - if required. Consequently, we -// // now have the same amount of padding required for top/bottom, and left/right - which we'll let -// // CuDNN handle -// } -// } else { -// outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation); //Also performs validation -// } - if (Nd4j.getExecutioner() instanceof GridExecutioner) ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); - INDArray z; if (Nd4j.getWorkspaceManager().checkIfWorkspaceExistsAndActive(ComputationGraph.WORKSPACE_EXTERNAL)) { @@ -478,6 +428,10 @@ public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] 0, algo); if(code != CUDNN_STATUS_SUCCESS){ + //If CuDNN can't infer algorithm - try IMPLICIT_GEMM + //Why this specifically? According to the docs, it seems to have the least number of restrictions + // to things like dilation + OneTimeLogger.warn(log, "Error getting CuDNN forward algorithm - falling back on IMPLICIT_GEMM"); mode = AlgoMode.USER_SPECIFIED; fwdAlgo = FwdAlgo.IMPLICIT_GEMM; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index 56c6eeadf13b..0d3f61ce62d4 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -395,11 +395,10 @@ public T cudnnBwdDataMode(BwdDataAlgo cudnnBwdDataAlgo) { /** * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the build-in (non-CuDNN) + * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in (non-CuDNN) * implementation for ConvolutionLayer will be used * * @param allowFallback Whether fallback to non-CuDNN implementation should be used - * @return */ public T cudnnAllowFallback(boolean allowFallback){ this.cudnnAllowFallback = allowFallback; diff --git a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index 8d4dd9dd71d1..666e538c9b8f 100644 --- a/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -403,11 +403,10 @@ public T eps(double eps) { /** * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the build-in (non-CuDNN) + * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in (non-CuDNN) * implementation for ConvolutionLayer will be used * * @param allowFallback Whether fallback to non-CuDNN implementation should be used - * @return */ public T cudnnAllowFallback(boolean allowFallback){ this.cudnnAllowFallback = allowFallback;
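
For reference, the sketch below shows how the cudnnAllowFallback setting documented in the two hunks above is driven from the builder API, mirroring the gradient-check configurations added in PATCH 5/8. It is illustrative only: the class name and the concrete kernel and input dimensions are made up for the example, while the builder methods themselves are the ones used elsewhere in this series.

    import org.deeplearning4j.nn.conf.ConvolutionMode;
    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.inputs.InputType;
    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.learning.config.NoOp;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    // Hypothetical example class, not part of this patch series
    public class CudnnFallbackConfigSketch {
        public static MultiLayerConfiguration build() {
            return new NeuralNetConfiguration.Builder()
                    .seed(12345)
                    .updater(new NoOp())
                    .activation(Activation.TANH)
                    .convolutionMode(ConvolutionMode.Same)
                    .list()
                    .layer(new ConvolutionLayer.Builder(2, 2)
                            .nIn(1).nOut(2)
                            .cudnnAllowFallback(false)   // propagate cuDNN errors instead of silently falling back
                            .build())
                    .layer(new SubsamplingLayer.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(2, 2).stride(1, 1)
                            .cudnnAllowFallback(false)
                            .build())
                    .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .activation(Activation.SOFTMAX).nOut(2).build())
                    .setInputType(InputType.convolutionalFlat(4, 5, 1))
                    .build();
        }
    }

With the try/catch fallback changes earlier in this series, a network built this way rethrows any CuDNN failure as a RuntimeException rather than logging a warning, incrementing helperCountFail and permanently routing the layer to the built-in implementation, which is exactly the behaviour the disabled-fallback gradient checks rely on.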
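
As background for the manual SAME-mode padding introduced in PATCH 6/8 and tidied in the final cleanup, the following standalone sketch shows the asymmetric padding split that cuDNN cannot express directly. It uses the TensorFlow-style SAME arithmetic referenced in the helper's comments rather than DL4J's ConvolutionUtils methods, and the dimensions are chosen only to trigger the asymmetric case, so treat it as an illustration of the idea rather than the helper's actual code path.

    // Hypothetical illustration class, not part of this patch series
    public class SamePaddingSplitSketch {
        public static void main(String[] args) {
            int inH = 4, k = 2, s = 1, d = 1;        // input height, kernel, stride, dilation

            int effectiveK = (k - 1) * d + 1;        // dilated kernel extent
            int outH = (inH + s - 1) / s;            // ceil(inH / s) for SAME mode
            int totalPad = Math.max((outH - 1) * s + effectiveK - inH, 0);
            int padTop = totalPad / 2;               // the symmetric value cuDNN is given
            int padBottom = totalPad - padTop;       // may be padTop + 1

            // cuDNN accepts a single padding value per spatial dimension, so when
            // padBottom == padTop + 1 the helper pads the input by one extra row itself
            // and, in backpropGradient, crops that row back off the returned epsilon so
            // dL/dIn keeps the original input shape.
            System.out.println("padTop=" + padTop + ", padBottom=" + padBottom
                    + ", manual bottom pad needed: " + (padBottom != padTop));
        }
    }

For these values the split is padTop = 0, padBottom = 1: getCudnnForwardArgs hands cuDNN the top/left amount, applies the extra bottom/right row to the input itself, and backpropGradient later crops that row off epsNext, matching the interval-based trimming in the hunks above.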