danpovey · danpovey · Oct 24, 2018 · Oct 23, 2018 · Oct 24, 2018
diff --git a/src/nnet3/convolution-cudnn-test.cc b/src/nnet3/convolution-cudnn-test.cc
@@ -36,7 +36,7 @@ static void GetRandomConfig(ConvolutionComputationConfig *config) {
   config->filter_stride_vertical = RandInt(1, 2);
   config->filter_stride_horizontal = RandInt(1, 2);
   config->filter_dilation_vertical = RandInt(1, 2);
-  config->filter_dilation_horizontal = 1;
+  config->filter_dilation_horizontal = RandInt(1, 2);
 
   config->input_image_height = RandInt(10, 20);
   config->input_image_width = RandInt(10, 20);

diff --git a/src/nnet3/convolution-cudnn.cc b/src/nnet3/convolution-cudnn.cc
@@ -30,6 +30,15 @@ namespace {
 // static variables.
 const BaseFloat ONE(1.0);
 const BaseFloat ZERO(0.0);
+
+template<typename CudnnAlgoPerfT>
+void CheckCorrectness(CudnnAlgoPerfT perf_results, const char* function) {
+  if (perf_results.status != CUDNN_STATUS_SUCCESS) {
+    KALDI_WARN << function << " had an error: " <<
+      cudnnGetErrorString(perf_results.status) << ". Continuing with algo" <<
+      perf_results.algo << " but results may not be ideal.";
+  }
+}
 }
 
 
@@ -172,11 +181,14 @@ void ConvolutionComputation::InitCudnn() {
   // relative to what the CUDNN interface specifies; this is because Kaldi's
   // notion of what is height vs. width is opposite to CUDNN's.  (There
   // are good reasons for this).
+  int in_dims[4] = {c.num_images, c.num_channels_in, c.input_image_width,
+		    c.input_image_height};
+  int in_stride[4] = {c.num_channels_in * c.input_image_width * c.input_image_height,
+		      c.input_image_width * c.input_image_height,
+		      c.input_image_height, 1};
   CUDNN_SAFE_CALL(
-      cudnnSetTensor4dDescriptor(input_desc_, CUDNN_TENSOR_NHWC,
-                                 CUDNN_DATA_BASEFLOAT, c.num_images,
-                                 c.num_channels_in, c.input_image_width,
-                                 c.input_image_height));
+      cudnnSetTensorNdDescriptor(input_desc_, CUDNN_DATA_BASEFLOAT, 4, in_dims,
+				 in_stride));
   // Again: width and height are swapped.
   CUDNN_SAFE_CALL(
       cudnnSetConvolution2dDescriptor(
@@ -198,7 +210,7 @@ void ConvolutionComputation::InitCudnn() {
   // smallest: num-channels-out, width, height, num-channels-in.
   CUDNN_SAFE_CALL(
       cudnnSetFilter4dDescriptor(params_desc_, CUDNN_DATA_BASEFLOAT,
-                                 CUDNN_TENSOR_NHWC, c.num_channels_out,
+                                 CUDNN_TENSOR_NCHW, c.num_channels_out,
                                  c.num_channels_in, c.filter_width,
                                  c.filter_height));
 
@@ -219,11 +231,14 @@ void ConvolutionComputation::InitCudnn() {
 
   // These two member functions depend only on input_desc_,
   // conv_desc_, and params_desc_, so they are safe to call now.
+  int out_dims[4] = {c.num_images, c.num_channels_out, c.output_image_width,
+		     c.output_image_height};
+  int out_stride[4] = {c.num_channels_out * c.output_image_width * c.output_image_height,
+		       c.output_image_width * c.output_image_height,
+		       c.output_image_height, 1};
   CUDNN_SAFE_CALL(
-      cudnnSetTensor4dDescriptor(output_desc_, CUDNN_TENSOR_NHWC,
-                                 CUDNN_DATA_BASEFLOAT, c.num_images,
-                                 c.num_channels_out, kaldi_width_cudnn_height,
-                                 kaldi_height_cudnn_width));
+    cudnnSetTensorNdDescriptor(output_desc_, CUDNN_DATA_BASEFLOAT, 4, out_dims,
+			       out_stride));
 
   // We pad the bias with leading dims of 1, since CUDNN's tensors appear to
   // need a dimension of at least 3.
@@ -257,7 +272,7 @@ void ConvolutionComputation::InitCudnn() {
   KALDI_ASSERT(returned_algo_count > 0 &&
                "No algorithms were returned by CUDNN.");
   const cudnnConvolutionFwdAlgoPerf_t& best_forward = forward_results[0];
-  KALDI_ASSERT(best_forward.status == CUDNN_STATUS_SUCCESS);
+  CheckCorrectness(best_forward, "cudnnFindConvolutionForwardAlgorithm");
   fwd_algo_ = best_forward.algo;
   delete [] forward_results;
 
@@ -278,7 +293,8 @@ void ConvolutionComputation::InitCudnn() {
                "No algorithms were returned by CUDNN.");
   const cudnnConvolutionBwdFilterAlgoPerf_t& best_backward_filter =
       backward_filter_results[0];
-  KALDI_ASSERT(best_backward_filter.status == CUDNN_STATUS_SUCCESS);
+  CheckCorrectness(best_backward_filter,
+		   "cudnnFindConvolutionBackwardFilterAlgorithm");
   bwd_filter_algo_ = best_backward_filter.algo;
   delete [] backward_filter_results;
 
@@ -299,7 +315,8 @@ void ConvolutionComputation::InitCudnn() {
                "No algorithms were returned by CUDNN.");
   const cudnnConvolutionBwdDataAlgoPerf_t& best_backward_data =
       backward_data_results[0];
-  KALDI_ASSERT(best_backward_data.status == CUDNN_STATUS_SUCCESS);
+  CheckCorrectness(best_backward_data,
+		   "cudnnFindConvolutionBackwardDataAlgorithm");
   bwd_data_algo_ = best_backward_data.algo;
   delete [] backward_data_results;