diff --git a/app/demo-interactor/KNDemoKernel.cu b/app/demo-interactor/KNDemoKernel.cu index d8a82ad9f1..4123fcddb4 100644 --- a/app/demo-interactor/KNDemoKernel.cu +++ b/app/demo-interactor/KNDemoKernel.cu @@ -166,6 +166,7 @@ void initialize(const CudaGridParams& grid, CELER_EXPECT(states.rng.size() == states.size()); initialize_kernel<<>>( params, states, initial); + CELER_CUDA_CHECK_ERROR(); } //---------------------------------------------------------------------------// @@ -180,6 +181,7 @@ void iterate(const CudaGridParams& grid, { iterate_kernel<<>>( params, state, secondaries, detector); + CELER_CUDA_CHECK_ERROR(); // Note: the device synchronize is useful for debugging and necessary for // timing diagnostics. diff --git a/app/demo-interactor/detail/DetectorUtils.cu b/app/demo-interactor/detail/DetectorUtils.cu index fed5950311..4df81b2f28 100644 --- a/app/demo-interactor/detail/DetectorUtils.cu +++ b/app/demo-interactor/detail/DetectorUtils.cu @@ -73,6 +73,7 @@ void bin_buffer(const DetectorPointers& detector) { auto params = KernelParamCalculator()(detector.capacity()); bin_buffer_kernel<<>>(detector); + CELER_CUDA_CHECK_ERROR(); } //---------------------------------------------------------------------------// @@ -84,6 +85,7 @@ void normalize(const DetectorPointers& device_ptrs, real_type norm) auto params = KernelParamCalculator()(device_ptrs.tally_deposition.size()); normalize_kernel<<>>(device_ptrs, norm); + CELER_CUDA_CHECK_ERROR(); } //---------------------------------------------------------------------------// diff --git a/app/demo-rasterizer/RDemoKernel.cu b/app/demo-rasterizer/RDemoKernel.cu index 850ee5b1fc..981f56b6b4 100644 --- a/app/demo-rasterizer/RDemoKernel.cu +++ b/app/demo-rasterizer/RDemoKernel.cu @@ -101,6 +101,7 @@ void trace(const GeoParamsPointers& geo_params, auto params = calc_kernel_params(image.dims[0]); trace_impl<<>>( geo_params, geo_state, image); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); } diff --git a/src/physics/em/detail/BetheHeitler.cu b/src/physics/em/detail/BetheHeitler.cu index 652430aa4d..5b5458d36d 100644 --- a/src/physics/em/detail/BetheHeitler.cu +++ b/src/physics/em/detail/BetheHeitler.cu @@ -87,7 +87,6 @@ void bethe_heitler_interact(const BetheHeitlerPointers& bh, auto params = calc_kernel_params(model.states.size()); bethe_heitler_interact_kernel<<>>( bh, model); - CELER_CUDA_CHECK_ERROR(); } diff --git a/src/physics/em/detail/EPlusGG.cu b/src/physics/em/detail/EPlusGG.cu index 9107e4ff68..2727cd61a6 100644 --- a/src/physics/em/detail/EPlusGG.cu +++ b/src/physics/em/detail/EPlusGG.cu @@ -79,7 +79,6 @@ void eplusgg_interact(const EPlusGGPointers& eplusgg, // Launch the kernel eplusgg_interact_kernel<<>>(eplusgg, model); - CELER_CUDA_CHECK_ERROR(); } diff --git a/src/physics/em/detail/KleinNishina.cu b/src/physics/em/detail/KleinNishina.cu index 01459aba98..355b91b0c0 100644 --- a/src/physics/em/detail/KleinNishina.cu +++ b/src/physics/em/detail/KleinNishina.cu @@ -74,7 +74,6 @@ void klein_nishina_interact(const KleinNishinaPointers& kn, auto params = calc_kernel_params(model.states.size()); klein_nishina_interact_kernel<<>>( kn, model); - CELER_CUDA_CHECK_ERROR(); } diff --git a/src/physics/em/detail/LivermorePE.cu b/src/physics/em/detail/LivermorePE.cu index 137ce8c57b..783809c564 100644 --- a/src/physics/em/detail/LivermorePE.cu +++ b/src/physics/em/detail/LivermorePE.cu @@ -87,7 +87,6 @@ void livermore_pe_interact(const LivermorePEPointers& pe, auto params = calc_kernel_params(model.states.size()); livermore_pe_interact_kernel<<>>( pe, model); - CELER_CUDA_CHECK_ERROR(); } diff --git a/src/sim/detail/InitializeTracks.cu b/src/sim/detail/InitializeTracks.cu index 274a642ef0..8633ddaeb4 100644 --- a/src/sim/detail/InitializeTracks.cu +++ b/src/sim/detail/InitializeTracks.cu @@ -273,7 +273,6 @@ void init_tracks(const StatePointers& states, auto lparams = calc_launch_params(num_vacancies); init_tracks_kernel<<>>( states, params, inits, num_vacancies); - CELER_CUDA_CHECK_ERROR(); } @@ -290,7 +289,6 @@ void locate_alive(const StatePointers& states, auto lparams = calc_launch_params(states.size()); locate_alive_kernel<<>>( states, params, inits); - CELER_CUDA_CHECK_ERROR(); } @@ -312,7 +310,6 @@ void process_primaries(Span primaries, auto lparams = calc_launch_params(primaries.size()); process_primaries_kernel<<>>( primaries, initializers); - CELER_CUDA_CHECK_ERROR(); } @@ -334,7 +331,6 @@ void process_secondaries(const StatePointers& states, auto lparams = calc_launch_params(states.size()); process_secondaries_kernel<<>>( states, params, inits); - CELER_CUDA_CHECK_ERROR(); } diff --git a/test/base/NumericLimits.test.cu b/test/base/NumericLimits.test.cu index 5bcd603816..3de85b2dcb 100644 --- a/test/base/NumericLimits.test.cu +++ b/test/base/NumericLimits.test.cu @@ -56,6 +56,7 @@ NLTestOutput nl_test() auto params = calc_launch_params(3); nl_test_kernel<<>>(result_device); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); // Copy to host diff --git a/test/base/Range.test.cu b/test/base/Range.test.cu index 5e7aa1865e..f22551f933 100644 --- a/test/base/Range.test.cu +++ b/test/base/Range.test.cu @@ -42,6 +42,7 @@ RangeTestOutput rangedev_test(RangeTestInput input) thrust::raw_pointer_cast(y_dev.data()), thrust::raw_pointer_cast(z_dev.data()), z_dev.size()); + CELER_CUDA_CHECK_ERROR(); // Copy result back to CPU RangeTestOutput result; diff --git a/test/base/StackAllocator.test.cu b/test/base/StackAllocator.test.cu index 37cb6a424e..b13337ec6b 100644 --- a/test/base/StackAllocator.test.cu +++ b/test/base/StackAllocator.test.cu @@ -85,11 +85,13 @@ SATestOutput sa_test(SATestInput input) auto params = calc_launch_params(input.num_threads); sa_test_kernel<<>>( input, raw_pointer_cast(out.data())); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); // Access secondaries after the first kernel completed sa_post_test_kernel<<>>( input, raw_pointer_cast(out.data())); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); // Copy data back to host diff --git a/test/geometry/GeoTrackView.test.cu b/test/geometry/GeoTrackView.test.cu index ce2cabd63d..d5835e7a6b 100644 --- a/test/geometry/GeoTrackView.test.cu +++ b/test/geometry/GeoTrackView.test.cu @@ -77,6 +77,7 @@ VGGTestOutput vgg_test(VGGTestInput input) input.max_segments, raw_pointer_cast(ids.data()), raw_pointer_cast(distances.data())); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); // Copy result back to CPU diff --git a/test/geometry/LinearPropagator.test.cu b/test/geometry/LinearPropagator.test.cu index 9288aa203d..43e7aa4bc3 100644 --- a/test/geometry/LinearPropagator.test.cu +++ b/test/geometry/LinearPropagator.test.cu @@ -90,6 +90,7 @@ LinPropTestOutput linProp_test(LinPropTestInput input) input.max_segments, raw_pointer_cast(ids.data()), raw_pointer_cast(distances.data())); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); // Copy result back to CPU diff --git a/test/physics/base/Particle.test.cu b/test/physics/base/Particle.test.cu index 79f1f5bc48..0f35ab557c 100644 --- a/test/physics/base/Particle.test.cu +++ b/test/physics/base/Particle.test.cu @@ -66,6 +66,7 @@ PTVTestOutput ptv_test(PTVTestInput input) input.states, raw_pointer_cast(init.data()), raw_pointer_cast(result.data())); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); PTVTestOutput output; diff --git a/test/physics/material/Material.test.cu b/test/physics/material/Material.test.cu index 8e88d14b95..c332c106ff 100644 --- a/test/physics/material/Material.test.cu +++ b/test/physics/material/Material.test.cu @@ -83,6 +83,7 @@ MTestOutput m_test(const MTestInput& input) raw_pointer_cast(temperatures.data()), raw_pointer_cast(rad_len.data()), raw_pointer_cast(tot_z.data())); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); MTestOutput result; diff --git a/test/random/cuda/RngEngine.test.cu b/test/random/cuda/RngEngine.test.cu index 25642708cd..e358e1dc29 100644 --- a/test/random/cuda/RngEngine.test.cu +++ b/test/random/cuda/RngEngine.test.cu @@ -72,6 +72,7 @@ TEST(RngEngineIntTest, regression) num_samples, container.device_pointers(), thrust::raw_pointer_cast(samples.data())); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); // Copy data back to host @@ -139,6 +140,7 @@ TYPED_TEST(RngEngineFloatTest, generate_canonical) num_samples, container.device_pointers(), thrust::raw_pointer_cast(samples.data())); + CELER_CUDA_CHECK_ERROR(); CELER_CUDA_CALL(cudaDeviceSynchronize()); // Copy data back to host diff --git a/test/sim/TrackInitializerStore.test.cu b/test/sim/TrackInitializerStore.test.cu index de570710da..6e8243de3c 100644 --- a/test/sim/TrackInitializerStore.test.cu +++ b/test/sim/TrackInitializerStore.test.cu @@ -99,7 +99,6 @@ void interact(StatePointers states, auto lparams = calc_launch_params(states.size()); interact_kernel<<>>( states, secondaries, input); - CELER_CUDA_CHECK_ERROR(); } @@ -118,7 +117,6 @@ std::vector tracks_test(StatePointers states) auto lparams = calc_launch_params(states.size()); tracks_test_kernel<<>>( states, thrust::raw_pointer_cast(output.data())); - CELER_CUDA_CHECK_ERROR(); // Copy data back to host @@ -142,7 +140,6 @@ std::vector initializers_test(TrackInitializerPointers inits) auto lparams = calc_launch_params(inits.initializers.size()); initializers_test_kernel<<>>( inits, thrust::raw_pointer_cast(output.data())); - CELER_CUDA_CHECK_ERROR(); // Copy data back to host @@ -166,7 +163,6 @@ std::vector vacancies_test(TrackInitializerPointers inits) auto lparams = calc_launch_params(inits.vacancies.size()); vacancies_test_kernel<<>>( inits, thrust::raw_pointer_cast(output.data())); - CELER_CUDA_CHECK_ERROR(); // Copy data back to host