Skip to content

Commit

Permalink
Add post-kernel-launch assertions (#119)
Browse files Browse the repository at this point in the history
* Add error testing after kernel in RDemoKernel.cu.

This is to see if CI sees
```
warning: Cuda API error detected: cudaLaunchKernel returned (0x62)

warning: Cuda API error detected: cudaPeekAtLastError returned (0x62)

warning: Cuda API error detected: cudaGetLastError returned (0x62)

/wclustre/g4p/pcanal/geant/sources/celeritas/app/demo-rasterizer/demo-rasterizer.cc:139: critical: caught exception: /wclustre/g4p/pcanal/geant/sources/celeritas/app/demo-rasterizer/RDemoKernel.cu:107:
celeritas: cuda error: invalid device function
    cudaPeekAtLastError()
```

* Add missing error check after kernel launch
  • Loading branch information
pcanal committed Jan 30, 2021
1 parent ee91dbb commit 036f8e2
Show file tree
Hide file tree
Showing 17 changed files with 15 additions and 12 deletions.
2 changes: 2 additions & 0 deletions app/demo-interactor/KNDemoKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ void initialize(const CudaGridParams& grid,
CELER_EXPECT(states.rng.size() == states.size());
initialize_kernel<<<grid.grid_size, grid.block_size>>>(
params, states, initial);
CELER_CUDA_CHECK_ERROR();
}

//---------------------------------------------------------------------------//
Expand All @@ -180,6 +181,7 @@ void iterate(const CudaGridParams& grid,
{
iterate_kernel<<<grid.grid_size, grid.block_size>>>(
params, state, secondaries, detector);
CELER_CUDA_CHECK_ERROR();

// Note: the device synchronize is useful for debugging and necessary for
// timing diagnostics.
Expand Down
2 changes: 2 additions & 0 deletions app/demo-interactor/detail/DetectorUtils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ void bin_buffer(const DetectorPointers& detector)
{
auto params = KernelParamCalculator()(detector.capacity());
bin_buffer_kernel<<<params.grid_size, params.block_size>>>(detector);
CELER_CUDA_CHECK_ERROR();
}

//---------------------------------------------------------------------------//
Expand All @@ -84,6 +85,7 @@ void normalize(const DetectorPointers& device_ptrs, real_type norm)
auto params = KernelParamCalculator()(device_ptrs.tally_deposition.size());
normalize_kernel<<<params.grid_size, params.block_size>>>(device_ptrs,
norm);
CELER_CUDA_CHECK_ERROR();
}

//---------------------------------------------------------------------------//
Expand Down
1 change: 1 addition & 0 deletions app/demo-rasterizer/RDemoKernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ void trace(const GeoParamsPointers& geo_params,
auto params = calc_kernel_params(image.dims[0]);
trace_impl<<<params.grid_size, params.block_size>>>(
geo_params, geo_state, image);
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());
}

Expand Down
1 change: 0 additions & 1 deletion src/physics/em/detail/BetheHeitler.cu
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ void bethe_heitler_interact(const BetheHeitlerPointers& bh,
auto params = calc_kernel_params(model.states.size());
bethe_heitler_interact_kernel<<<params.grid_size, params.block_size>>>(
bh, model);

CELER_CUDA_CHECK_ERROR();
}

Expand Down
1 change: 0 additions & 1 deletion src/physics/em/detail/EPlusGG.cu
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ void eplusgg_interact(const EPlusGGPointers& eplusgg,
// Launch the kernel
eplusgg_interact_kernel<<<params.grid_size, params.block_size>>>(eplusgg,
model);

CELER_CUDA_CHECK_ERROR();
}

Expand Down
1 change: 0 additions & 1 deletion src/physics/em/detail/KleinNishina.cu
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ void klein_nishina_interact(const KleinNishinaPointers& kn,
auto params = calc_kernel_params(model.states.size());
klein_nishina_interact_kernel<<<params.grid_size, params.block_size>>>(
kn, model);

CELER_CUDA_CHECK_ERROR();
}

Expand Down
1 change: 0 additions & 1 deletion src/physics/em/detail/LivermorePE.cu
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ void livermore_pe_interact(const LivermorePEPointers& pe,
auto params = calc_kernel_params(model.states.size());
livermore_pe_interact_kernel<<<params.grid_size, params.block_size>>>(
pe, model);

CELER_CUDA_CHECK_ERROR();
}

Expand Down
4 changes: 0 additions & 4 deletions src/sim/detail/InitializeTracks.cu
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ void init_tracks(const StatePointers& states,
auto lparams = calc_launch_params(num_vacancies);
init_tracks_kernel<<<lparams.grid_size, lparams.block_size>>>(
states, params, inits, num_vacancies);

CELER_CUDA_CHECK_ERROR();
}

Expand All @@ -290,7 +289,6 @@ void locate_alive(const StatePointers& states,
auto lparams = calc_launch_params(states.size());
locate_alive_kernel<<<lparams.grid_size, lparams.block_size>>>(
states, params, inits);

CELER_CUDA_CHECK_ERROR();
}

Expand All @@ -312,7 +310,6 @@ void process_primaries(Span<const Primary> primaries,
auto lparams = calc_launch_params(primaries.size());
process_primaries_kernel<<<lparams.grid_size, lparams.block_size>>>(
primaries, initializers);

CELER_CUDA_CHECK_ERROR();
}

Expand All @@ -334,7 +331,6 @@ void process_secondaries(const StatePointers& states,
auto lparams = calc_launch_params(states.size());
process_secondaries_kernel<<<lparams.grid_size, lparams.block_size>>>(
states, params, inits);

CELER_CUDA_CHECK_ERROR();
}

Expand Down
1 change: 1 addition & 0 deletions test/base/NumericLimits.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ NLTestOutput<T> nl_test()

auto params = calc_launch_params(3);
nl_test_kernel<<<params.grid_size, params.block_size>>>(result_device);
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

// Copy to host
Expand Down
1 change: 1 addition & 0 deletions test/base/Range.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ RangeTestOutput rangedev_test(RangeTestInput input)
thrust::raw_pointer_cast(y_dev.data()),
thrust::raw_pointer_cast(z_dev.data()),
z_dev.size());
CELER_CUDA_CHECK_ERROR();

// Copy result back to CPU
RangeTestOutput result;
Expand Down
2 changes: 2 additions & 0 deletions test/base/StackAllocator.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,13 @@ SATestOutput sa_test(SATestInput input)
auto params = calc_launch_params(input.num_threads);
sa_test_kernel<<<params.grid_size, params.block_size>>>(
input, raw_pointer_cast(out.data()));
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

// Access secondaries after the first kernel completed
sa_post_test_kernel<<<params.grid_size, params.block_size>>>(
input, raw_pointer_cast(out.data()));
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

// Copy data back to host
Expand Down
1 change: 1 addition & 0 deletions test/geometry/GeoTrackView.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ VGGTestOutput vgg_test(VGGTestInput input)
input.max_segments,
raw_pointer_cast(ids.data()),
raw_pointer_cast(distances.data()));
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

// Copy result back to CPU
Expand Down
1 change: 1 addition & 0 deletions test/geometry/LinearPropagator.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ LinPropTestOutput linProp_test(LinPropTestInput input)
input.max_segments,
raw_pointer_cast(ids.data()),
raw_pointer_cast(distances.data()));
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

// Copy result back to CPU
Expand Down
1 change: 1 addition & 0 deletions test/physics/base/Particle.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ PTVTestOutput ptv_test(PTVTestInput input)
input.states,
raw_pointer_cast(init.data()),
raw_pointer_cast(result.data()));
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

PTVTestOutput output;
Expand Down
1 change: 1 addition & 0 deletions test/physics/material/Material.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ MTestOutput m_test(const MTestInput& input)
raw_pointer_cast(temperatures.data()),
raw_pointer_cast(rad_len.data()),
raw_pointer_cast(tot_z.data()));
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

MTestOutput result;
Expand Down
2 changes: 2 additions & 0 deletions test/random/cuda/RngEngine.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ TEST(RngEngineIntTest, regression)
num_samples,
container.device_pointers(),
thrust::raw_pointer_cast(samples.data()));
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

// Copy data back to host
Expand Down Expand Up @@ -139,6 +140,7 @@ TYPED_TEST(RngEngineFloatTest, generate_canonical)
num_samples,
container.device_pointers(),
thrust::raw_pointer_cast(samples.data()));
CELER_CUDA_CHECK_ERROR();
CELER_CUDA_CALL(cudaDeviceSynchronize());

// Copy data back to host
Expand Down
4 changes: 0 additions & 4 deletions test/sim/TrackInitializerStore.test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ void interact(StatePointers states,
auto lparams = calc_launch_params(states.size());
interact_kernel<<<lparams.grid_size, lparams.block_size>>>(
states, secondaries, input);

CELER_CUDA_CHECK_ERROR();
}

Expand All @@ -118,7 +117,6 @@ std::vector<unsigned int> tracks_test(StatePointers states)
auto lparams = calc_launch_params(states.size());
tracks_test_kernel<<<lparams.grid_size, lparams.block_size>>>(
states, thrust::raw_pointer_cast(output.data()));

CELER_CUDA_CHECK_ERROR();

// Copy data back to host
Expand All @@ -142,7 +140,6 @@ std::vector<unsigned int> initializers_test(TrackInitializerPointers inits)
auto lparams = calc_launch_params(inits.initializers.size());
initializers_test_kernel<<<lparams.grid_size, lparams.block_size>>>(
inits, thrust::raw_pointer_cast(output.data()));

CELER_CUDA_CHECK_ERROR();

// Copy data back to host
Expand All @@ -166,7 +163,6 @@ std::vector<size_type> vacancies_test(TrackInitializerPointers inits)
auto lparams = calc_launch_params(inits.vacancies.size());
vacancies_test_kernel<<<lparams.grid_size, lparams.block_size>>>(
inits, thrust::raw_pointer_cast(output.data()));

CELER_CUDA_CHECK_ERROR();

// Copy data back to host
Expand Down

0 comments on commit 036f8e2

Please sign in to comment.