Skip to content

Commit

Permalink
performance optimization: choosing individual thread block sizes for …
Browse files Browse the repository at this point in the history
…kernels
  • Loading branch information
chrxh committed May 23, 2024
1 parent ac5299e commit 15a5562
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 14 deletions.
9 changes: 9 additions & 0 deletions source/EngineGpuKernels/Macros.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,12 @@ void checkAndThrowError(T result, char const *const func, const char *const file
} else { \
func<<<1, 1>>>(__VA_ARGS__); \
}

// Launches kernel `func` with gpuSettings.numBlocks blocks and a caller-chosen
// number of threads per block (`threadsPerBlock`). All remaining macro
// arguments are forwarded unchanged as the kernel's arguments.
//
// When GlobalSettings::getInstance().isDebugMode() is true, the launch is
// followed by cudaDeviceSynchronize() and the result of cudaGetLastError() is
// handed to CHECK_FOR_CUDA_ERROR. Otherwise the kernel is only launched and no
// error check is performed here.
//
// Requirements at the expansion site: `gpuSettings` must be in scope and the
// invocation must be terminated with a semicolon.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to
// exactly one statement and can be used safely inside unbraced if/else.
#define KERNEL_CALL_MOD(func, threadsPerBlock, ...) \
    do { \
        func<<<gpuSettings.numBlocks, threadsPerBlock>>>(__VA_ARGS__); \
        if (GlobalSettings::getInstance().isDebugMode()) { \
            cudaDeviceSynchronize(); \
            CHECK_FOR_CUDA_ERROR(cudaGetLastError()); \
        } \
    } while (0)
28 changes: 14 additions & 14 deletions source/EngineGpuKernels/SimulationKernelsLauncher.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,21 @@ void _SimulationKernelsLauncher::calcTimestep(Settings const& settings, Simulati
bool considerRigidityUpdate = (data.timestep % 3 == 0);

KERNEL_CALL(cudaNextTimestep_physics_init, data);
cudaNextTimestep_physics_fillMaps<<<gpuSettings.numBlocks, 32>>>(data);
KERNEL_CALL_MOD(cudaNextTimestep_physics_fillMaps, 64, data);
if (settings.simulationParameters.motionType == MotionType_Fluid) {
auto threads = calcOptimalThreadsForFluidKernel(settings.simulationParameters);
cudaNextTimestep_physics_calcFluidForces<<<gpuSettings.numBlocks, threads>>>(data);
auto threadBlockSize = calcOptimalThreadsForFluidKernel(settings.simulationParameters);
KERNEL_CALL_MOD(cudaNextTimestep_physics_calcFluidForces, threadBlockSize, data);
} else {
KERNEL_CALL(cudaNextTimestep_physics_calcCollisionForces, data);
}
if (settings.simulationParameters.numSpots > 0) {
KERNEL_CALL(cudaApplyFlowFieldSettings, data);
}
KERNEL_CALL(cudaNextTimestep_physics_applyForces, data);
KERNEL_CALL(cudaNextTimestep_physics_calcConnectionForces, data, considerForcesFromAngleDifferences);
KERNEL_CALL(cudaNextTimestep_physics_verletPositionUpdate, data);
KERNEL_CALL(cudaNextTimestep_physics_calcConnectionForces, data, considerForcesFromAngleDifferences);
KERNEL_CALL(cudaNextTimestep_physics_verletVelocityUpdate, data);
KERNEL_CALL_MOD(cudaNextTimestep_physics_applyForces, 16, data);
KERNEL_CALL_MOD(cudaNextTimestep_physics_calcConnectionForces, 16, data, considerForcesFromAngleDifferences);
KERNEL_CALL_MOD(cudaNextTimestep_physics_verletPositionUpdate, 16, data);
KERNEL_CALL_MOD(cudaNextTimestep_physics_calcConnectionForces, 16, data, considerForcesFromAngleDifferences);
KERNEL_CALL_MOD(cudaNextTimestep_physics_verletVelocityUpdate, 16, data);

//cell functions
KERNEL_CALL(cudaNextTimestep_cellFunction_prepare_substep1, data);
Expand All @@ -59,19 +59,19 @@ void _SimulationKernelsLauncher::calcTimestep(Settings const& settings, Simulati
if (settings.simulationParameters.cellFunctionConstructorCheckCompletenessForSelfReplication) {
KERNEL_CALL(cudaNextTimestep_cellFunction_constructor_completenessCheck, data, statistics);
}
KERNEL_CALL(cudaNextTimestep_cellFunction_constructor_process, data, statistics);
KERNEL_CALL_MOD(cudaNextTimestep_cellFunction_constructor_process, 4, data, statistics);
KERNEL_CALL(cudaNextTimestep_cellFunction_injector, data, statistics);
KERNEL_CALL(cudaNextTimestep_cellFunction_attacker, data, statistics);
KERNEL_CALL(cudaNextTimestep_cellFunction_transmitter, data, statistics);
KERNEL_CALL_MOD(cudaNextTimestep_cellFunction_attacker, 4, data, statistics);
KERNEL_CALL_MOD(cudaNextTimestep_cellFunction_transmitter, 4, data, statistics);
KERNEL_CALL(cudaNextTimestep_cellFunction_muscle, data, statistics);
KERNEL_CALL(cudaNextTimestep_cellFunction_sensor, data, statistics);
KERNEL_CALL_MOD(cudaNextTimestep_cellFunction_sensor, 64, data, statistics);
KERNEL_CALL(cudaNextTimestep_cellFunction_reconnector, data, statistics);
KERNEL_CALL(cudaNextTimestep_cellFunction_detonator, data, statistics);

if (considerInnerFriction) {
KERNEL_CALL(cudaNextTimestep_physics_applyInnerFriction, data);
KERNEL_CALL_MOD(cudaNextTimestep_physics_applyInnerFriction, 16, data);
}
KERNEL_CALL(cudaNextTimestep_physics_applyFriction, data);
KERNEL_CALL_MOD(cudaNextTimestep_physics_applyFriction, 16, data);

if (considerRigidityUpdate && isRigidityUpdateEnabled(settings)) {
KERNEL_CALL(cudaInitClusterData, data);
Expand Down

0 comments on commit 15a5562

Please sign in to comment.