Skip to content

Commit

Permalink
Move cluster culling / lod selection to mesh shader + Store cluster-b…
Browse files Browse the repository at this point in the history
…ased models per viewport
  • Loading branch information
jglrxavpok committed Feb 24, 2024
1 parent e7a88a0 commit 1fd0633
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 17 deletions.
41 changes: 34 additions & 7 deletions engine/engine/render/ClusterManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ namespace Carrot::Render {

std::shared_ptr<ClusterModel> ClusterManager::addModel(const ClustersInstanceDescription& desc) {
verify(desc.templates.size() == desc.pMaterials.size(), "There must be as many templates as material handles!");
auto& gpuInstances = gpuInstancesPerViewport[desc.pViewport];

std::uint32_t clusterCount = 0;

Expand All @@ -130,7 +131,7 @@ namespace Carrot::Render {
}

Async::LockGuard l { accessLock };
requireInstanceUpdate = true;
requireInstanceUpdatePerViewport[desc.pViewport] = true;
const std::uint32_t firstInstanceID = gpuInstances.size();
gpuInstances.resize(firstInstanceID + clusterCount);

Expand Down Expand Up @@ -165,7 +166,11 @@ namespace Carrot::Render {
}

// Returns the view of the cluster instance buffer kept for the viewport and swapchain index of the given render context.
// An empty BufferView is returned when nothing is registered for that viewport, or when no allocation is stored for that swapchain index.
Carrot::BufferView ClusterManager::getClusterInstances(const Carrot::Render::Context& renderContext) {
// NOTE(review): this page is a diff rendering without +/- markers; pAlloc is declared both here and after the lookup below,
// so this line is presumably the removed (pre-change) version — confirm against the repository.
auto& pAlloc = instancesPerFrame[renderContext.swapchainIndex];
// look up without operator[] so that querying an unknown viewport does not insert an entry into the map
auto iter = instancesPerFramePerViewport.find(renderContext.pViewport);
if(iter == instancesPerFramePerViewport.end()) {
return Carrot::BufferView{};
}
auto& pAlloc = iter->second[renderContext.swapchainIndex];
// the slot may hold a null pointer: fall back to an empty view in that case
return pAlloc ? pAlloc->view : Carrot::BufferView{};
}

Expand Down Expand Up @@ -211,6 +216,11 @@ namespace Carrot::Render {
triangleCount = 0;
}

auto& gpuInstances = gpuInstancesPerViewport[renderContext.pViewport];
if(gpuInstances.empty()) {
return;
}

const Carrot::Camera& camera = renderContext.getCamera();

auto testLOD = [&](const Cluster& c, const ClusterModel& instance) {
Expand Down Expand Up @@ -260,7 +270,9 @@ namespace Carrot::Render {
requireClusterUpdate = false;
}

auto& instanceGPUVisibleArray = instanceGPUVisibleArrays[renderContext.pViewport];
// TODO: allow material updates once instances are already created? => needs something similar to MaterialSystem::getData
bool& requireInstanceUpdate = requireInstanceUpdatePerViewport[renderContext.pViewport];
if(requireInstanceUpdate) {
instanceGPUVisibleArray = std::make_shared<BufferAllocation>(std::move(GetResourceAllocator().allocateDeviceBuffer(sizeof(ClusterInstance) * gpuInstances.size(), vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eTransferDst)));
instanceGPUVisibleArray->view.stageUpload(std::span<const ClusterInstance>{ gpuInstances });
Expand All @@ -278,6 +290,10 @@ namespace Carrot::Render {
}

clusterDataPerFrame[renderContext.swapchainIndex] = clusterGPUVisibleArray; // keep ref to avoid allocation going back to heap while still in use
auto& instancesPerFrame = instancesPerFramePerViewport[renderContext.pViewport];
if(instancesPerFrame.empty()) {
instancesPerFrame.resize(GetEngine().getSwapchainImageCount());
}
instancesPerFrame[renderContext.swapchainIndex] = instanceGPUVisibleArray; // keep ref to avoid allocation going back to heap while still in use
instanceDataPerFrame[renderContext.swapchainIndex] = instanceDataGPUVisibleArray; // keep ref to avoid allocation going back to heap while still in use

Expand Down Expand Up @@ -340,14 +356,25 @@ namespace Carrot::Render {
return; // TODO: debug only, remove
}

auto& pushConstant = packet.addPushConstant("push", vk::ShaderStageFlagBits::eMeshEXT);
{
std::uint32_t maxID = gpuInstances.size();
pushConstant.setData(std::move(maxID));
auto& pushConstant = packet.addPushConstant("push", vk::ShaderStageFlagBits::eMeshEXT);
// CPU-side payload of the "push" push constant sent to the mesh shader stage.
// Field order follows the PushConstant block declared in visibility-buffer.mesh.glsl.
struct PushConstantData {
std::uint32_t maxClusterID; // filled with the number of GPU instances of the viewport
std::uint32_t lodSelectionMode; // shader side: 0 = screen size based, 1 = force a specific LOD
float lodErrorThreshold; // threshold the projected cluster error is compared against
std::uint32_t forcedLOD; // LOD compared against the cluster LOD when lodSelectionMode is not 0
float screenHeight; // filled with the height of the viewport being rendered
};
PushConstantData data{};
data.maxClusterID = gpuInstances.size();
data.lodSelectionMode = lodSelectionMode;
data.lodErrorThreshold = errorThreshold;
data.forcedLOD = globalLOD;
data.screenHeight = renderContext.pViewport->getHeight();
pushConstant.setData(std::move(data));
}

Render::PacketCommand& drawCommand = packet.commands.emplace_back();
//drawCommand.drawMeshTasks.groupCountX = 1;
drawCommand.drawMeshTasks.groupCountX = gpuInstances.size();
drawCommand.drawMeshTasks.groupCountY = 1;
drawCommand.drawMeshTasks.groupCountZ = 1;
Expand All @@ -361,7 +388,7 @@ namespace Carrot::Render {

// Adjusts the per-frame buffer reference arrays to a new swapchain image count.
void ClusterManager::onSwapchainImageCountChange(size_t newCount) {
clusterDataPerFrame.resize(newCount);
// NOTE(review): this page is a diff rendering without +/- markers; the header shown further down replaces the
// instancesPerFrame member with instancesPerFramePerViewport, so this line is presumably the removed version — confirm.
instancesPerFrame.resize(newCount);
// per-viewport arrays are dropped here instead of resized: the render path resizes an entry again when it finds it empty
instancesPerFramePerViewport.clear();
instanceDataPerFrame.resize(newCount);
}

Expand Down
8 changes: 4 additions & 4 deletions engine/engine/render/ClusterManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,18 +170,18 @@ namespace Carrot::Render {
WeakPool<ClusterModel> models;

std::vector<Cluster> gpuClusters;
std::vector<ClusterInstance> gpuInstances;
std::unordered_map<Carrot::Render::Viewport*, std::vector<ClusterInstance>> gpuInstancesPerViewport;

bool requireClusterUpdate = true;
bool requireInstanceUpdate = true;
std::unordered_map<Carrot::Render::Viewport*, bool> requireInstanceUpdatePerViewport;
std::shared_ptr<Carrot::BufferAllocation> clusterGPUVisibleArray;
std::shared_ptr<Carrot::BufferAllocation> instanceGPUVisibleArray;
std::unordered_map<Viewport*, std::shared_ptr<Carrot::BufferAllocation>> instanceGPUVisibleArrays;
std::shared_ptr<Carrot::BufferAllocation> instanceDataGPUVisibleArray;
Carrot::InstanceData* pInstanceData = nullptr; // CPU visible version of instanceDataGPUVisibleArray

std::unordered_map<Viewport*, std::shared_ptr<Carrot::Pipeline>> pipelines;
Render::PerFrame<std::shared_ptr<Carrot::BufferAllocation>> clusterDataPerFrame;
Render::PerFrame<std::shared_ptr<Carrot::BufferAllocation>> instancesPerFrame;
std::unordered_map<Viewport*, Render::PerFrame<std::shared_ptr<Carrot::BufferAllocation>>> instancesPerFramePerViewport;
Render::PerFrame<std::shared_ptr<Carrot::BufferAllocation>> instanceDataPerFrame;
};

Expand Down
12 changes: 11 additions & 1 deletion engine/resources/shaders/includes/math.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,14 @@ vec3 barycentrics(vec3 a, vec3 b, vec3 c, vec3 p) {
v *= invTriangleArea;

return vec3(u, v, 1-u-v);
}
}

// Applies `transform` to a sphere packed in a vec4.
// Input and output share the same packing: xyz = center, w = radius.
// The center is transformed as a homogeneous point (with perspective divide);
// the radius becomes the length of the transformed vector (radius, 0, 0), i.e. it is scaled along the X axis only.
vec4 transformSphere(vec4 sphere, mat4 transform) {
    const vec4 movedCenter = transform * vec4(sphere.xyz, 1.0f);
    // w = 0 makes this a direction: the translation part of the matrix is ignored
    const vec3 scaledAxis = (transform * vec4(sphere.w, 0, 0, 0)).xyz;
    return vec4(movedCenter.xyz / movedCenter.w, length(scaledAxis));
}
52 changes: 47 additions & 5 deletions engine/resources/shaders/visibility-buffer.mesh.glsl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#extension GL_EXT_nonuniform_qualifier : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
Expand All @@ -9,13 +10,14 @@
#include <includes/camera.glsl>
#include <includes/buffers.glsl>
#include <includes/clusters.glsl>
#include <includes/math.glsl>
#include <draw_data.glsl>
DEFINE_CAMERA_SET(1)

// TODO: change workgroup size
const uint WORKGROUP_SIZE = 1;

layout(local_size_x=WORKGROUP_SIZE) in;
layout(local_size_x = WORKGROUP_SIZE) in;
layout(max_vertices=128, max_primitives=128) out;
layout(triangles) out;

Expand All @@ -24,6 +26,11 @@ layout(location=1) out flat uint outClusterInstanceID[];

// Per-draw parameters pushed by the CPU for cluster culling / LOD selection.
layout(push_constant) uniform PushConstant {
uint maxCluster;
uint lodSelectionMode; // 0= screen size based, 1= force specific LOD
float lodErrorThreshold; // screen size threshold: projected errors are compared against it when lodSelectionMode is 0
uint forcedLOD; // lod to force: only clusters with this lod are kept when lodSelectionMode is not 0

float screenHeight; // scales the projected error in projectErrorToScreen
} push;

layout(set = 0, binding = 0, scalar) buffer ClusterRef {
Expand All @@ -38,6 +45,40 @@ layout(set = 0, binding = 2, scalar) buffer ModelDataRef {
InstanceData modelData[];
};

// The error projection assumes a fixed field of view; the screen height is not fixed, it is read from push.screenHeight.
// NOTE(review): M_PI_OVER_2 is defined outside this file, presumably pi/2 (a 90 degree field of view) — confirm.
const float testFOV = M_PI_OVER_2;
// cotangent of half the assumed field of view
const float cotHalfFov = 1.0f / tan(testFOV / 2.0f);

// Projects the given transformed (ie in view space) sphere to an error value in pixels.
// xyz is center of sphere
// w is radius of sphere
// An infinite radius is returned unchanged.
// +infinity is returned when the view origin is inside or on the sphere: the closed-form projection below
// is only defined when the distance to the center is strictly greater than the radius.
float projectErrorToScreen(vec4 transformedSphere) {
    // https://stackoverflow.com/questions/21648630/radius-of-projected-sphere-in-screen-space
    if (isinf(transformedSphere.w)) {
        return transformedSphere.w;
    }
    const float d2 = dot(transformedSphere.xyz, transformedSphere.xyz);
    const float r = transformedSphere.w;
    const float tangentLength2 = d2 - r*r;
    if (tangentLength2 <= 0.0f) {
        // sqrt of a negative value has an undefined result in GLSL, and a zero value would divide by zero.
        // A NaN here would make every threshold comparison false and wrongly cull the cluster,
        // so report an unbounded error instead.
        return uintBitsToFloat(0x7F800000u); // +infinity
    }
    return push.screenHeight * cotHalfFov * r / sqrt(tangentLength2);
}

// Decides whether the given cluster must be skipped for this instance.
// Returns true when the cluster is culled, false when it must be rendered.
// - push.lodSelectionMode == 0: keep the cluster only if its own projected error is within the threshold
//   while the projected error of its parent is above it.
// - otherwise: keep only clusters whose lod equals push.forcedLOD.
// instanceID and modelDataIndex are currently not read.
bool cull(uint instanceID, uint clusterID, uint modelDataIndex, mat4 modelview) {
    if(push.lodSelectionMode != 0) {
        return clusters[clusterID].lod != uint(push.forcedLOD);
    }

    // error spheres: centered on the bounding spheres, with the (clamped) error as radius
    const float ownRadius = max(clusters[clusterID].error, 10e-10f);
    const float parentRadius = max(clusters[clusterID].parentError, 10e-10f);
    const vec4 ownSphere = transformSphere(vec4(clusters[clusterID].boundingSphere.xyz, ownRadius), modelview);
    const vec4 parentSphere = transformSphere(vec4(clusters[clusterID].parentBoundingSphere.xyz, parentRadius), modelview);

    const float ownScreenError = projectErrorToScreen(ownSphere);
    const float parentScreenError = projectErrorToScreen(parentSphere);

    // keep the negated conjunction as-is: comparisons involving NaN must still end up culling the cluster
    const bool keep = ownScreenError <= push.lodErrorThreshold && parentScreenError > push.lodErrorThreshold;
    return !keep;
}

void main() {
uint instanceID = gl_GlobalInvocationID.x;

Expand All @@ -51,14 +92,15 @@ void main() {
uint clusterID = instance.clusterID;
#define cluster clusters[clusterID]

if(clusters[clusterID].lod != 1) {
uint modelDataIndex = instance.instanceDataIndex;

// TODO: move this to task shader?
mat4 modelview = cbo.view * modelData[modelDataIndex].transform * clusters[clusterID].transform;
if(cull(instanceID, clusterID, modelDataIndex, modelview)) {
SetMeshOutputsEXT(0,0);
return;
}

uint modelDataIndex = instance.instanceDataIndex;
mat4 modelview = cbo.view * modelData[modelDataIndex].transform * clusters[clusterID].transform;

#if 1
SetMeshOutputsEXT(cluster.vertexCount, cluster.triangleCount);

Expand Down

0 comments on commit 1fd0633

Please sign in to comment.