Skip to content

Commit

Permalink
optimize the inner rendering loop
Browse files Browse the repository at this point in the history
This largely undoes a change I made recently where PrimitiveInfo held
an FRenderPrimitive* to save some space and keep a command at 64 bytes.

This wasn't a good idea because the inner rendering loop shouldn't
do any dereference in the common case.

This change reorganises PrimitiveInfo such that it stores all the data
necessary to render a primitive in the common case. The less common
cases are when hybrid instancing, morphing or skinning are used; in 
those cases, a dereference into the renderable SOA is needed.

PrimitiveInfo currently has 16 bytes free, which we keep for future use.
  • Loading branch information
pixelflinger committed Apr 23, 2024
1 parent 490e8cf commit e4442a5
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 63 deletions.
125 changes: 75 additions & 50 deletions filament/src/RenderPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ void RenderPass::appendCommands(FEngine& engine,
// This must be done from the main thread.
for (Command const* first = curr, *last = curr + commandCount ; first != last ; ++first) {
if (UTILS_LIKELY((first->key & CUSTOM_MASK) == uint64_t(CustomCommand::PASS))) {
auto ma = first->primitive.primitive->getMaterialInstance()->getMaterial();
auto ma = first->primitive.mi->getMaterial();
ma->prepareProgram(first->primitive.materialVariant);
}
}
Expand Down Expand Up @@ -290,19 +290,24 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept {

while (curr != last) {

// we can't have nice things! No more than maxInstanceCount due to UBO size limits
Command const* const e = std::find_if_not(curr, std::min(last, curr + maxInstanceCount),
[lhs = *curr](Command const& rhs) {
// primitives must be identical to be instanced. Currently, instancing doesn't support
// skinning/morphing.
return lhs.primitive.primitive == rhs.primitive.primitive &&
lhs.primitive.rasterState == rhs.primitive.rasterState &&
lhs.primitive.skinningHandle == rhs.primitive.skinningHandle &&
lhs.primitive.skinningOffset == rhs.primitive.skinningOffset &&
lhs.primitive.morphWeightBuffer == rhs.primitive.morphWeightBuffer &&
lhs.primitive.morphTargetBuffer == rhs.primitive.morphTargetBuffer &&
lhs.primitive.skinningTexture == rhs.primitive.skinningTexture ;
});
// Currently, if we have skinning or morphing, we can't use auto instancing. This is
// because the morphing/skinning data for comparison is not easily accessible.
// Additionally, we can't have a different skinning/morphing per instance anyway.
Command const* e = curr + 1;
if (UTILS_LIKELY(!curr->primitive.hasSkinning && !curr->primitive.hasMorphing)) {
// we can't have nice things! No more than maxInstanceCount due to UBO size limits
e = std::find_if_not(curr, std::min(last, curr + maxInstanceCount),
[lhs = *curr](Command const& rhs) {
// primitives must be identical to be instanced.
// Currently, instancing doesn't support skinning/morphing.
return lhs.primitive.mi == rhs.primitive.mi &&
lhs.primitive.rph == rhs.primitive.rph &&
lhs.primitive.vbih == rhs.primitive.vbih &&
lhs.primitive.indexOffset == rhs.primitive.indexOffset &&
lhs.primitive.indexCount == rhs.primitive.indexCount &&
lhs.primitive.rasterState == rhs.primitive.rasterState;
});
}

uint32_t const instanceCount = e - curr;
assert_invariant(instanceCount > 0);
Expand Down Expand Up @@ -584,7 +589,7 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla
cmdColor.primitive.index = i;
cmdColor.primitive.instanceCount =
soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK;
cmdColor.primitive.instanceBufferHandle = soaInstanceInfo[i].handle;
cmdColor.primitive.hasHybridInstancing = (bool)soaInstanceInfo[i].handle;

// soaInstanceInfo[i].count is the number of instances the user has requested, either for
// manual or hybrid instancing. Instanced stereo multiplies the number of instances by the
Expand Down Expand Up @@ -614,14 +619,11 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla
cmdDepth.primitive.index = i;
cmdDepth.primitive.instanceCount =
soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK;
cmdDepth.primitive.instanceBufferHandle = soaInstanceInfo[i].handle;
cmdDepth.primitive.hasHybridInstancing = (bool)soaInstanceInfo[i].handle;
cmdDepth.primitive.materialVariant.setSkinning(hasSkinningOrMorphing);
cmdDepth.primitive.rasterState.inverseFrontFaces = inverseFrontFaces;

cmdDepth.primitive.skinningHandle = skinning.handle;
cmdDepth.primitive.skinningOffset = skinning.offset;
cmdDepth.primitive.skinningTexture = skinning.handleSampler;
cmdDepth.primitive.morphWeightBuffer = morphing.handle;
cmdDepth.primitive.hasMorphing = (bool)morphing.handle;
cmdDepth.primitive.hasSkinning = (bool)skinning.handle;

if (UTILS_UNLIKELY(hasInstancedStereo)) {
cmdColor.primitive.instanceCount =
Expand All @@ -631,11 +633,8 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla
}
if constexpr (isColorPass) {
renderableVariant.setFog(soaVisibility[i].fog && Variant::isFogVariant(variant));

cmdColor.primitive.skinningHandle = skinning.handle;
cmdColor.primitive.skinningOffset = skinning.offset;
cmdColor.primitive.skinningTexture = skinning.handleSampler;
cmdColor.primitive.morphWeightBuffer = morphing.handle;
cmdColor.primitive.hasMorphing = (bool)morphing.handle;
cmdColor.primitive.hasSkinning = (bool)skinning.handle;
}

const bool shadowCaster = soaVisibility[i].castShadows & hasShadowing;
Expand All @@ -656,7 +655,13 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla
// skinning or morphing.

if constexpr (isColorPass) {
cmdColor.primitive.primitive = &primitive;
cmdColor.primitive.mi = mi;
cmdColor.primitive.rph = primitive.getHwHandle();
cmdColor.primitive.vbih = primitive.getVertexBufferInfoHandle();
cmdColor.primitive.indexOffset = primitive.getIndexOffset();
cmdColor.primitive.indexCount = primitive.getIndexCount();
cmdColor.primitive.type = primitive.getPrimitiveType();

RenderPass::setupColorCommand(cmdColor, renderableVariant, mi, inverseFrontFaces);

cmdColor.primitive.morphTargetBuffer = morphTargets.buffer->getHwHandle();
Expand Down Expand Up @@ -759,7 +764,13 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla
cmdDepth.key |= mi->getSortingKey(); // already all set-up for direct or'ing

// unconditionally write the command
cmdDepth.primitive.primitive = &primitive;
cmdDepth.primitive.mi = mi;
cmdDepth.primitive.rph = primitive.getHwHandle();
cmdDepth.primitive.vbih = primitive.getVertexBufferInfoHandle();
cmdDepth.primitive.indexOffset = primitive.getIndexOffset();
cmdDepth.primitive.indexCount = primitive.getIndexCount();
cmdDepth.primitive.type = primitive.getPrimitiveType();

cmdDepth.primitive.rasterState.culling = mi->getCullingMode();
cmdDepth.primitive.morphTargetBuffer = morphTargets.buffer->getHwHandle();

Expand Down Expand Up @@ -915,20 +926,20 @@ void RenderPass::Executor::execute(FEngine& engine,
}

// primitiveHandle may be invalid if no geometry was set on the renderable.
if (UTILS_UNLIKELY(!first->primitive.primitive->getHwHandle())) {
if (UTILS_UNLIKELY(!first->primitive.rph)) {
continue;
}

// per-renderable uniform
PrimitiveInfo const info = first->primitive;
pipeline.rasterState = info.rasterState;
pipeline.vertexBufferInfo = info.primitive->getVertexBufferInfoHandle();
pipeline.primitiveType = info.primitive->getPrimitiveType();
pipeline.vertexBufferInfo = info.vbih;
pipeline.primitiveType = info.type;
assert_invariant(pipeline.vertexBufferInfo);

if (UTILS_UNLIKELY(mi != info.primitive->getMaterialInstance())) {
if (UTILS_UNLIKELY(mi != info.mi)) {
// this is always taken the first time
mi = info.primitive->getMaterialInstance();
mi = info.mi;
assert_invariant(mi);

ma = mi->getMaterial();
Expand Down Expand Up @@ -960,9 +971,10 @@ void RenderPass::Executor::execute(FEngine& engine,
info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK;
auto getPerObjectUboHandle =
[this, &info, &instanceCount]() -> std::pair<Handle<backend::HwBufferObject>, uint32_t> {
if (info.instanceBufferHandle) {
if (info.hasHybridInstancing) {
FScene::RenderableSoa const& soa = *mRenderableSoa;
// "hybrid" instancing -- instanceBufferHandle takes the place of the UBO
return { info.instanceBufferHandle, 0 };
return { soa.elementAt<FScene::INSTANCES>(info.index).handle, 0 };
}
bool const userInstancing =
(info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u;
Expand All @@ -987,16 +999,22 @@ void RenderPass::Executor::execute(FEngine& engine,
offset,
sizeof(PerRenderableUib));

if (UTILS_UNLIKELY(info.skinningHandle)) {
if (UTILS_UNLIKELY(info.hasSkinning)) {

FScene::RenderableSoa const& soa = *mRenderableSoa;

const FRenderableManager::SkinningBindingInfo& skinning =
soa.elementAt<FScene::SKINNING_BUFFER>(info.index);

// note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations
driver.bindBufferRange(BufferObjectBinding::UNIFORM,
+UniformBindingPoints::PER_RENDERABLE_BONES,
info.skinningHandle,
info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData),
skinning.handle,
skinning.offset * sizeof(PerRenderableBoneUib::BoneData),
sizeof(PerRenderableBoneUib));
// note: always bind the skinningTexture because the shader needs it.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
info.skinningTexture);
skinning.handleSampler);
// note: even if only skinning is enabled, binding morphTargetBuffer is needed.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
info.morphTargetBuffer);
Expand All @@ -1006,16 +1024,25 @@ void RenderPass::Executor::execute(FEngine& engine,
rebindPipeline = true;
}

if (UTILS_UNLIKELY(info.morphWeightBuffer)) {
if (UTILS_UNLIKELY(info.hasMorphing)) {

FScene::RenderableSoa const& soa = *mRenderableSoa;

const FRenderableManager::SkinningBindingInfo& skinning =
soa.elementAt<FScene::SKINNING_BUFFER>(info.index);

const FRenderableManager::MorphingBindingInfo& morphing =
soa.elementAt<FScene::MORPHING_BUFFER>(info.index);

// Instead of using a UBO per primitive, we could also have a single UBO for all
// primitives and use bindUniformBufferRange which might be more efficient.
driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING,
info.morphWeightBuffer);
morphing.handle);
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
info.morphTargetBuffer);
// note: even if only morphing is enabled, binding skinningTexture is needed.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
info.skinningTexture);
skinning.handleSampler);

// FIXME: Currently we need to rebind the PipelineState when texture or
// UBO binding change.
Expand All @@ -1029,15 +1056,12 @@ void RenderPass::Executor::execute(FEngine& engine,
driver.bindPipeline(pipeline);
}

if (info.primitive->getHwHandle() != currentPrimitiveHandle) {
currentPrimitiveHandle = info.primitive->getHwHandle();
driver.bindRenderPrimitive(info.primitive->getHwHandle());
if (info.rph != currentPrimitiveHandle) {
currentPrimitiveHandle = info.rph;
driver.bindRenderPrimitive(info.rph);
}

driver.draw2(
info.primitive->getIndexOffset(),
info.primitive->getIndexCount(),
instanceCount);
driver.draw2(info.indexOffset, info.indexCount, instanceCount);
}
}

Expand All @@ -1057,7 +1081,8 @@ void RenderPass::Executor::execute(FEngine& engine,
// ------------------------------------------------------------------------------------------------

RenderPass::Executor::Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept
: mCommands(b, e),
: mRenderableSoa(&pass->mRenderableSoa),
mCommands(b, e),
mCustomCommands(pass->mCustomCommands.data(), pass->mCustomCommands.size()),
mUboHandle(pass->mUboHandle),
mInstancedUboHandle(pass->mInstancedUboHandle),
Expand Down
31 changes: 18 additions & 13 deletions filament/src/RenderPass.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,22 +238,26 @@ class RenderPass {

struct PrimitiveInfo { // 56 bytes
union {
FRenderPrimitive const* primitive; // 8 bytes;
uint64_t padding = {}; // ensures primitive is 8 bytes on all archs
}; // 8 bytes
uint64_t rfu0; // 8 bytes
backend::RasterState rasterState; // 4 bytes
backend::Handle<backend::HwBufferObject> skinningHandle; // 4 bytes
backend::Handle<backend::HwSamplerGroup> skinningTexture; // 4 bytes
backend::Handle<backend::HwBufferObject> morphWeightBuffer; // 4 bytes
backend::Handle<backend::HwSamplerGroup> morphTargetBuffer; // 4 bytes
backend::Handle<backend::HwBufferObject> instanceBufferHandle; // 4 bytes
FMaterialInstance const* mi;
uint64_t padding; // make this field 64 bits on all platforms
};
backend::RenderPrimitiveHandle rph; // 4 bytes
backend::VertexBufferInfoHandle vbih; // 4 bytes
uint32_t indexOffset; // 4 bytes
uint32_t indexCount; // 4 bytes
uint32_t index = 0; // 4 bytes
uint32_t skinningOffset = 0; // 4 bytes
backend::SamplerGroupHandle morphTargetBuffer; // 4 bytes

backend::RasterState rasterState; // 4 bytes

uint16_t instanceCount; // 2 bytes [MSb: user]
Variant materialVariant; // 1 byte
uint8_t rfu1; // 1 byte
uint32_t rfu2; // 4 byte
backend::PrimitiveType type : 3; // 1 byte 3 bits
bool hasSkinning : 1; // 1 bit
bool hasMorphing : 1; // 1 bit
bool hasHybridInstancing : 1; // 1 bit

uint64_t rfu[2]; // 16 bytes

static const uint16_t USER_INSTANCE_MASK = 0x8000u;
static const uint16_t INSTANCE_COUNT_MASK = 0x7fffu;
Expand Down Expand Up @@ -316,6 +320,7 @@ class RenderPass {
friend class RenderPassBuilder;

// these fields are constant after creation
FScene::RenderableSoa const* mRenderableSoa = nullptr;
utils::Slice<Command> mCommands;
utils::Slice<CustomCommandFn> mCustomCommands;
backend::Handle<backend::HwBufferObject> mUboHandle;
Expand Down

0 comments on commit e4442a5

Please sign in to comment.