Skip to content

Commit

Permalink
Merge pull request #17565 from hrydgard/breakout-vcache-vulkan
Browse files Browse the repository at this point in the history
Vulkan: Break out the vertex cache logic from DoFlush()
  • Loading branch information
hrydgard committed Jun 13, 2023
2 parents 963ca50 + f5516d3 commit 22632b8
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 178 deletions.
2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanMemory.h
Expand Up @@ -135,6 +135,8 @@ class VulkanPushPool : public GPUMemoryManager {
return blocks_[curBlockIndex_].writePtr;
}

// NOTE: If you can avoid this by writing the data directly into memory returned from Allocate,
// do so. Savings from avoiding memcpy can be significant.
VkDeviceSize Push(const void *data, VkDeviceSize numBytes, int alignment, VkBuffer *vkbuf) {
uint32_t bindOffset;
uint8_t *ptr = Allocate(numBytes, alignment, vkbuf, &bindOffset);
Expand Down
51 changes: 28 additions & 23 deletions GPU/Common/DrawEngineCommon.cpp
Expand Up @@ -74,17 +74,18 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {

int DrawEngineCommon::ComputeNumVertsToDecode() const {
int vertsToDecode = 0;
int numDrawCalls = numDrawCalls_;
if (drawCalls_[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
for (int i = 0; i < numDrawCalls_; i++) {
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls_[i];
vertsToDecode += dc.vertexCount;
}
} else {
// TODO: Share this computation with DecodeVertsStep?
for (int i = 0; i < numDrawCalls_; i++) {
for (int i = 0; i < numDrawCalls; i++) {
const DeferredDrawCall &dc = drawCalls_[i];
int lastMatch = i;
const int total = numDrawCalls_;
const int total = numDrawCalls;
int indexLowerBound = dc.indexLowerBound;
int indexUpperBound = dc.indexUpperBound;
for (int j = i + 1; j < total; ++j) {
Expand Down Expand Up @@ -642,7 +643,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts, cons
for (int j = i + 1; j < total; ++j) {
if (drawCalls_[j].verts != dc.verts)
break;

// TODO: What if UV scale/offset changes between drawcalls here?
indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls_[j].indexUpperBound);
lastMatch = j;
Expand Down Expand Up @@ -779,16 +780,6 @@ uint64_t DrawEngineCommon::ComputeHash() {
return fullhash;
}

// Cheap 32-bit bit scrambler, from https://nullprogram.com/blog/2018/07/31/
// Interleaves xor-shift folds with multiplications by fixed odd constants.
// All arithmetic is unsigned, so the multiplications wrap modulo 2^32.
// Returns the scrambled value; an input of 0 yields 0.
inline uint32_t lowbias32_r(uint32_t x) {
	// Fold the high half into the low half, then multiply.
	x = (x ^ (x >> 16)) * 0x43021123U;
	// Two-term xor-shift (15 and 30 bits), then the second multiply.
	x = (x ^ (x >> 15) ^ (x >> 30)) * 0x1d69e2a5U;
	// Final fold of the high half into the low half.
	return x ^ (x >> 16);
}

// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
Expand Down Expand Up @@ -818,15 +809,6 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > GE_PRIM_LINE_STRIP && prim != GE_PRIM_RECTANGLES))
return;

if (g_Config.bVertexCache) {
u32 dhash = dcid_;
dhash = __rotl(dhash ^ (u32)(uintptr_t)verts, 13);
dhash = __rotl(dhash ^ (u32)(uintptr_t)inds, 19);
dhash = __rotl(dhash ^ (u32)vertTypeID, 7);
dhash = __rotl(dhash ^ (u32)vertexCount, 11);
dcid_ = lowbias32_r(dhash ^ (u32)prim);
}

DeferredDrawCall &dc = drawCalls_[numDrawCalls_];
dc.verts = verts;
dc.inds = inds;
Expand Down Expand Up @@ -870,6 +852,29 @@ bool DrawEngineCommon::CanUseHardwareTessellation(GEPatchPrimType prim) {
return false;
}

// Cheap bit scrambler from https://nullprogram.com/blog/2018/07/31/
// Pipeline: fold (>>16), multiply, fold (>>15 and >>30), multiply, fold (>>16).
// Every step operates on 32-bit unsigned values, so overflow simply wraps.
inline uint32_t lowbias32_r(uint32_t x) {
	const uint32_t firstMultiplier = 0x43021123U;
	const uint32_t secondMultiplier = 0x1d69e2a5U;

	uint32_t stage = x ^ (x >> 16);
	stage *= firstMultiplier;

	const uint32_t folded = stage ^ (stage >> 15) ^ (stage >> 30);
	const uint32_t scaled = folded * secondMultiplier;

	return scaled ^ (scaled >> 16);
}

uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
uint32_t dcid = 0;
for (int i = 0; i < numDrawCalls_; i++) {
u32 dhash = dcid;
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawCalls_[i].verts, 13);
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawCalls_[i].inds, 19);
dhash = __rotl(dhash ^ (u32)drawCalls_[i].indexType, 7);
dhash = __rotl(dhash ^ (u32)drawCalls_[i].vertexCount, 11);
dcid = lowbias32_r(dhash ^ (u32)drawCalls_[i].prim);
}
return dcid;
}

void TessellationDataTransfer::CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType) {
bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
bool hasTexCoord = (vertType & GE_VTYPE_TC_MASK) != 0;
Expand Down
3 changes: 2 additions & 1 deletion GPU/Common/DrawEngineCommon.h
Expand Up @@ -175,6 +175,8 @@ class DrawEngineCommon {
}
}

uint32_t ComputeDrawcallsHash() const;

bool useHWTransform_ = false;
bool useHWTessellation_ = false;
// Used to prevent unnecessary flushing in softgpu.
Expand Down Expand Up @@ -218,7 +220,6 @@ class DrawEngineCommon {

int decimationCounter_ = 0;
int decodeCounter_ = 0;
u32 dcid_ = 0;

// Vertex collector state
IndexGenerator indexGen;
Expand Down
8 changes: 4 additions & 4 deletions GPU/D3D11/DrawEngineD3D11.cpp
Expand Up @@ -363,12 +363,13 @@ void DrawEngineD3D11::DoFlush() {
useCache = false;

if (useCache) {
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();

VertexArrayInfoD3D11 *vai = vai_.Get(id);
VertexArrayInfoD3D11 *vai = vai_.Get(dcid);
if (!vai) {
vai = new VertexArrayInfoD3D11();
vai_.Insert(id, vai);
vai_.Insert(dcid, vai);
}

switch (vai->status) {
Expand Down Expand Up @@ -724,7 +725,6 @@ void DrawEngineD3D11::DoFlush() {
numDrawCalls_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
dcid_ = 0;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

Expand Down
8 changes: 4 additions & 4 deletions GPU/Directx9/DrawEngineDX9.cpp
Expand Up @@ -345,11 +345,12 @@ void DrawEngineDX9::DoFlush() {
useCache = false;

if (useCache) {
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
VertexArrayInfoDX9 *vai = vai_.Get(id);
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
VertexArrayInfoDX9 *vai = vai_.Get(dcid);
if (!vai) {
vai = new VertexArrayInfoDX9();
vai_.Insert(id, vai);
vai_.Insert(dcid, vai);
}

switch (vai->status) {
Expand Down Expand Up @@ -666,7 +667,6 @@ void DrawEngineDX9::DoFlush() {
numDrawCalls_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
dcid_ = 0;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

Expand Down
2 changes: 0 additions & 2 deletions GPU/GLES/DrawEngineGLES.cpp
Expand Up @@ -248,7 +248,6 @@ void DrawEngineGLES::DoFlush() {
numDrawCalls_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
dcid_ = 0;
return;
}

Expand Down Expand Up @@ -483,7 +482,6 @@ void DrawEngineGLES::DoFlush() {
numDrawCalls_ = 0;
vertexCountInDrawCalls_ = 0;
decodeCounter_ = 0;
dcid_ = 0;
gstate_c.vertexFullAlpha = true;
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

Expand Down

0 comments on commit 22632b8

Please sign in to comment.