Skip to content

Commit

Permalink
Merge pull request #18339 from hrydgard/delete-vertex-cache
Browse files Browse the repository at this point in the history
Remove the vertex cache option
  • Loading branch information
hrydgard committed Oct 11, 2023
2 parents 7d43a49 + 2ac14f5 commit bdff933
Show file tree
Hide file tree
Showing 67 changed files with 25 additions and 1,397 deletions.
121 changes: 0 additions & 121 deletions Common/GPU/Vulkan/VulkanMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,127 +35,6 @@ using namespace PPSSPP_VK;
// Always keep around push buffers at least this long (seconds).
static const double PUSH_GARBAGE_COLLECTION_DELAY = 10.0;

VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage)
: vulkan_(vulkan), name_(name), size_(size), usage_(usage) {
RegisterGPUMemoryManager(this);
bool res = AddBuffer();
_assert_(res);
}

VulkanPushBuffer::~VulkanPushBuffer() {
UnregisterGPUMemoryManager(this);
_dbg_assert_(!writePtr_);
_assert_(buffers_.empty());
}

bool VulkanPushBuffer::AddBuffer() {
BufInfo info;
VkDevice device = vulkan_->GetDevice();

VkBufferCreateInfo b{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
b.size = size_;
b.flags = 0;
b.usage = usage_;
b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
b.queueFamilyIndexCount = 0;
b.pQueueFamilyIndices = nullptr;

VmaAllocationCreateInfo allocCreateInfo{};
allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
VmaAllocationInfo allocInfo{};

VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &info.buffer, &info.allocation, &allocInfo);
if (VK_SUCCESS != res) {
_assert_msg_(false, "vkCreateBuffer failed! result=%d", (int)res);
return false;
}

vulkan_->SetDebugName(info.buffer, VK_OBJECT_TYPE_BUFFER, name_);

buffers_.push_back(info);
buf_ = buffers_.size() - 1;
return true;
}

void VulkanPushBuffer::Destroy(VulkanContext *vulkan) {
_dbg_assert_(!writePtr_);
for (BufInfo &info : buffers_) {
vulkan->Delete().QueueDeleteBufferAllocation(info.buffer, info.allocation);
}
buffers_.clear();
}

void VulkanPushBuffer::NextBuffer(size_t minSize) {
// First, unmap the current memory.
Unmap();

buf_++;
if (buf_ >= buffers_.size() || minSize > size_) {
// Before creating the buffer, adjust to the new size_ if necessary.
while (size_ < minSize) {
size_ <<= 1;
}

bool res = AddBuffer();
_assert_(res);
if (!res) {
// Let's try not to crash at least?
buf_ = 0;
}
}

// Now, move to the next buffer and map it.
offset_ = 0;
Map();
}

void VulkanPushBuffer::Defragment(VulkanContext *vulkan) {
if (buffers_.size() <= 1) {
return;
}

// Okay, we have more than one. Destroy them all and start over with a larger one.
size_t newSize = size_ * buffers_.size();
Destroy(vulkan);

size_ = newSize;
bool res = AddBuffer();
_assert_(res);
}

size_t VulkanPushBuffer::GetTotalSize() const {
size_t sum = 0;
if (buffers_.size() > 1)
sum += size_ * (buffers_.size() - 1);
sum += offset_;
return sum;
}

void VulkanPushBuffer::GetDebugString(char *buffer, size_t bufSize) const {
size_t sum = 0;
if (buffers_.size() > 1)
sum += size_ * (buffers_.size() - 1);
sum += offset_;
size_t capacity = size_ * buffers_.size();
snprintf(buffer, bufSize, "Push %s: %s / %s", name_, NiceSizeFormat(sum).c_str(), NiceSizeFormat(capacity).c_str());
}

void VulkanPushBuffer::Map() {
_dbg_assert_(!writePtr_);
VkResult res = vmaMapMemory(vulkan_->Allocator(), buffers_[buf_].allocation, (void **)(&writePtr_));
_dbg_assert_(writePtr_);
_assert_(VK_SUCCESS == res);
}

void VulkanPushBuffer::Unmap() {
_dbg_assert_msg_(writePtr_ != nullptr, "VulkanPushBuffer::Unmap: writePtr_ null here means we have a bug (map/unmap mismatch)");
if (!writePtr_)
return;

vmaUnmapMemory(vulkan_->Allocator(), buffers_[buf_].allocation);
writePtr_ = nullptr;
}

VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage)
: vulkan_(vulkan), name_(name), originalBlockSize_(originalBlockSize), usage_(usage) {
RegisterGPUMemoryManager(this);
Expand Down
82 changes: 0 additions & 82 deletions Common/GPU/Vulkan/VulkanMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,88 +16,6 @@ VK_DEFINE_HANDLE(VmaAllocation);
//
// Vulkan memory management utils.

// VulkanPushBuffer
// Simple incrementing allocator.
// Use these to push vertex, index and uniform data. Generally you'll have two or three of these
// and alternate on each frame. Make sure not to reset until the fence from the last time you used it
// has completed.
// NOTE: This has now been replaced with VulkanPushPool for all uses except the vertex cache.
class VulkanPushBuffer : public GPUMemoryManager {
struct BufInfo {
VkBuffer buffer;
VmaAllocation allocation;
};

public:
// NOTE: If you create a push buffer with PushBufferType::GPU_ONLY,
// then you can't use any of the push functions as pointers will not be reachable from the CPU.
// You must in this case use Allocate() only, and pass the returned offset and the VkBuffer to Vulkan APIs.
VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage);
~VulkanPushBuffer();

void Destroy(VulkanContext *vulkan);

void Reset() { offset_ = 0; }

void GetDebugString(char *buffer, size_t bufSize) const override;
const char *Name() const override {
return name_;
}

// Needs context in case of defragment.
void Begin(VulkanContext *vulkan) {
buf_ = 0;
offset_ = 0;
// Note: we must defrag because some buffers may be smaller than size_.
Defragment(vulkan);
Map();
}

void BeginNoReset() { Map(); }
void End() { Unmap(); }

void Map();
void Unmap();

// When using the returned memory, make sure to bind the returned vkbuf.
uint8_t *Allocate(VkDeviceSize numBytes, VkDeviceSize alignment, VkBuffer *vkbuf, uint32_t *bindOffset) {
size_t offset = (offset_ + alignment - 1) & ~(alignment - 1);
if (offset + numBytes > size_) {
NextBuffer(numBytes);
offset = offset_;
}
offset_ = offset + numBytes;
*bindOffset = (uint32_t)offset;
*vkbuf = buffers_[buf_].buffer;
return writePtr_ + offset;
}

VkDeviceSize Push(const void *data, VkDeviceSize numBytes, int alignment, VkBuffer *vkbuf) {
uint32_t bindOffset;
uint8_t *ptr = Allocate(numBytes, alignment, vkbuf, &bindOffset);
memcpy(ptr, data, numBytes);
return bindOffset;
}

size_t GetOffset() const { return offset_; }
size_t GetTotalSize() const;

private:
bool AddBuffer();
void NextBuffer(size_t minSize);
void Defragment(VulkanContext *vulkan);

VulkanContext *vulkan_;

std::vector<BufInfo> buffers_;
size_t buf_ = 0;
size_t offset_ = 0;
size_t size_ = 0;
uint8_t *writePtr_ = nullptr;
VkBufferUsageFlags usage_;
const char *name_;
};

// Simple memory pushbuffer pool that can share blocks between the "frames", to reduce the impact of push memory spikes -
// a later frame can gobble up redundant buffers from an earlier frame even if they don't share frame index.
// NOT thread safe! Can only be used from one thread (our main thread).
Expand Down
1 change: 0 additions & 1 deletion Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,6 @@ static const ConfigSetting graphicsSettings[] = {
ConfigSetting("AnisotropyLevel", &g_Config.iAnisotropyLevel, 4, CfgFlag::PER_GAME),
ConfigSetting("MultiSampleLevel", &g_Config.iMultiSampleLevel, 0, CfgFlag::PER_GAME), // Number of samples is 1 << iMultiSampleLevel

ConfigSetting("VertexDecCache", &g_Config.bVertexCache, false, CfgFlag::PER_GAME | CfgFlag::REPORT),
ConfigSetting("TextureBackoffCache", &g_Config.bTextureBackoffCache, false, CfgFlag::PER_GAME | CfgFlag::REPORT),
ConfigSetting("VertexDecJit", &g_Config.bVertexDecoderJit, &DefaultCodeGen, CfgFlag::DONT_SAVE | CfgFlag::REPORT),

Expand Down
1 change: 0 additions & 1 deletion Core/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,6 @@ struct Config {
float fUITint;
float fUISaturation;

bool bVertexCache;
bool bTextureBackoffCache;
bool bVertexDecoderJit;
bool bFullScreen;
Expand Down
100 changes: 0 additions & 100 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -575,80 +575,6 @@ void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
gstate_c.Dirty(DIRTY_SHADERBLEND);
}

inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
// Switch to u32 units, and round up to avoid unaligned accesses.
// Probably doesn't matter if we skip the first few bytes in some cases.
const u32 *p = (const u32 *)(((uintptr_t)ptr + 3) & ~3);
sz >>= 2;

if (sz > 100) {
size_t step = sz / 4;
u32 hash = 0;
for (size_t i = 0; i < sz; i += step) {
hash += XXH3_64bits(p + i, 100);
}
return hash;
} else {
return p[0] + p[sz - 1];
}
}

u32 DrawEngineCommon::ComputeMiniHash() {
u32 fullhash = 0;
const int vertexSize = dec_->GetDecVtxFmt().stride;
const int indexSize = IndexSize(dec_->VertexType());

int step;
if (numDrawVerts_ < 3) {
step = 1;
} else if (numDrawVerts_ < 8) {
step = 4;
} else {
step = numDrawVerts_ / 8;
}
for (int i = 0; i < numDrawVerts_; i += step) {
const DeferredVerts &dc = drawVerts_[i];
fullhash += ComputeMiniHashRange((const u8 *)dc.verts + vertexSize * dc.indexLowerBound, vertexSize * (dc.indexUpperBound - dc.indexLowerBound));
}
for (int i = 0; i < numDrawInds_; i += step) {
const DeferredInds &di = drawInds_[i];
if (di.indexType != 0) {
fullhash += ComputeMiniHashRange(di.inds, indexSize * di.vertexCount);
}
}

return fullhash;
}

// Cheap bit scrambler from https://nullprogram.com/blog/2018/07/31/
inline uint32_t lowbias32_r(uint32_t x) {
x ^= x >> 16;
x *= 0x43021123U;
x ^= x >> 15 ^ x >> 30;
x *= 0x1d69e2a5U;
x ^= x >> 16;
return x;
}

uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
uint32_t dcid = 0;
for (int i = 0; i < numDrawVerts_; i++) {
u32 dhash = dcid;
dhash = __rotl(dhash ^ (u32)(uintptr_t)drawVerts_[i].verts, 13);
dhash = __rotl(dhash ^ (u32)drawInds_[i].vertexCount, 11);
dcid = lowbias32_r(dhash ^ (u32)drawInds_[i].prim);
}
for (int i = 0; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
u32 dhash = dcid;
if (di.indexType) {
dhash = __rotl(dhash ^ (u32)(uintptr_t)di.inds, 19);
dcid = lowbias32_r(__rotl(dhash ^ (u32)di.indexType, 7));
}
}
return dcid;
}

int DrawEngineCommon::ComputeNumVertsToDecode() const {
int sum = 0;
for (int i = 0; i < numDrawVerts_; i++) {
Expand All @@ -657,32 +583,6 @@ int DrawEngineCommon::ComputeNumVertsToDecode() const {
return sum;
}

uint64_t DrawEngineCommon::ComputeHash() {
uint64_t fullhash = 0;
const int vertexSize = dec_->GetDecVtxFmt().stride;

// TODO: Add some caps both for numDrawCalls_ and num verts to check?
// It is really very expensive to check all the vertex data so often.
for (int i = 0; i < numDrawVerts_; i++) {
const DeferredVerts &dv = drawVerts_[i];
int indexLowerBound = dv.indexLowerBound, indexUpperBound = dv.indexUpperBound;
fullhash += XXH3_64bits((const char *)dv.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound));
}

for (int i = 0; i < numDrawInds_; i++) {
const DeferredInds &di = drawInds_[i];
if (di.indexType != 0) {
int indexSize = IndexSize(di.indexType << GE_VTYPE_IDX_SHIFT);
// Hm, we will miss some indices when combining above, but meh, it should be fine.
fullhash += XXH3_64bits((const char *)di.inds, indexSize * di.vertexCount);
}
}

// this looks utterly broken??
// fullhash += XXH3_64bits(&drawCalls_[0].uvScale, sizeof(drawCalls_[0].uvScale) * numDrawCalls_);
return fullhash;
}

int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle) {
const uint32_t *start = cmd;
int prevDrawVerts = numDrawVerts_ - 1;
Expand Down
5 changes: 0 additions & 5 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,6 @@ class DrawEngineCommon {
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);

// Utility for vertex caching
u32 ComputeMiniHash();
uint64_t ComputeHash();

int ComputeNumVertsToDecode() const;

void ApplyFramebufferRead(FBOTexState *fboTexState);
Expand Down Expand Up @@ -271,7 +267,6 @@ class DrawEngineCommon {
int numDrawInds_ = 0;
int vertexCountInDrawCalls_ = 0;

int decimationCounter_ = 0;
int decodeVertsCounter_ = 0;
int decodeIndsCounter_ = 0;

Expand Down
Loading

0 comments on commit bdff933

Please sign in to comment.