Merge pull request #18339 from hrydgard/delete-vertex-cache

Remove the vertex cache option
hrydgard · Oct 11, 2023 · bdff933 · bdff933
2 parents 7d43a49 + 2ac14f5
commit bdff933
Show file tree

Hide file tree

Showing 67 changed files with 25 additions and 1,397 deletions.
diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp
@@ -35,127 +35,6 @@ using namespace PPSSPP_VK;
 // Always keep around push buffers at least this long (seconds).
 static const double PUSH_GARBAGE_COLLECTION_DELAY = 10.0;
 
-VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage)
-		: vulkan_(vulkan), name_(name), size_(size), usage_(usage) {
-	RegisterGPUMemoryManager(this);
-	bool res = AddBuffer();
-	_assert_(res);
-}
-
-VulkanPushBuffer::~VulkanPushBuffer() {
-	UnregisterGPUMemoryManager(this);
-	_dbg_assert_(!writePtr_);
-	_assert_(buffers_.empty());
-}
-
-bool VulkanPushBuffer::AddBuffer() {
-	BufInfo info;
-	VkDevice device = vulkan_->GetDevice();
-
-	VkBufferCreateInfo b{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
-	b.size = size_;
-	b.flags = 0;
-	b.usage = usage_;
-	b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-	b.queueFamilyIndexCount = 0;
-	b.pQueueFamilyIndices = nullptr;
-
-	VmaAllocationCreateInfo allocCreateInfo{};
-	allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
-	VmaAllocationInfo allocInfo{};
-
-	VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &info.buffer, &info.allocation, &allocInfo);
-	if (VK_SUCCESS != res) {
-		_assert_msg_(false, "vkCreateBuffer failed! result=%d", (int)res);
-		return false;
-	}
-
-	vulkan_->SetDebugName(info.buffer, VK_OBJECT_TYPE_BUFFER, name_);
-
-	buffers_.push_back(info);
-	buf_ = buffers_.size() - 1;
-	return true;
-}
-
-void VulkanPushBuffer::Destroy(VulkanContext *vulkan) {
-	_dbg_assert_(!writePtr_);
-	for (BufInfo &info : buffers_) {
-		vulkan->Delete().QueueDeleteBufferAllocation(info.buffer, info.allocation);
-	}
-	buffers_.clear();
-}
-
-void VulkanPushBuffer::NextBuffer(size_t minSize) {
-	// First, unmap the current memory.
-	Unmap();
-
-	buf_++;
-	if (buf_ >= buffers_.size() || minSize > size_) {
-		// Before creating the buffer, adjust to the new size_ if necessary.
-		while (size_ < minSize) {
-			size_ <<= 1;
-		}
-
-		bool res = AddBuffer();
-		_assert_(res);
-		if (!res) {
-			// Let's try not to crash at least?
-			buf_ = 0;
-		}
-	}
-
-	// Now, move to the next buffer and map it.
-	offset_ = 0;
-	Map();
-}
-
-void VulkanPushBuffer::Defragment(VulkanContext *vulkan) {
-	if (buffers_.size() <= 1) {
-		return;
-	}
-
-	// Okay, we have more than one.  Destroy them all and start over with a larger one.
-	size_t newSize = size_ * buffers_.size();
-	Destroy(vulkan);
-
-	size_ = newSize;
-	bool res = AddBuffer();
-	_assert_(res);
-}
-
-size_t VulkanPushBuffer::GetTotalSize() const {
-	size_t sum = 0;
-	if (buffers_.size() > 1)
-		sum += size_ * (buffers_.size() - 1);
-	sum += offset_;
-	return sum;
-}
-
-void VulkanPushBuffer::GetDebugString(char *buffer, size_t bufSize) const {
-	size_t sum = 0;
-	if (buffers_.size() > 1)
-		sum += size_ * (buffers_.size() - 1);
-	sum += offset_;
-	size_t capacity = size_ * buffers_.size();
-	snprintf(buffer, bufSize, "Push %s: %s / %s", name_, NiceSizeFormat(sum).c_str(), NiceSizeFormat(capacity).c_str());
-}
-
-void VulkanPushBuffer::Map() {
-	_dbg_assert_(!writePtr_);
-	VkResult res = vmaMapMemory(vulkan_->Allocator(), buffers_[buf_].allocation, (void **)(&writePtr_));
-	_dbg_assert_(writePtr_);
-	_assert_(VK_SUCCESS == res);
-}
-
-void VulkanPushBuffer::Unmap() {
-	_dbg_assert_msg_(writePtr_ != nullptr, "VulkanPushBuffer::Unmap: writePtr_ null here means we have a bug (map/unmap mismatch)");
-	if (!writePtr_)
-		return;
-
-	vmaUnmapMemory(vulkan_->Allocator(), buffers_[buf_].allocation);
-	writePtr_ = nullptr;
-}
-
 VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage)
 	: vulkan_(vulkan), name_(name), originalBlockSize_(originalBlockSize), usage_(usage) {
 	RegisterGPUMemoryManager(this);

diff --git a/Common/GPU/Vulkan/VulkanMemory.h b/Common/GPU/Vulkan/VulkanMemory.h
@@ -16,88 +16,6 @@ VK_DEFINE_HANDLE(VmaAllocation);
 //
 // Vulkan memory management utils.
 
-// VulkanPushBuffer
-// Simple incrementing allocator.
-// Use these to push vertex, index and uniform data. Generally you'll have two or three of these
-// and alternate on each frame. Make sure not to reset until the fence from the last time you used it
-// has completed.
-// NOTE: This has now been replaced with VulkanPushPool for all uses except the vertex cache.
-class VulkanPushBuffer : public GPUMemoryManager {
-	struct BufInfo {
-		VkBuffer buffer;
-		VmaAllocation allocation;
-	};
-
-public:
-	// NOTE: If you create a push buffer with PushBufferType::GPU_ONLY,
-	// then you can't use any of the push functions as pointers will not be reachable from the CPU.
-	// You must in this case use Allocate() only, and pass the returned offset and the VkBuffer to Vulkan APIs.
-	VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage);
-	~VulkanPushBuffer();
-
-	void Destroy(VulkanContext *vulkan);
-
-	void Reset() { offset_ = 0; }
-
-	void GetDebugString(char *buffer, size_t bufSize) const override;
-	const char *Name() const override {
-		return name_;
-	}
-
-	// Needs context in case of defragment.
-	void Begin(VulkanContext *vulkan) {
-		buf_ = 0;
-		offset_ = 0;
-		// Note: we must defrag because some buffers may be smaller than size_.
-		Defragment(vulkan);
-		Map();
-	}
-
-	void BeginNoReset() { Map(); }
-	void End() { Unmap(); }
-
-	void Map();
-	void Unmap();
-
-	// When using the returned memory, make sure to bind the returned vkbuf.
-	uint8_t *Allocate(VkDeviceSize numBytes, VkDeviceSize alignment, VkBuffer *vkbuf, uint32_t *bindOffset) {
-		size_t offset = (offset_ + alignment - 1) & ~(alignment - 1);
-		if (offset + numBytes > size_) {
-			NextBuffer(numBytes);
-			offset = offset_;
-		}
-		offset_ = offset + numBytes;
-		*bindOffset = (uint32_t)offset;
-		*vkbuf = buffers_[buf_].buffer;
-		return writePtr_ + offset;
-	}
-
-	VkDeviceSize Push(const void *data, VkDeviceSize numBytes, int alignment, VkBuffer *vkbuf) {
-		uint32_t bindOffset;
-		uint8_t *ptr = Allocate(numBytes, alignment, vkbuf, &bindOffset);
-		memcpy(ptr, data, numBytes);
-		return bindOffset;
-	}
-
-	size_t GetOffset() const { return offset_; }
-	size_t GetTotalSize() const;
-
-private:
-	bool AddBuffer();
-	void NextBuffer(size_t minSize);
-	void Defragment(VulkanContext *vulkan);
-
-	VulkanContext *vulkan_;
-
-	std::vector<BufInfo> buffers_;
-	size_t buf_ = 0;
-	size_t offset_ = 0;
-	size_t size_ = 0;
-	uint8_t *writePtr_ = nullptr;
-	VkBufferUsageFlags usage_;
-	const char *name_;
-};
-
 // Simple memory pushbuffer pool that can share blocks between the "frames", to reduce the impact of push memory spikes -
 // a later frame can gobble up redundant buffers from an earlier frame even if they don't share frame index.
 // NOT thread safe! Can only be used from one thread (our main thread).

diff --git a/Core/Config.cpp b/Core/Config.cpp
@@ -590,7 +590,6 @@ static const ConfigSetting graphicsSettings[] = {
 	ConfigSetting("AnisotropyLevel", &g_Config.iAnisotropyLevel, 4, CfgFlag::PER_GAME),
 	ConfigSetting("MultiSampleLevel", &g_Config.iMultiSampleLevel, 0, CfgFlag::PER_GAME),  // Number of samples is 1 << iMultiSampleLevel
 
-	ConfigSetting("VertexDecCache", &g_Config.bVertexCache, false, CfgFlag::PER_GAME | CfgFlag::REPORT),
 	ConfigSetting("TextureBackoffCache", &g_Config.bTextureBackoffCache, false, CfgFlag::PER_GAME | CfgFlag::REPORT),
 	ConfigSetting("VertexDecJit", &g_Config.bVertexDecoderJit, &DefaultCodeGen, CfgFlag::DONT_SAVE | CfgFlag::REPORT),
 

diff --git a/Core/Config.h b/Core/Config.h
@@ -176,7 +176,6 @@ struct Config {
 	float fUITint;
 	float fUISaturation;
 
-	bool bVertexCache;
 	bool bTextureBackoffCache;
 	bool bVertexDecoderJit;
 	bool bFullScreen;

diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
@@ -575,80 +575,6 @@ void DrawEngineCommon::ApplyFramebufferRead(FBOTexState *fboTexState) {
 	gstate_c.Dirty(DIRTY_SHADERBLEND);
 }
 
-inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
-	// Switch to u32 units, and round up to avoid unaligned accesses.
-	// Probably doesn't matter if we skip the first few bytes in some cases.
-	const u32 *p = (const u32 *)(((uintptr_t)ptr + 3) & ~3);
-	sz >>= 2;
-
-	if (sz > 100) {
-		size_t step = sz / 4;
-		u32 hash = 0;
-		for (size_t i = 0; i < sz; i += step) {
-			hash += XXH3_64bits(p + i, 100);
-		}
-		return hash;
-	} else {
-		return p[0] + p[sz - 1];
-	}
-}
-
-u32 DrawEngineCommon::ComputeMiniHash() {
-	u32 fullhash = 0;
-	const int vertexSize = dec_->GetDecVtxFmt().stride;
-	const int indexSize = IndexSize(dec_->VertexType());
-
-	int step;
-	if (numDrawVerts_ < 3) {
-		step = 1;
-	} else if (numDrawVerts_ < 8) {
-		step = 4;
-	} else {
-		step = numDrawVerts_ / 8;
-	}
-	for (int i = 0; i < numDrawVerts_; i += step) {
-		const DeferredVerts &dc = drawVerts_[i];
-		fullhash += ComputeMiniHashRange((const u8 *)dc.verts + vertexSize * dc.indexLowerBound, vertexSize * (dc.indexUpperBound - dc.indexLowerBound));
-	}
-	for (int i = 0; i < numDrawInds_; i += step) {
-		const DeferredInds &di = drawInds_[i];
-		if (di.indexType != 0) {
-			fullhash += ComputeMiniHashRange(di.inds, indexSize * di.vertexCount);
-		}
-	}
-
-	return fullhash;
-}
-
-// Cheap bit scrambler from https://nullprogram.com/blog/2018/07/31/
-inline uint32_t lowbias32_r(uint32_t x) {
-	x ^= x >> 16;
-	x *= 0x43021123U;
-	x ^= x >> 15 ^ x >> 30;
-	x *= 0x1d69e2a5U;
-	x ^= x >> 16;
-	return x;
-}
-
-uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
-	uint32_t dcid = 0;
-	for (int i = 0; i < numDrawVerts_; i++) {
-		u32 dhash = dcid;
-		dhash = __rotl(dhash ^ (u32)(uintptr_t)drawVerts_[i].verts, 13);
-		dhash = __rotl(dhash ^ (u32)drawInds_[i].vertexCount, 11);
-		dcid = lowbias32_r(dhash ^ (u32)drawInds_[i].prim);
-	}
-	for (int i = 0; i < numDrawInds_; i++) {
-		const DeferredInds &di = drawInds_[i];
-		u32 dhash = dcid;
-		if (di.indexType) {
-			dhash = __rotl(dhash ^ (u32)(uintptr_t)di.inds, 19);
-			dcid = lowbias32_r(__rotl(dhash ^ (u32)di.indexType, 7));
-		}
-	}
-	return dcid;
-}
-
 int DrawEngineCommon::ComputeNumVertsToDecode() const {
 	int sum = 0;
 	for (int i = 0; i < numDrawVerts_; i++) {
@@ -657,32 +583,6 @@ int DrawEngineCommon::ComputeNumVertsToDecode() const {
 	return sum;
 }
 
-uint64_t DrawEngineCommon::ComputeHash() {
-	uint64_t fullhash = 0;
-	const int vertexSize = dec_->GetDecVtxFmt().stride;
-
-	// TODO: Add some caps both for numDrawCalls_ and num verts to check?
-	// It is really very expensive to check all the vertex data so often.
-	for (int i = 0; i < numDrawVerts_; i++) {
-		const DeferredVerts &dv = drawVerts_[i];
-		int indexLowerBound = dv.indexLowerBound, indexUpperBound = dv.indexUpperBound;
-		fullhash += XXH3_64bits((const char *)dv.verts + vertexSize * indexLowerBound, vertexSize * (indexUpperBound - indexLowerBound));
-	}
-
-	for (int i = 0; i < numDrawInds_; i++) {
-		const DeferredInds &di = drawInds_[i];
-		if (di.indexType != 0) {
-			int indexSize = IndexSize(di.indexType << GE_VTYPE_IDX_SHIFT);
-			// Hm, we will miss some indices when combining above, but meh, it should be fine.
-			fullhash += XXH3_64bits((const char *)di.inds, indexSize * di.vertexCount);
-		}
-	}
-
-	// this looks utterly broken??
-	// fullhash += XXH3_64bits(&drawCalls_[0].uvScale, sizeof(drawCalls_[0].uvScale) * numDrawCalls_);
-	return fullhash;
-}
-
 int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle) {
 	const uint32_t *start = cmd;
 	int prevDrawVerts = numDrawVerts_ - 1;

diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h
@@ -159,10 +159,6 @@ class DrawEngineCommon {
 	// Preprocessing for spline/bezier
 	u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
 
-	// Utility for vertex caching
-	u32 ComputeMiniHash();
-	uint64_t ComputeHash();
-
 	int ComputeNumVertsToDecode() const;
 
 	void ApplyFramebufferRead(FBOTexState *fboTexState);
@@ -271,7 +267,6 @@ class DrawEngineCommon {
 	int numDrawInds_ = 0;
 	int vertexCountInDrawCalls_ = 0;
 
-	int decimationCounter_ = 0;
 	int decodeVertsCounter_ = 0;
 	int decodeIndsCounter_ = 0;