Skip to content

Commit

Permalink
Merge pull request #18129 from hrydgard/hashmap-semantics-fix
Browse files Browse the repository at this point in the history
Fix the semantics of DenseHashMap to be consistent even when inserting nulls
  • Loading branch information
hrydgard committed Sep 11, 2023
2 parents 4d58bb8 + d335393 commit b83e89a
Show file tree
Hide file tree
Showing 29 changed files with 223 additions and 162 deletions.
53 changes: 38 additions & 15 deletions Common/Data/Collections/Hashmaps.h
Expand Up @@ -29,31 +29,52 @@ enum class BucketState : uint8_t {
// we always use very small values, so it's probably better to have them in the same
// cache-line as the corresponding key.
// Enforces that value are pointers to make sure that combined storage makes sense.
template <class Key, class Value, Value NullValue>
template <class Key, class Value>
class DenseHashMap {
public:
DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
map.resize(initialCapacity);
state.resize(initialCapacity);
}

// Returns nullptr if no entry was found.
Value Get(const Key &key) {
// Returns true if the entry was found, and writes the entry to *value.
// Returns false and does not write to value if no entry was found.
// Note that nulls can be stored.
bool Get(const Key &key, Value *value) const {
uint32_t mask = capacity_ - 1;
uint32_t pos = HashKey(key) & mask;
// No? Let's go into search mode. Linear probing.
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key))
return map[p].value;
else if (state[p] == BucketState::FREE)
return NullValue;
if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
*value = map[p].value;
return true;
} else if (state[p] == BucketState::FREE) {
return false;
}
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
if (p == pos) {
// We looped around the whole map.
_assert_msg_(false, "DenseHashMap: Hit full on Get()");
}
}
return NullValue;
return false;
}

// Only works if Value can be nullptr
Value GetOrNull(const Key &key) const {
Value value;
if (Get(key, &value)) {
return value;
} else {
return (Value)nullptr;
}
}

bool ContainsKey(const Key &key) const {
// Slightly wasteful.
Value value;
return Get(key, &value);
}

// Asserts if we already had the key!
Expand Down Expand Up @@ -190,7 +211,7 @@ class DenseHashMap {

// Like the above, uses linear probing for cache-friendliness.
// Does not perform hashing at all so expects well-distributed keys.
template <class Value, Value NullValue>
template <class Value>
class PrehashMap {
public:
PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
Expand All @@ -199,22 +220,24 @@ class PrehashMap {
}

// Returns nullptr if no entry was found.
Value Get(uint32_t hash) {
bool Get(uint32_t hash, Value *value) {
uint32_t mask = capacity_ - 1;
uint32_t pos = hash & mask;
// No? Let's go into search mode. Linear probing.
uint32_t p = pos;
while (true) {
if (state[p] == BucketState::TAKEN && hash == map[p].hash)
return map[p].value;
else if (state[p] == BucketState::FREE)
return NullValue;
if (state[p] == BucketState::TAKEN && hash == map[p].hash) {
*value = map[p].value;
return true;
} else if (state[p] == BucketState::FREE) {
return false;
}
p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
if (p == pos) {
_assert_msg_(false, "PrehashMap: Hit full on Get()");
}
}
return NullValue;
return false;
}

// Returns false if we already had the key! Which is a bit different.
Expand Down
4 changes: 2 additions & 2 deletions Common/GPU/Vulkan/VulkanFrameData.h
Expand Up @@ -99,13 +99,13 @@ struct FrameData {

// Frames need unique IDs to wait for present on, let's keep them here.
// Also used for indexing into the frame timing history buffer.
uint64_t frameId;
uint64_t frameId = 0;

// Profiling.
QueueProfileContext profile{};

// Async readback cache.
DenseHashMap<ReadbackKey, CachedReadback*, nullptr> readbacks_;
DenseHashMap<ReadbackKey, CachedReadback *> readbacks_;

FrameData() : readbacks_(8) {}

Expand Down
11 changes: 5 additions & 6 deletions Common/GPU/Vulkan/VulkanQueueRunner.cpp
Expand Up @@ -251,8 +251,8 @@ void VulkanQueueRunner::DestroyBackBuffers() {
// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
auto foundPass = renderPasses_.Get(key);
if (foundPass) {
VKRRenderPass *foundPass;
if (renderPasses_.Get(key, &foundPass)) {
return foundPass;
}

Expand Down Expand Up @@ -1984,8 +1984,7 @@ void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd
key.height = step.readback.srcRect.extent.height;

// See if there's already a buffer we can reuse
cached = frameData.readbacks_.Get(key);
if (!cached) {
if (!frameData.readbacks_.Get(key, &cached)) {
cached = new CachedReadback();
cached->bufferSize = 0;
frameData.readbacks_.Insert(key, cached);
Expand Down Expand Up @@ -2065,8 +2064,8 @@ bool VulkanQueueRunner::CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer
key.framebuf = src;
key.width = width;
key.height = height;
CachedReadback *cached = frameData.readbacks_.Get(key);
if (cached) {
CachedReadback *cached;
if (frameData.readbacks_.Get(key, &cached)) {
readback = cached;
} else {
// Didn't have a cached image ready yet
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanQueueRunner.h
Expand Up @@ -322,7 +322,7 @@ class VulkanQueueRunner {

// Renderpasses, all combinations of preserving or clearing or dont-care-ing fb contents.
// Each VKRRenderPass contains all compatibility classes (which attachments they have, etc).
DenseHashMap<RPKey, VKRRenderPass *, nullptr> renderPasses_;
DenseHashMap<RPKey, VKRRenderPass *> renderPasses_;

// Readback buffer. Currently we only support synchronous readback, so we only really need one.
// We size it generously.
Expand Down
12 changes: 8 additions & 4 deletions GPU/Common/DrawEngineCommon.cpp
Expand Up @@ -62,8 +62,8 @@ void DrawEngineCommon::Init() {
}

VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
VertexDecoder *dec = decoderMap_.Get(vtype);
if (dec)
VertexDecoder *dec;
if (decoderMap_.Get(vtype, &dec))
return dec;
dec = new VertexDecoder();
_assert_(dec);
Expand Down Expand Up @@ -132,8 +132,12 @@ std::vector<std::string> DrawEngineCommon::DebugGetVertexLoaderIDs() {
std::string DrawEngineCommon::DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType) {
u32 mapId;
memcpy(&mapId, &id[0], sizeof(mapId));
VertexDecoder *dec = decoderMap_.Get(mapId);
return dec ? dec->GetString(stringType) : "N/A";
VertexDecoder *dec;
if (decoderMap_.Get(mapId, &dec)) {
return dec->GetString(stringType);
} else {
return "N/A";
}
}

static Vec3f ClipToScreen(const Vec4f& coords) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/DrawEngineCommon.h
Expand Up @@ -201,7 +201,7 @@ class DrawEngineCommon {

// Cached vertex decoders
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
DenseHashMap<u32, VertexDecoder *, nullptr> decoderMap_;
DenseHashMap<u32, VertexDecoder *> decoderMap_;
VertexDecoder *dec_ = nullptr;
VertexDecoderJitCache *decJitCache_ = nullptr;
VertexDecoderOptions decOptions_{};
Expand Down
12 changes: 6 additions & 6 deletions GPU/D3D11/DrawEngineD3D11.cpp
Expand Up @@ -199,8 +199,8 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
// TODO: Instead of one for each vshader, we can reduce it to one for each type of shader
// that reads TEXCOORD or not, etc. Not sure if worth it.
const InputLayoutKey key{ vshader, decFmt.id };
ID3D11InputLayout *inputLayout = inputLayoutMap_.Get(key);
if (inputLayout) {
ID3D11InputLayout *inputLayout;
if (inputLayoutMap_.Get(key, &inputLayout)) {
return inputLayout;
} else {
D3D11_INPUT_ELEMENT_DESC VertexElements[8];
Expand Down Expand Up @@ -368,8 +368,8 @@ void DrawEngineD3D11::DoFlush() {
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();

VertexArrayInfoD3D11 *vai = vai_.Get(dcid);
if (!vai) {
VertexArrayInfoD3D11 *vai;
if (!vai_.Get(dcid, &vai)) {
vai = new VertexArrayInfoD3D11();
vai_.Insert(dcid, vai);
}
Expand Down Expand Up @@ -663,8 +663,8 @@ void DrawEngineD3D11::DoFlush() {
// We really do need a vertex layout for each vertex shader (or at least check its ID bits for what inputs it uses)!
// Some vertex shaders ignore one of the inputs, and then the layout created from it will lack it, which will be a problem for others.
InputLayoutKey key{ vshader, 0xFFFFFFFF }; // Let's use 0xFFFFFFFF to signify TransformedVertex
ID3D11InputLayout *layout = inputLayoutMap_.Get(key);
if (!layout) {
ID3D11InputLayout *layout;
if (!inputLayoutMap_.Get(key, &layout)) {
ASSERT_SUCCESS(device_->CreateInputLayout(TransformedVertexElements, ARRAY_SIZE(TransformedVertexElements), vshader->bytecode().data(), vshader->bytecode().size(), &layout));
inputLayoutMap_.Insert(key, layout);
}
Expand Down
12 changes: 6 additions & 6 deletions GPU/D3D11/DrawEngineD3D11.h
Expand Up @@ -179,7 +179,7 @@ class DrawEngineD3D11 : public DrawEngineCommon {
ID3D11DeviceContext *context_;
ID3D11DeviceContext1 *context1_;

PrehashMap<VertexArrayInfoD3D11 *, nullptr> vai_;
PrehashMap<VertexArrayInfoD3D11 *> vai_;

struct InputLayoutKey {
D3D11VertexShader *vshader;
Expand All @@ -193,7 +193,7 @@ class DrawEngineD3D11 : public DrawEngineCommon {
}
};

DenseHashMap<InputLayoutKey, ID3D11InputLayout *, nullptr> inputLayoutMap_;
DenseHashMap<InputLayoutKey, ID3D11InputLayout *> inputLayoutMap_;

// Other
ShaderManagerD3D11 *shaderManager_ = nullptr;
Expand All @@ -205,10 +205,10 @@ class DrawEngineD3D11 : public DrawEngineCommon {
PushBufferD3D11 *pushInds_;

// D3D11 state object caches.
DenseHashMap<uint64_t, ID3D11BlendState *, nullptr> blendCache_;
DenseHashMap<uint64_t, ID3D11BlendState1 *, nullptr> blendCache1_;
DenseHashMap<uint64_t, ID3D11DepthStencilState *, nullptr> depthStencilCache_;
DenseHashMap<uint32_t, ID3D11RasterizerState *, nullptr> rasterCache_;
DenseHashMap<uint64_t, ID3D11BlendState *> blendCache_;
DenseHashMap<uint64_t, ID3D11BlendState1 *> blendCache1_;
DenseHashMap<uint64_t, ID3D11DepthStencilState *> depthStencilCache_;
DenseHashMap<uint32_t, ID3D11RasterizerState *> rasterCache_;

// Keep the depth state between ApplyDrawState and ApplyDrawStateLate
ID3D11RasterizerState *rasterState_ = nullptr;
Expand Down
16 changes: 8 additions & 8 deletions GPU/D3D11/StateMappingD3D11.cpp
Expand Up @@ -356,8 +356,8 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
// There might have been interactions between depth and blend above.
if (gstate_c.IsDirty(DIRTY_BLEND_STATE)) {
if (!device1_) {
ID3D11BlendState *bs = blendCache_.Get(keys_.blend.value);
if (bs == nullptr) {
ID3D11BlendState *bs = nullptr;
if (!blendCache_.Get(keys_.blend.value, &bs) || !bs) {
D3D11_BLEND_DESC desc{};
D3D11_RENDER_TARGET_BLEND_DESC &rt = desc.RenderTarget[0];
rt.BlendEnable = keys_.blend.blendEnable;
Expand All @@ -373,8 +373,8 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}
blendState_ = bs;
} else {
ID3D11BlendState1 *bs1 = blendCache1_.Get(keys_.blend.value);
if (bs1 == nullptr) {
ID3D11BlendState1 *bs1 = nullptr;
if (!blendCache1_.Get(keys_.blend.value, &bs1) || !bs1) {
D3D11_BLEND_DESC1 desc1{};
D3D11_RENDER_TARGET_BLEND_DESC1 &rt = desc1.RenderTarget[0];
rt.BlendEnable = keys_.blend.blendEnable;
Expand All @@ -395,8 +395,8 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}

if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) {
ID3D11RasterizerState *rs = rasterCache_.Get(keys_.raster.value);
if (rs == nullptr) {
ID3D11RasterizerState *rs;
if (!rasterCache_.Get(keys_.raster.value, &rs) || !rs) {
D3D11_RASTERIZER_DESC desc{};
desc.CullMode = (D3D11_CULL_MODE)(keys_.raster.cullMode);
desc.FillMode = D3D11_FILL_SOLID;
Expand All @@ -410,8 +410,8 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}

if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE)) {
ID3D11DepthStencilState *ds = depthStencilCache_.Get(keys_.depthStencil.value);
if (ds == nullptr) {
ID3D11DepthStencilState *ds;
if (!depthStencilCache_.Get(keys_.depthStencil.value, &ds) || !ds) {
D3D11_DEPTH_STENCIL_DESC desc{};
desc.DepthEnable = keys_.depthStencil.depthTestEnable;
desc.DepthWriteMask = keys_.depthStencil.depthWriteEnable ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
Expand Down
9 changes: 4 additions & 5 deletions GPU/Directx9/DrawEngineDX9.cpp
Expand Up @@ -162,9 +162,8 @@ static void VertexAttribSetup(D3DVERTEXELEMENT9 * VertexElement, u8 fmt, u8 offs
}

IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(const DecVtxFormat &decFmt, u32 pspFmt) {
IDirect3DVertexDeclaration9 *vertexDeclCached = vertexDeclMap_.Get(pspFmt);

if (vertexDeclCached) {
IDirect3DVertexDeclaration9 *vertexDeclCached;
if (vertexDeclMap_.Get(pspFmt, &vertexDeclCached)) {
return vertexDeclCached;
} else {
D3DVERTEXELEMENT9 VertexElements[8];
Expand Down Expand Up @@ -347,8 +346,8 @@ void DrawEngineDX9::DoFlush() {
if (useCache) {
// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
VertexArrayInfoDX9 *vai = vai_.Get(dcid);
if (!vai) {
VertexArrayInfoDX9 *vai;
if (!vai_.Get(dcid, &vai)) {
vai = new VertexArrayInfoDX9();
vai_.Insert(dcid, vai);
}
Expand Down
4 changes: 2 additions & 2 deletions GPU/Directx9/DrawEngineDX9.h
Expand Up @@ -164,8 +164,8 @@ class DrawEngineDX9 : public DrawEngineCommon {
LPDIRECT3DDEVICE9 device_ = nullptr;
Draw::DrawContext *draw_;

PrehashMap<VertexArrayInfoDX9 *, nullptr> vai_;
DenseHashMap<u32, IDirect3DVertexDeclaration9 *, nullptr> vertexDeclMap_;
PrehashMap<VertexArrayInfoDX9 *> vai_;
DenseHashMap<u32, IDirect3DVertexDeclaration9 *> vertexDeclMap_;

// SimpleVertex
IDirect3DVertexDeclaration9* transformedVertexDecl_ = nullptr;
Expand Down
4 changes: 2 additions & 2 deletions GPU/GLES/DrawEngineGLES.cpp
Expand Up @@ -206,8 +206,8 @@ static inline void VertexAttribSetup(int attrib, int fmt, int stride, int offset
// TODO: Use VBO and get rid of the vertexData pointers - with that, we will supply only offsets
GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(const DecVtxFormat &decFmt) {
uint32_t key = decFmt.id;
GLRInputLayout *inputLayout = inputLayoutMap_.Get(key);
if (inputLayout) {
GLRInputLayout *inputLayout;
if (inputLayoutMap_.Get(key, &inputLayout)) {
return inputLayout;
}

Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/DrawEngineGLES.h
Expand Up @@ -128,7 +128,7 @@ class DrawEngineGLES : public DrawEngineCommon {
};
FrameData frameData_[GLRenderManager::MAX_INFLIGHT_FRAMES];

DenseHashMap<uint32_t, GLRInputLayout *, nullptr> inputLayoutMap_;
DenseHashMap<uint32_t, GLRInputLayout *> inputLayoutMap_;

GLRInputLayout *softwareInputLayout_ = nullptr;
GLRenderManager *render_;
Expand Down

0 comments on commit b83e89a

Please sign in to comment.