Permalink
Browse files

Vulkan: Speed up and simplify hardware tessellation by using storage buffers.
  • Loading branch information...
hrydgard committed Nov 12, 2017
1 parent c05fe83 commit 4346a54eb768d7b2300fdf0f90b450c01ad9c096
@@ -37,7 +37,7 @@ bool VulkanPushBuffer::AddBuffer() {
VkBufferCreateInfo b = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
b.size = size_;
b.flags = 0;
b.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
b.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
b.queueFamilyIndexCount = 0;
b.pQueueFamilyIndices = nullptr;
@@ -103,12 +103,20 @@ class VulkanPushBuffer {
}
// "Zero-copy" variant - you can write the data directly as you compute it.
// Recommended.
// Reserves `size` bytes via Allocate() and hands back a pointer to them so the caller
// can fill the memory in place. The offset of the reservation is written to
// *bindOffset (narrowed to 32 bits); `vkbuf` is forwarded to Allocate() unchanged
// (presumably it receives the backing buffer handle - confirm against Allocate()).
void *Push(size_t size, uint32_t *bindOffset, VkBuffer *vkbuf) {
	// Writing is only possible while the buffer has a valid write pointer.
	assert(writePtr_);
	const size_t start = Allocate(size, vkbuf);
	*bindOffset = static_cast<uint32_t>(start);
	return writePtr_ + start;
}
// Same contract as Push(), except that the write cursor (offset_) is first rounded up
// to the next multiple of `align` before the allocation is made.
//
// size       - number of bytes to reserve.
// bindOffset - receives the offset of the reservation, narrowed to 32 bits.
// vkbuf      - forwarded to Allocate() unchanged.
// align      - required alignment in bytes; must be a positive power of two.
// Returns a pointer at which the caller can write `size` bytes directly.
void *PushAligned(size_t size, uint32_t *bindOffset, VkBuffer *vkbuf, int align) {
	assert(writePtr_);
	// The mask-based round-up below is only correct for power-of-two alignments.
	// With align == 0 the mask would clear every bit and reset the cursor to 0,
	// so reject bad values up front instead of silently corrupting the cursor.
	assert(align > 0 && (align & (align - 1)) == 0);
	// Build the mask in size_t so no signed/unsigned mixing is involved.
	const size_t mask = (size_t)align - 1;
	offset_ = (offset_ + mask) & ~mask;
	size_t off = Allocate(size, vkbuf);
	*bindOffset = (uint32_t)off;
	return writePtr_ + off;
}
size_t GetTotalSize() const;
@@ -67,9 +67,7 @@ enum {
DRAW_BINDING_DYNUBO_BASE = 2,
DRAW_BINDING_DYNUBO_LIGHT = 3,
DRAW_BINDING_DYNUBO_BONE = 4,
DRAW_BINDING_TESS_POS_TEXTURE = 5,
DRAW_BINDING_TESS_TEX_TEXTURE = 6,
DRAW_BINDING_TESS_COL_TEXTURE = 7,
DRAW_BINDING_TESS_STORAGE_BUF = 5,
};
enum {
@@ -97,7 +95,7 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
void DrawEngineVulkan::InitDeviceObjects() {
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
VkDescriptorSetLayoutBinding bindings[8];
VkDescriptorSetLayoutBinding bindings[6];
bindings[0].descriptorCount = 1;
bindings[0].pImmutableSamplers = nullptr;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@@ -123,37 +121,28 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[4].binding = DRAW_BINDING_DYNUBO_BONE;
// Hardware tessellation. TODO: Don't allocate these unless actually drawing splines.
// Will require additional
// Used only for hardware tessellation.
bindings[5].descriptorCount = 1;
bindings[5].pImmutableSamplers = nullptr;
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[5].binding = DRAW_BINDING_TESS_POS_TEXTURE;
bindings[6].descriptorCount = 1;
bindings[6].pImmutableSamplers = nullptr;
bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[6].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[6].binding = DRAW_BINDING_TESS_TEX_TEXTURE;
bindings[7].descriptorCount = 1;
bindings[7].pImmutableSamplers = nullptr;
bindings[7].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[7].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[7].binding = DRAW_BINDING_TESS_COL_TEXTURE;
bindings[5].binding = DRAW_BINDING_TESS_STORAGE_BUF;
VkDevice device = vulkan_->GetDevice();
VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
dsl.bindingCount = 8;
dsl.bindingCount = ARRAY_SIZE(bindings);
dsl.pBindings = bindings;
VkResult res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_);
assert(VK_SUCCESS == res);
VkDescriptorPoolSize dpTypes[2];
VkDescriptorPoolSize dpTypes[3];
dpTypes[0].descriptorCount = 8192;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = 8192 + 4096; // Due to the tess stuff, we need a LOT of these. Most will be empty...
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpTypes[2].descriptorCount = 2048;
dpTypes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
VkDescriptorPoolCreateInfo dp = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
dp.pNext = nullptr;
@@ -586,29 +575,27 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
// Skipping 2nd texture for now.
// Tessellation data textures
// Tessellation data buffer. Make sure this is declared outside the if to avoid optimizer
// shenanigans.
VkDescriptorBufferInfo tess_buf{};
if (tess) {
VkDescriptorImageInfo tess_tex[3]{};
VkSampler sampler = ((TessellationDataTransferVulkan *)tessDataTransfer)->GetSampler();
for (int i = 0; i < 3; i++) {
VulkanTexture *texture = ((TessellationDataTransferVulkan *)tessDataTransfer)->GetTexture(i);
if (texture) {
assert(texture->GetImageView());
VkImageView imageView = texture->GetImageView();
tess_tex[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tess_tex[i].imageView = imageView;
tess_tex[i].sampler = sampler;
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_TESS_POS_TEXTURE + i;
writes[n].pImageInfo = &tess_tex[i];
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[n].dstSet = desc;
n++;
}
}
VkBuffer buf;
VkDeviceSize offset;
VkDeviceSize range;
((TessellationDataTransferVulkan *)tessDataTransfer)->GetBufferAndOffset(&buf, &offset, &range);
assert(buf);
tess_buf.buffer = buf;
tess_buf.offset = offset;
tess_buf.range = range;
tessOffset_ = offset;
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF;
writes[n].pBufferInfo = &tess_buf;
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[n].dstSet = desc;
n++;
}
// Uniform buffer objects
@@ -1097,15 +1084,10 @@ void DrawEngineVulkan::UpdateUBOs(FrameData *frame) {
}
DrawEngineVulkan::TessellationDataTransferVulkan::TessellationDataTransferVulkan(VulkanContext *vulkan, Draw::DrawContext *draw)
: TessellationDataTransfer(), vulkan_(vulkan), draw_(draw), tessAlloc_(vulkan_, 128 * 1024, 4096 * 1024) {
CreateSampler();
: TessellationDataTransfer(), vulkan_(vulkan), draw_(draw) {
}
DrawEngineVulkan::TessellationDataTransferVulkan::~TessellationDataTransferVulkan() {
for (int i = 0; i < 3; i++)
delete data_tex[i];
tessAlloc_.Destroy();
vulkan_->Delete().QueueDeleteSampler(sampler);
}
// TODO: Consolidate the three textures into one, with height 3.
@@ -1116,91 +1098,27 @@ void DrawEngineVulkan::TessellationDataTransferVulkan::PrepareBuffers(float *&po
assert(size > 0);
VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::INIT_COMMANDBUFFER);
// Position
delete data_tex[0];
data_tex[0] = new VulkanTexture(vulkan_, &tessAlloc_);
bool success = data_tex[0]->CreateDirect(cmd, size, 1, 1, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
assert(success);
pos = (float *)push_->Push(size * sizeof(float) * 4, &posOffset_, &posBuf_);
posStride = 4;
posSize_ = size;
// Texcoords
delete data_tex[1];
if (hasTexCoords) {
data_tex[1] = new VulkanTexture(vulkan_, &tessAlloc_);
success = data_tex[1]->CreateDirect(cmd, size, 1, 1, VK_FORMAT_R32G32_SFLOAT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
assert(success);
tex = (float *)push_->Push(size * sizeof(float) * 2, &texOffset_, &texBuf_);
texStride = 2;
texSize_ = size;
} else {
data_tex[1] = nullptr;
tex = nullptr;
texStride = 0;
texSize_ = 0;
}
// Color
colSize_ = hasColor ? size : 1;
if (colSize_ == 1)
colStride = 0;
else
colStride = 4;
delete data_tex[2];
data_tex[2] = new VulkanTexture(vulkan_, &tessAlloc_);
success = data_tex[2]->CreateDirect(cmd, colSize_, 1, 1, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
assert(success);
col = (float *)push_->Push(colSize_ * sizeof(float) * 4, &colOffset_, &colBuf_);
// TODO: This SHOULD work without padding but I can't get it to work on nvidia, so had
// to expand to vec4. Driver bug?
struct TessData {
float pos[3]; float pad1;
float uv[2]; float pad2[2];
float color[4];
};
int ssboAlignment = vulkan_->GetPhysicalDeviceProperties().limits.minStorageBufferOffsetAlignment;
uint8_t *data = (uint8_t *)push_->PushAligned(size * sizeof(TessData), &offset_, &buf_, ssboAlignment);
range_ = size * sizeof(TessData);
pos = (float *)(data);
tex = (float *)(data + offsetof(TessData, uv));
col = (float *)(data + offsetof(TessData, color));
posStride = sizeof(TessData) / sizeof(float);
colStride = hasColor ? (sizeof(TessData) / sizeof(float)) : 0;
texStride = sizeof(TessData) / sizeof(float);
}
void DrawEngineVulkan::TessellationDataTransferVulkan::SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) {
VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::INIT_COMMANDBUFFER);
// Position
data_tex[0]->UploadMip(cmd, 0, posSize_, 1, posBuf_, posOffset_, posSize_);
data_tex[0]->EndCreate(cmd, true);
// Texcoords
if (hasTexCoords) {
data_tex[1]->UploadMip(cmd, 0, texSize_, 1, texBuf_, texOffset_, texSize_);
data_tex[1]->EndCreate(cmd, true);
}
// Color
data_tex[2]->UploadMip(cmd, 0, colSize_, 1, colBuf_, colOffset_, colSize_);
data_tex[2]->EndCreate(cmd, true);
}
void DrawEngineVulkan::TessellationDataTransferVulkan::CreateSampler() {
VkSamplerCreateInfo samp = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
samp.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samp.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
samp.compareOp = VK_COMPARE_OP_NEVER;
samp.flags = 0;
samp.magFilter = VK_FILTER_NEAREST;
samp.minFilter = VK_FILTER_NEAREST;
samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
if (gstate_c.Supports(GPU_SUPPORTS_ANISOTROPY) && g_Config.iAnisotropyLevel > 0) {
// Docs say the min of this value and the supported max are used.
samp.maxAnisotropy = 1 << g_Config.iAnisotropyLevel;
samp.anisotropyEnable = true;
} else {
samp.maxAnisotropy = 1.0f;
samp.anisotropyEnable = false;
}
samp.maxLod = 1.0f;
samp.minLod = 0.0f;
samp.mipLodBias = 0.0f;
VkResult res = vkCreateSampler(vulkan_->GetDevice(), &samp, nullptr, &sampler);
assert(res == VK_SUCCESS);
assert(pos);
// Nothing to do here!
}
@@ -262,6 +262,8 @@ class DrawEngineVulkan : public DrawEngineCommon {
VulkanPipelineRasterStateKey pipelineKey_{};
VulkanDynamicState dynState_{};
int tessOffset_ = 0;
// Hardware tessellation
class TessellationDataTransferVulkan : public TessellationDataTransfer {
public:
@@ -272,27 +274,24 @@ class DrawEngineVulkan : public DrawEngineCommon {
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
void PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) override;
VulkanTexture *GetTexture(int i) const { return data_tex[i]; }
// Copies the currently stored buffer handle, offset and range into the out
// parameters and then clears the stored values, so a second call without new
// values being stored in between yields a null buffer and zero offset/range.
void GetBufferAndOffset(VkBuffer *buf, VkDeviceSize *offset, VkDeviceSize *range) {
	// Hand out each value, then immediately reset the member it came from.
	*buf = buf_;
	buf_ = 0;

	*offset = static_cast<VkDeviceSize>(offset_);
	offset_ = 0;

	*range = static_cast<VkDeviceSize>(range_);
	range_ = 0;
}
VkSampler GetSampler() const { return sampler; }
void CreateSampler();
private:
VulkanContext *vulkan_;
Draw::DrawContext *draw_;
VulkanTexture *data_tex[3]{};
VkSampler sampler = VK_NULL_HANDLE;
VulkanPushBuffer *push_; // Updated each frame.
VulkanDeviceAllocator tessAlloc_;
int posSize_ = 0;
uint32_t posOffset_ = 0;
VkBuffer posBuf_ = 0;
int texSize_ = 0;
uint32_t texOffset_ = 0;
VkBuffer texBuf_ = 0;
int colSize_ = 0;
uint32_t colOffset_ = 0;
VkBuffer colBuf_ = 0;
int size_ = 0;
uint32_t offset_ = 0;
uint32_t range_ = 0;
VkBuffer buf_ = VK_NULL_HANDLE;
};
};
@@ -37,7 +37,7 @@
#include "GPU/Vulkan/ShaderManagerVulkan.h"
static const char *vulkan_glsl_preamble =
"#version 400\n"
"#version 430\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"#extension GL_ARB_shading_language_420pack : enable\n\n";
@@ -220,9 +220,14 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
WRITE(p, "out gl_PerVertex { vec4 gl_Position; };\n");
if (doBezier || doSpline) {
WRITE(p, "layout (binding = 5) uniform sampler2D u_tess_pos_tex;\n");
WRITE(p, "layout (binding = 6) uniform sampler2D u_tess_tex_tex;\n");
WRITE(p, "layout (binding = 7) uniform sampler2D u_tess_col_tex;\n");
WRITE(p, "layout (std430) struct TessData {\n");
WRITE(p, " vec4 pos;\n");
WRITE(p, " vec4 uv;\n");
WRITE(p, " vec4 color;\n");
WRITE(p, "};");
WRITE(p, "layout (std430, set = 0, binding = 5) buffer s_tess_data {\n");
WRITE(p, " TessData data[];");
WRITE(p, "} tess_data;");
for (int i = 2; i <= 4; i++) {
// Define 3 types vec2, vec3, vec4
@@ -340,12 +345,11 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int idx = (i + v%s) * base.spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " ivec2 index = ivec2(idx, 0);\n");
WRITE(p, " _pos[i * 4 + j] = texelFetch(u_tess_pos_tex, index, 0).xyz;\n");
WRITE(p, " _pos[i * 4 + j] = tess_data.data[idx].pos.xyz;\n");
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = texelFetch(u_tess_tex_tex, index, 0).xy;\n");
WRITE(p, " _tex[i * 4 + j] = tess_data.data[idx].uv.xy;\n");
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = texelFetch(u_tess_col_tex, index, 0).rgba;\n");
WRITE(p, " _col[i * 4 + j] = tess_data.data[idx].color;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
@@ -374,7 +378,7 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = texelFetch(u_tess_col_tex, ivec2(0, 0), 0).rgba;\n");
WRITE(p, " vec4 col = tess_data.data[0].color;\n");
}
if (hasNormal) {
// A curved surface probably always needs its normal computed (rather than sampled from the control points)

0 comments on commit 4346a54

Please sign in to comment.