Permalink
Browse files

[spline/bezier] Instanced rendering for B-Splines is very slow on weak GPUs, so disable it and simplify the shaders.

Add the ability to change the quality of HW tessellation.
  • Loading branch information...
xebra committed Sep 22, 2018
1 parent d98fa06 commit 89786b943df5bbceb513f78e67618dbee8781967
@@ -178,6 +178,5 @@ class DrawEngineCommon {
bool fboTexBound_ = false;
// Hardware tessellation
int numPatches;
TessellationDataTransfer *tessDataTransfer;
};
@@ -240,7 +240,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
}
if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
ub->spline_counts = BytesToUint32(gstate_c.spline_num_patches_u, gstate_c.spline_num_points_u, gstate_c.spline_tess_u, gstate_c.spline_tess_v);
ub->spline_counts = gstate_c.spline_num_points_u;
}
if (dirtyUniforms & DIRTY_DEPAL) {
@@ -467,17 +467,29 @@ static void HardwareTessellation(OutputBuffers &output, const Patch &patch, u32
// Generating simple input vertices for the spline-computing vertex shader.
float inv_u = 1.0f / (float)patch.tess_u;
float inv_v = 1.0f / (float)patch.tess_v;
for (int tile_v = 0; tile_v <= patch.tess_v; ++tile_v) {
for (int tile_u = 0; tile_u <= patch.tess_u; ++tile_u) {
SimpleVertex &vert = output.vertices[tile_v * (patch.tess_u + 1) + tile_u];
vert.pos.x = (float)tile_u;
vert.pos.y = (float)tile_v;
// For texcoord generation
vert.nrm.x = (float)tile_u * inv_u;
vert.nrm.y = (float)tile_v * inv_v;
for (int patch_u = 0; patch_u < patch.num_patches_u; ++patch_u) {
const int start_u = patch.GetTessStart(patch_u);
for (int patch_v = 0; patch_v < patch.num_patches_v; ++patch_v) {
const int start_v = patch.GetTessStart(patch_v);
for (int tile_u = start_u; tile_u <= patch.tess_u; ++tile_u) {
const int index_u = patch.GetIndexU(patch_u, tile_u);
for (int tile_v = start_v; tile_v <= patch.tess_v; ++tile_v) {
const int index_v = patch.GetIndexV(patch_v, tile_v);
SimpleVertex &vert = output.vertices[patch.GetIndex(index_u, index_v, patch_u, patch_v)];
// Index for the weights
vert.pos.x = index_u;
vert.pos.y = index_v;
// For texcoord generation
vert.nrm.x = patch_u + (float)tile_u * inv_u;
vert.nrm.y = patch_v + (float)tile_v * inv_v;
// Patch position
vert.pos.z = patch_u;
vert.nrm.z = patch_v;
}
}
}
}
BuildIndex(output.indices, output.count, patch.tess_u, patch.tess_v, patch.primType);
patch.BuildIndex(output.indices, output.count);
}
void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead) {
@@ -538,13 +550,12 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
patch.num_patches_v = count_v - 3;
patch.primType = prim_type;
patch.patchFacing = patchFacing;
patch.Init(SPLINE_BUFFER_SIZE / vertexSize);
if (CanUseHardwareTessellation(prim_type)) {
HardwareTessellation(output, patch, origVertType, points, tessDataTransfer);
numPatches = patch.num_patches_u * patch.num_patches_v;
} else {
ControlPoints cpoints(points, count_u * count_v, managedBuf);
patch.Init(SPLINE_BUFFER_SIZE / vertexSize);
SoftwareTessellation(output, patch, origVertType, cpoints);
}
@@ -630,13 +641,12 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
patch.num_patches_v = (count_v - 1) / 3;
patch.primType = prim_type;
patch.patchFacing = patchFacing;
patch.Init(SPLINE_BUFFER_SIZE / vertexSize);
if (CanUseHardwareTessellation(prim_type)) {
HardwareTessellation(output, patch, origVertType, points, tessDataTransfer);
numPatches = patch.num_patches_u * patch.num_patches_v;
} else {
ControlPoints cpoints(points, count_u * count_v, managedBuf);
patch.Init(SPLINE_BUFFER_SIZE / vertexSize);
SoftwareTessellation(output, patch, origVertType, cpoints);
}
@@ -540,10 +540,7 @@ void DrawEngineD3D11::DoFlush() {
memcpy(iptr, decIndex, iSize);
pushInds_->EndPush(context_);
context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
if (tess)
context_->DrawIndexedInstanced(vertexCount, numPatches, 0, 0, 0);
else
context_->DrawIndexed(vertexCount, 0, 0);
context_->DrawIndexed(vertexCount, 0, 0);
} else {
context_->Draw(vertexCount, 0);
}
@@ -552,10 +549,7 @@ void DrawEngineD3D11::DoFlush() {
context_->IASetVertexBuffers(0, 1, &vb_, &stride, &offset);
if (useElements) {
context_->IASetIndexBuffer(ib_, DXGI_FORMAT_R16_UINT, 0);
if (tess)
context_->DrawIndexedInstanced(vertexCount, numPatches, 0, 0, 0);
else
context_->DrawIndexed(vertexCount, 0, 0);
context_->DrawIndexed(vertexCount, 0, 0);
} else {
context_->Draw(vertexCount, 0);
}
@@ -311,27 +311,16 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
WRITE(p, "};\n");
WRITE(p, "void tessellate(in VS_IN In, out Tess tess) {\n");
WRITE(p, " int spline_num_patches_u = int(u_spline_counts & 0xff);\n");
WRITE(p, " int spline_num_points_u = int((u_spline_counts >> 8) & 0xff);\n");
WRITE(p, " int2 spline_tess = int2((u_spline_counts >> 16) & 0xFF, (u_spline_counts >> 24) & 0xFF);\n");
// Calculate current patch position and vertex position(index for the weights)
WRITE(p, " int u = In.instanceId %% spline_num_patches_u;\n");
WRITE(p, " int v = In.instanceId / spline_num_patches_u;\n");
WRITE(p, " int2 patch_pos = int2(u, v);\n");
WRITE(p, " int2 vertex_pos = int2(In.position.xy);\n");
if (doSpline) {
WRITE(p, " bool2 isFirstEdge = !bool2(vertex_pos);\n"); // vertex_pos == 0
WRITE(p, " bool2 isNotFirstPatch = bool2(patch_pos);\n"); // patch_pos > 0
WRITE(p, " vertex_pos += patch_pos * spline_tess;\n");
}
WRITE(p, " int2 point_pos = int2(In.position.z, In.normal.z)%s;\n", doBezier ? " * 3" : "");
WRITE(p, " int2 weight_idx = int2(In.position.xy);\n");
// Load 4x4 control points
WRITE(p, " float3 _pos[16];\n");
WRITE(p, " float2 _tex[16];\n");
WRITE(p, " float4 _col[16];\n");
WRITE(p, " int index;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index = (%i + v%s) * spline_num_points_u + (%i + u%s);\n", i, doBezier ? " * 3" : "", j, doBezier ? " * 3" : "");
WRITE(p, " index = (%i + point_pos.y) * u_spline_counts + (%i + point_pos.x);\n", i, j);
WRITE(p, " _pos[%i] = tess_data[index].pos;\n", i * 4 + j);
if (doTexture && hasTexcoordTess)
WRITE(p, " _tex[%i] = tess_data[index].tex;\n", i * 4 + j);
@@ -341,14 +330,8 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
}
// Basis polynomials as weight coefficients
WRITE(p, " float4 basis_u = tess_weights_u[vertex_pos.x].basis;\n");
WRITE(p, " float4 basis_v = tess_weights_v[vertex_pos.y].basis;\n");
if (doSpline) {
WRITE(p, " if (isFirstEdge.x && isNotFirstPatch.x)\n");
WRITE(p, " basis_u = float4(basis_u.yzw, 0);\n");
WRITE(p, " if (isFirstEdge.y && isNotFirstPatch.y)\n");
WRITE(p, " basis_v = float4(basis_v.yzw, 0);\n");
}
WRITE(p, " float4 basis_u = tess_weights_u[weight_idx.x].basis;\n");
WRITE(p, " float4 basis_v = tess_weights_v[weight_idx.y].basis;\n");
WRITE(p, " float4x4 basis = outerProduct(basis_u, basis_v);\n");
// Tessellate
@@ -357,22 +340,16 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
if (hasTexcoordTess)
WRITE(p, " tess.tex = tess_sample(_tex, basis);\n");
else
WRITE(p, " tess.tex = In.normal.xy + float2(patch_pos);\n");
WRITE(p, " tess.tex = In.normal.xy;\n");
}
if (hasColorTess)
WRITE(p, " tess.col = tess_sample(_col, basis);\n");
else
WRITE(p, " tess.col = u_matambientalpha;\n");
if (hasNormalTess) {
// Derivatives as weight coefficients
WRITE(p, " float4 deriv_u = tess_weights_u[vertex_pos.x].deriv;\n");
WRITE(p, " float4 deriv_v = tess_weights_v[vertex_pos.y].deriv;\n");
if (doSpline) {
WRITE(p, " if (isFirstEdge.x && isNotFirstPatch.x)\n");
WRITE(p, " deriv_u = float4(deriv_u.yzw, 0);\n");
WRITE(p, " if (isFirstEdge.y && isNotFirstPatch.y)\n");
WRITE(p, " deriv_v = float4(deriv_v.yzw, 0);\n");
}
WRITE(p, " float4 deriv_u = tess_weights_u[weight_idx.x].deriv;\n");
WRITE(p, " float4 deriv_v = tess_weights_v[weight_idx.y].deriv;\n");
WRITE(p, " float3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
WRITE(p, " float3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
@@ -521,10 +521,7 @@ void DrawEngineGLES::DoFlush() {
indexBufferOffset = (uint32_t)frameData.pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &indexBuffer);
render_->BindIndexBuffer(indexBuffer);
}
if (gstate_c.bezier || gstate_c.spline)
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset, numPatches);
else
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset);
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset);
} else {
render_->Draw(glprim[prim], 0, vertexCount);
}
@@ -109,7 +109,7 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
if (g_Config.bHardwareTessellation) {
// Disable hardware tessellation if device is unsupported.
bool hasTexelFetch = gl_extensions.GLES3 || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 3, 0)) || gl_extensions.EXT_gpu_shader4;
if (!gstate_c.SupportsAll(GPU_SUPPORTS_INSTANCE_RENDERING | GPU_SUPPORTS_VERTEX_TEXTURE_FETCH | GPU_SUPPORTS_TEXTURE_FLOAT) || !hasTexelFetch) {
if (!gstate_c.SupportsAll(GPU_SUPPORTS_VERTEX_TEXTURE_FETCH | GPU_SUPPORTS_TEXTURE_FLOAT) || !hasTexelFetch) {
// TODO: Check unsupported device name list.(Above gpu features are supported but it has issues with weak gpu, memory, shader compiler etc...)
g_Config.bHardwareTessellation = false;
ERROR_LOG(G3D, "Hardware Tessellation is unsupported, falling back to software tessellation");
@@ -162,7 +162,6 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_tess_points, "u_tess_points" });
queries.push_back({ &u_tess_weights_u, "u_tess_weights_u" });
queries.push_back({ &u_tess_weights_v, "u_tess_weights_v" });
queries.push_back({ &u_spline_tess, "u_spline_tess" });
queries.push_back({ &u_spline_counts, "u_spline_counts" });
queries.push_back({ &u_depal, "u_depal" });
@@ -565,13 +564,8 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
}
if (dirty & DIRTY_BEZIERSPLINE) {
if (u_spline_tess != -1) {
int tess[] = { gstate_c.spline_tess_u, gstate_c.spline_tess_v };
render_->SetUniformI(&u_spline_tess, 2, tess);
}
if (u_spline_counts != -1) {
int counts[] = { gstate_c.spline_num_patches_u, gstate_c.spline_num_points_u };
render_->SetUniformI(&u_spline_counts, 2, counts);
render_->SetUniformI1(&u_spline_counts, gstate_c.spline_num_points_u);
}
}
}
@@ -121,7 +121,6 @@ class LinkedShader {
int u_tess_points; // Control Points
int u_tess_weights_u;
int u_tess_weights_v;
int u_spline_tess;
int u_spline_counts;
};
@@ -384,8 +384,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
WRITE(p, "uniform sampler2D u_tess_weights_u;\n");
WRITE(p, "uniform sampler2D u_tess_weights_v;\n");
WRITE(p, "uniform ivec2 u_spline_counts;\n");
WRITE(p, "uniform ivec2 u_spline_tess;\n");
WRITE(p, "uniform int u_spline_counts;\n");
for (int i = 2; i <= 4; i++) {
// Define 3 types vec2, vec3, vec4
@@ -416,26 +415,17 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
WRITE(p, "};\n");
WRITE(p, "void tessellate(out Tess tess) {\n");
WRITE(p, " int spline_num_patches_u = u_spline_counts[0];\n");
WRITE(p, " int spline_num_points_u = u_spline_counts[1];\n");
// Calculate current patch position and vertex position(index for the weights)
WRITE(p, " int u = gl_InstanceID %% spline_num_patches_u;\n");
WRITE(p, " int v = gl_InstanceID / spline_num_patches_u;\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " ivec2 vertex_pos = ivec2(position.xy);\n");
if (doSpline) {
WRITE(p, " bvec2 isFirstEdge = not(bvec2(vertex_pos));\n"); // vertex_pos == 0
WRITE(p, " bvec2 isNotFirstPatch = bvec2(patch_pos);\n"); // patch_pos > 0
WRITE(p, " vertex_pos += patch_pos * u_spline_tess;\n");
}
WRITE(p, " ivec2 point_pos = ivec2(position.z, normal.z)%s;\n", doBezier ? " * 3" : "");
WRITE(p, " ivec2 weight_idx = ivec2(position.xy);\n");
// Load 4x4 control points
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int index;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index = (%i + v%s) * spline_num_points_u + (%i + u%s);\n", i, doBezier ? " * 3" : "", j, doBezier ? " * 3" : "");
WRITE(p, " index = (%i + point_pos.y) * u_spline_counts + (%i + point_pos.x);\n", i, j);
WRITE(p, " _pos[%i] = %s(u_tess_points, ivec2(index, 0), 0).xyz;\n", i * 4 + j, texelFetch);
if (doTexture && hasTexcoordTess)
WRITE(p, " _tex[%i] = %s(u_tess_points, ivec2(index, 1), 0).xy;\n", i * 4 + j, texelFetch);
@@ -445,14 +435,8 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
}
// Basis polynomials as weight coefficients
WRITE(p, " vec4 basis_u = %s(u_tess_weights_u, %s, 0);\n", texelFetch, "ivec2(vertex_pos.x * 2, 0)");
WRITE(p, " vec4 basis_v = %s(u_tess_weights_v, %s, 0);\n", texelFetch, "ivec2(vertex_pos.y * 2, 0)");
if (doSpline) {
WRITE(p, " if (isFirstEdge.x && isNotFirstPatch.x)\n");
WRITE(p, " basis_u = vec4(basis_u.yzw, 0);\n");
WRITE(p, " if (isFirstEdge.y && isNotFirstPatch.y)\n");
WRITE(p, " basis_v = vec4(basis_v.yzw, 0);\n");
}
WRITE(p, " vec4 basis_u = %s(u_tess_weights_u, %s, 0);\n", texelFetch, "ivec2(weight_idx.x * 2, 0)");
WRITE(p, " vec4 basis_v = %s(u_tess_weights_v, %s, 0);\n", texelFetch, "ivec2(weight_idx.y * 2, 0)");
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
// Tessellate
@@ -461,22 +445,16 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
if (hasTexcoordTess)
WRITE(p, " tess.tex = tess_sample(_tex, basis);\n");
else
WRITE(p, " tess.tex = normal.xy + vec2(patch_pos);\n");
WRITE(p, " tess.tex = normal.xy;\n");
}
if (hasColorTess)
WRITE(p, " tess.col = tess_sample(_col, basis);\n");
else
WRITE(p, " tess.col = u_matambientalpha;\n");
if (hasNormalTess) {
// Derivatives as weight coefficients
WRITE(p, " vec4 deriv_u = %s(u_tess_weights_u, %s, 0);\n", texelFetch, "ivec2(vertex_pos.x * 2 + 1, 0)");
WRITE(p, " vec4 deriv_v = %s(u_tess_weights_v, %s, 0);\n", texelFetch, "ivec2(vertex_pos.y * 2 + 1, 0)");
if (doSpline) {
WRITE(p, " if (isFirstEdge.x && isNotFirstPatch.x)\n");
WRITE(p, " deriv_u = vec4(deriv_u.yzw, 0);\n");
WRITE(p, " if (isFirstEdge.y && isNotFirstPatch.y)\n");
WRITE(p, " deriv_v = vec4(deriv_v.yzw, 0);\n");
}
WRITE(p, " vec4 deriv_u = %s(u_tess_weights_u, %s, 0);\n", texelFetch, "ivec2(weight_idx.x * 2 + 1, 0)");
WRITE(p, " vec4 deriv_v = %s(u_tess_weights_v, %s, 0);\n", texelFetch, "ivec2(weight_idx.y * 2 + 1, 0)");
WRITE(p, " vec3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
WRITE(p, " vec3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
@@ -845,8 +845,7 @@ void DrawEngineVulkan::DoFlush() {
if (useElements) {
if (!ibuf)
ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &ibuf);
int numInstances = tess ? numPatches : 1;
renderManager->DrawIndexed(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, numInstances, VK_INDEX_TYPE_UINT16);
renderManager->DrawIndexed(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1, VK_INDEX_TYPE_UINT16);
} else {
renderManager->Draw(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount);
}
Oops, something went wrong.

0 comments on commit 89786b9

Please sign in to comment.