Skip to content

Commit

Permalink
[spline/bezier]Improve hwtess to use cached weights.
Browse files Browse the repository at this point in the history
  • Loading branch information
xebra committed Oct 7, 2018
1 parent e5976f5 commit 3c0fb44
Show file tree
Hide file tree
Showing 19 changed files with 389 additions and 555 deletions.
9 changes: 2 additions & 7 deletions GPU/Common/DrawEngineCommon.h
Expand Up @@ -27,6 +27,7 @@
#include "GPU/Common/GPUDebugInterface.h" #include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Common/IndexGenerator.h" #include "GPU/Common/IndexGenerator.h"
#include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Common/VertexDecoderCommon.h"
#include "GPU/Common/SplineCommon.h"


class VertexDecoder; class VertexDecoder;


Expand Down Expand Up @@ -173,17 +174,11 @@ class DrawEngineCommon {
// Hardware tessellation // Hardware tessellation
int numPatches; int numPatches;
class TessellationDataTransfer { class TessellationDataTransfer {
protected:
// TODO: These aren't used by all backends.
int prevSize;
int prevSizeTex;
int prevSizeCol;
public: public:
virtual ~TessellationDataTransfer() {} virtual ~TessellationDataTransfer() {}
void CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType); void CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType);
// Send spline/bezier's control points to vertex shader through floating point texture. // Send spline/bezier's control points to vertex shader through floating point texture.
virtual void SendDataToShader(const SimpleVertex *const *points, int size, u32 vertType) = 0; virtual void SendDataToShader(const SimpleVertex *const *points, int size, u32 vertType, const Weight2D &weights) = 0;
virtual void EndFrame() {}
}; };
TessellationDataTransfer *tessDataTransfer; TessellationDataTransfer *tessDataTransfer;
}; };
2 changes: 1 addition & 1 deletion GPU/Common/ShaderUniforms.cpp
Expand Up @@ -240,7 +240,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
} }


if (dirtyUniforms & DIRTY_BEZIERSPLINE) { if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
ub->spline_counts = BytesToUint32(gstate_c.spline_count_u, gstate_c.spline_count_v, gstate_c.spline_type_u, gstate_c.spline_type_v); ub->spline_counts = BytesToUint32(gstate_c.spline_num_patches_u, gstate_c.spline_num_patches_v, gstate_c.spline_tess_u, gstate_c.spline_tess_v);
} }


if (dirtyUniforms & DIRTY_DEPAL) { if (dirtyUniforms & DIRTY_DEPAL) {
Expand Down
119 changes: 47 additions & 72 deletions GPU/Common/SplineCommon.cpp
Expand Up @@ -22,7 +22,6 @@


#include "Common/CPUDetect.h" #include "Common/CPUDetect.h"
#include "Common/MemoryUtil.h" #include "Common/MemoryUtil.h"
#include "Core/Config.h"


#include "GPU/Common/GPUStateUtils.h" #include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/SplineCommon.h" #include "GPU/Common/SplineCommon.h"
Expand Down Expand Up @@ -60,32 +59,28 @@ static void BuildIndex(u16 *indices, int &count, int num_u, int num_v, GEPatchPr
} }
} }


struct Weight {
float weights[4], derivs[4];
};

class Bezier3DWeight { class Bezier3DWeight {
private: private:
void CalcWeights(float t, Weight &w) { void CalcWeights(float t, Weight &w) {
// Bernstein 3D basis polynomial // Bernstein 3D basis polynomial
w.weights[0] = (1 - t) * (1 - t) * (1 - t); w.basis[0] = (1 - t) * (1 - t) * (1 - t);
w.weights[1] = 3 * t * (1 - t) * (1 - t); w.basis[1] = 3 * t * (1 - t) * (1 - t);
w.weights[2] = 3 * t * t * (1 - t); w.basis[2] = 3 * t * t * (1 - t);
w.weights[3] = t * t * t; w.basis[3] = t * t * t;


// Derivative // Derivative
w.derivs[0] = -3 * (1 - t) * (1 - t); w.deriv[0] = -3 * (1 - t) * (1 - t);
w.derivs[1] = 9 * t * t - 12 * t + 3; w.deriv[1] = 9 * t * t - 12 * t + 3;
w.derivs[2] = 3 * (2 - 3 * t) * t; w.deriv[2] = 3 * (2 - 3 * t) * t;
w.derivs[3] = 3 * t * t; w.deriv[3] = 3 * t * t;
} }
public: public:
Weight *CalcWeightsAll(u32 key) { Weight *CalcWeightsAll(u32 key) {
int tess = (int)key; int tess = (int)key;
Weight *weights = new Weight[tess + 1]; Weight *weights = new Weight[tess + 1];
const float inv_u = 1.0f / (float)tess; const float inv_tess = 1.0f / (float)tess;
for (int i = 0; i < tess + 1; ++i) { for (int i = 0; i < tess + 1; ++i) {
const float t = (float)i * inv_u; const float t = (float)i * inv_tess;
CalcWeights(t, weights[i]); CalcWeights(t, weights[i]);
} }
return weights; return weights;
Expand Down Expand Up @@ -182,10 +177,10 @@ class Spline3DWeight {
float c = (1 - f41) * (1 - f42); float c = (1 - f41) * (1 - f42);
float d = (f42 * f52); float d = (f42 * f52);
#endif #endif
w.weights[0] = a * (1 - f32); // (1-f30)*(1-f31)*(1-f32) w.basis[0] = a * (1 - f32); // (1-f30)*(1-f31)*(1-f32)
w.weights[1] = 1 - a - b + ((a + b + c - 1) * f32); w.basis[1] = 1 - a - b + ((a + b + c - 1) * f32);
w.weights[2] = b + ((1 - b - c - d) * f32); w.basis[2] = b + ((1 - b - c - d) * f32);
w.weights[3] = d * f32; // f32*f42*f52 w.basis[3] = d * f32; // f32*f42*f52


// Derivative // Derivative
float i1 = (1 - f31) * (1 - f32); float i1 = (1 - f31) * (1 - f32);
Expand All @@ -196,10 +191,10 @@ class Spline3DWeight {
float f241 = i2 * div._4_1; float f241 = i2 * div._4_1;
float f352 = i3 * div._5_2; float f352 = i3 * div._5_2;


w.derivs[0] = 3 * (0 - f130); w.deriv[0] = 3 * (0 - f130);
w.derivs[1] = 3 * (f130 - f241); w.deriv[1] = 3 * (f130 - f241);
w.derivs[2] = 3 * (f241 - f352); w.deriv[2] = 3 * (f241 - f352);
w.derivs[3] = 3 * (f352 - 0); w.deriv[3] = 3 * (f352 - 0);
} }
public: public:
Weight *CalcWeightsAll(u32 key) { Weight *CalcWeightsAll(u32 key) {
Expand Down Expand Up @@ -238,38 +233,9 @@ class Spline3DWeight {
} }
}; };


template<class T>
class WeightCache : public T {
private:
std::unordered_map<u32, Weight*> weightsCache;
public:
Weight* operator [] (u32 key) {
Weight *&weights = weightsCache[key];
if (!weights)
weights = CalcWeightsAll(key);
return weights;
}

void Clear() {
for (auto it : weightsCache)
delete[] it.second;
weightsCache.clear();
}
};

static WeightCache<Bezier3DWeight> bezierWeightsCache; static WeightCache<Bezier3DWeight> bezierWeightsCache;
static WeightCache<Spline3DWeight> splineWeightsCache; static WeightCache<Spline3DWeight> splineWeightsCache;


struct Weight2D {
const Weight *u, *v;

template<class T>
Weight2D(WeightCache<T> &cache, u32 key_u, u32 key_v) {
u = cache[key_u];
v = (key_u != key_v) ? cache[key_v] : u; // Use same weights if u == v
}
};

void DrawEngineCommon::ClearSplineBezierWeights() { void DrawEngineCommon::ClearSplineBezierWeights() {
bezierWeightsCache.Clear(); bezierWeightsCache.Clear();
splineWeightsCache.Clear(); splineWeightsCache.Clear();
Expand All @@ -293,13 +259,11 @@ static void TessellateSplinePatchHardware(u8 *&dest, u16 *indices, int &count, c
for (int tile_v = 0; tile_v < spatch.tess_v + 1; ++tile_v) { for (int tile_v = 0; tile_v < spatch.tess_v + 1; ++tile_v) {
for (int tile_u = 0; tile_u < spatch.tess_u + 1; ++tile_u) { for (int tile_u = 0; tile_u < spatch.tess_u + 1; ++tile_u) {
SimpleVertex &vert = vertices[tile_v * (spatch.tess_u + 1) + tile_u]; SimpleVertex &vert = vertices[tile_v * (spatch.tess_u + 1) + tile_u];
vert.pos.x = (float)tile_u * inv_u; vert.pos.x = (float)tile_u;
vert.pos.y = (float)tile_v * inv_v; vert.pos.y = (float)tile_v;

// For texcoord generation
// TODO: Move to shader uniform and unify this method spline and bezier if necessary. vert.nrm.x = (float)tile_u * inv_u;
// For compute normal vert.nrm.y = (float)tile_v * inv_v;
vert.nrm.x = inv_u;
vert.nrm.y = inv_v;
} }
} }


Expand Down Expand Up @@ -357,8 +321,11 @@ static void TessellateBezierPatchHardware(u8 *&dest, u16 *indices, int &count, i
for (int tile_u = 0; tile_u < tess_u + 1; ++tile_u) { for (int tile_u = 0; tile_u < tess_u + 1; ++tile_u) {
SimpleVertex &vert = vertices[tile_v * (tess_u + 1) + tile_u]; SimpleVertex &vert = vertices[tile_v * (tess_u + 1) + tile_u];


vert.pos.x = (float)tile_u * inv_u; vert.pos.x = (float)tile_u;
vert.pos.y = (float)tile_v * inv_v; vert.pos.y = (float)tile_v;
// For texcoord generation
vert.nrm.x = (float)tile_u * inv_u;
vert.nrm.y = (float)tile_v * inv_v;
} }
} }


Expand Down Expand Up @@ -433,13 +400,13 @@ class SubdivisionSurface {
const Weight &wu = weights.u[index_u]; const Weight &wu = weights.u[index_u];


// Pre-tessellate U lines // Pre-tessellate U lines
tess_pos.SampleU(wu.weights); tess_pos.SampleU(wu.basis);
if (sampleCol) if (sampleCol)
tess_col.SampleU(wu.weights); tess_col.SampleU(wu.basis);
if (sampleTex) if (sampleTex)
tess_tex.SampleU(wu.weights); tess_tex.SampleU(wu.basis);
if (sampleNrm) if (sampleNrm)
tess_nrm.SampleU(wu.derivs); tess_nrm.SampleU(wu.deriv);


for (int tile_v = 0; tile_v < tess_v; ++tile_v) { for (int tile_v = 0; tile_v < tess_v; ++tile_v) {
const int index_v = patch.GetIndexV(patch_v, tile_v); const int index_v = patch.GetIndexV(patch_v, tile_v);
Expand All @@ -448,22 +415,22 @@ class SubdivisionSurface {
SimpleVertex &vert = vertices[patch.GetIndex(index_u, index_v, patch_u, patch_v)]; SimpleVertex &vert = vertices[patch.GetIndex(index_u, index_v, patch_u, patch_v)];


// Tessellate // Tessellate
vert.pos = tess_pos.SampleV(wv.weights); vert.pos = tess_pos.SampleV(wv.basis);
if (sampleCol) { if (sampleCol) {
vert.color_32 = tess_col.SampleV(wv.weights).ToRGBA(); vert.color_32 = tess_col.SampleV(wv.basis).ToRGBA();
} else { } else {
vert.color_32 = defcolor; vert.color_32 = defcolor;
} }
if (sampleTex) { if (sampleTex) {
tess_tex.SampleV(wv.weights).Write(vert.uv); tess_tex.SampleV(wv.basis).Write(vert.uv);
} else { } else {
// Generate texcoord // Generate texcoord
vert.uv[0] = patch_u + tile_u * inv_u; vert.uv[0] = patch_u + tile_u * inv_u;
vert.uv[1] = patch_v + tile_v * inv_v; vert.uv[1] = patch_v + tile_v * inv_v;
} }
if (sampleNrm) { if (sampleNrm) {
const Vec3f derivU = tess_nrm.SampleV(wv.weights); const Vec3f derivU = tess_nrm.SampleV(wv.basis);
const Vec3f derivV = tess_pos.SampleV(wv.derivs); const Vec3f derivV = tess_pos.SampleV(wv.deriv);


vert.nrm = Cross(derivU, derivV).Normalized(useSSE4); vert.nrm = Cross(derivU, derivV).Normalized(useSSE4);
if (patch.patchFacing) if (patch.patchFacing)
Expand Down Expand Up @@ -569,7 +536,12 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
patch.patchFacing = patchFacing; patch.patchFacing = patchFacing;


if (CanUseHardwareTessellation(prim_type)) { if (CanUseHardwareTessellation(prim_type)) {
tessDataTransfer->SendDataToShader(points, count_u * count_v, origVertType); const u32 key_u = splineWeightsCache.ToKey(tess_u, count_u, type_u);
const u32 key_v = splineWeightsCache.ToKey(tess_v, count_v, type_v);
Weight2D weights(splineWeightsCache, key_u, key_v);
weights.size_u = (count_u - 3) * tess_u + 1;
weights.size_v = (count_v - 3) * tess_v + 1;
tessDataTransfer->SendDataToShader(points, count_u * count_v, origVertType, weights);
TessellateSplinePatchHardware(dest, quadIndices_, count, patch); TessellateSplinePatchHardware(dest, quadIndices_, count, patch);
numPatches = (count_u - 3) * (count_v - 3); numPatches = (count_u - 3) * (count_v - 3);
} else { } else {
Expand Down Expand Up @@ -653,7 +625,10 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
int num_patches_u = (count_u - 1) / 3; int num_patches_u = (count_u - 1) / 3;
int num_patches_v = (count_v - 1) / 3; int num_patches_v = (count_v - 1) / 3;
if (CanUseHardwareTessellation(prim_type)) { if (CanUseHardwareTessellation(prim_type)) {
tessDataTransfer->SendDataToShader(points, count_u * count_v, origVertType); Weight2D weights(bezierWeightsCache, tess_u, tess_v);
weights.size_u = tess_u + 1;
weights.size_v = tess_v + 1;
tessDataTransfer->SendDataToShader(points, count_u * count_v, origVertType, weights);
TessellateBezierPatchHardware(dest, inds, count, tess_u, tess_v, prim_type); TessellateBezierPatchHardware(dest, inds, count, tess_u, tess_v, prim_type);
numPatches = num_patches_u * num_patches_v; numPatches = num_patches_u * num_patches_v;
} else { } else {
Expand Down
40 changes: 39 additions & 1 deletion GPU/Common/SplineCommon.h
Expand Up @@ -16,11 +16,13 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.


#pragma once #pragma once
#include <unordered_map>


#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Swap.h" #include "Common/Swap.h"
#include "GPU/Math3D.h" #include "GPU/Math3D.h"
#include "GPU/ge_constants.h" #include "GPU/ge_constants.h"
#include "Core/Config.h"


#define HALF_CEIL(x) (x + 1) / 2 // Integer ceil = (int)ceil((float)x / 2.0f) #define HALF_CEIL(x) (x + 1) / 2 // Integer ceil = (int)ceil((float)x / 2.0f)


Expand Down Expand Up @@ -157,7 +159,43 @@ struct SplinePatchLocal {
} }
}; };


bool CanUseHardwareTessellation(GEPatchPrimType prim);void TessellateSplinePatch(u8 *&dest, u16 *indices, int &count, SplinePatchLocal &spatch, u32 origVertType, int maxVertices);void TessellateBezierPatch(u8 *&dest, u16 *&indices, int &count, int tess_u, int tess_v, const BezierPatch &patch, u32 origVertType, int maxVertices); struct Weight {
float basis[4], deriv[4];
};

// Lazily builds and caches per-key weight tables.
//
// T supplies the weight generator: it must provide
//   Weight *CalcWeightsAll(u32 key);
// whose result is stored in the cache and later released with delete[]
// (so it is expected to be allocated with new[]).
//
// The cache owns the arrays it hands out; pointers returned by operator[]
// stay valid until Clear() is called. Nothing is freed automatically, so
// callers must invoke Clear() when the tables are no longer needed.
template<class T>
class WeightCache : public T {
private:
	std::unordered_map<u32, Weight*> weightsCache;
public:
	// Returns the weight table for `key`, computing and caching it on first use.
	Weight* operator [] (u32 key) {
		Weight *&weights = weightsCache[key];
		if (!weights) {
			// CalcWeightsAll lives in the dependent base class T, so the call must
			// be qualified with this-> to be found under standard two-phase name
			// lookup (an unqualified call only compiles on permissive compilers).
			weights = this->CalcWeightsAll(key);
		}
		return weights;
	}

	// Frees every cached table and empties the cache.
	void Clear() {
		// Iterate by reference: no need to copy each key/value pair.
		for (auto &entry : weightsCache)
			delete[] entry.second;
		weightsCache.clear();
	}
};

// Pair of cached weight tables for the U and V directions of a patch.
//
// u / v point into arrays owned by the WeightCache passed to the constructor;
// this struct does not own or free them.
// size_u / size_v are the element counts of u / v. They are not derived here:
// the caller fills them in after construction. They start at 0 so they never
// hold an indeterminate value.
struct Weight2D {
	const Weight *u, *v;
	int size_u = 0, size_v = 0;

	// Looks up (or lazily builds) the tables for key_u and key_v in `cache`.
	template<class T>
	Weight2D(WeightCache<T> &cache, u32 key_u, u32 key_v) {
		u = cache[key_u];
		v = (key_u != key_v) ? cache[key_v] : u; // Use same weights if u == v
	}
};

bool CanUseHardwareTessellation(GEPatchPrimType prim);
void TessellateSplinePatch(u8 *&dest, u16 *indices, int &count, SplinePatchLocal &spatch, u32 origVertType, int maxVertices);
void TessellateBezierPatch(u8 *&dest, u16 *&indices, int &count, int tess_u, int tess_v, const BezierPatch &patch, u32 origVertType, int maxVertices);


#define TEMPLATE_PARAMETER_DISPATCHER(NAME, FUNCNAME) \ #define TEMPLATE_PARAMETER_DISPATCHER(NAME, FUNCNAME) \
template<typename Func, int NumParams> \ template<typename Func, int NumParams> \
Expand Down
55 changes: 46 additions & 9 deletions GPU/D3D11/DrawEngineD3D11.cpp
Expand Up @@ -692,7 +692,7 @@ void DrawEngineD3D11::DoFlush() {
GPUDebug::NotifyDraw(); GPUDebug::NotifyDraw();
} }


void DrawEngineD3D11::TessellationDataTransferD3D11::SendDataToShader(const SimpleVertex *const *points, int size, u32 vertType) { void DrawEngineD3D11::TessellationDataTransferD3D11::SendDataToShader(const SimpleVertex *const *points, int size, u32 vertType, const Weight2D &weights) {
struct TessData { struct TessData {
float pos[3]; float pad1; float pos[3]; float pad1;
float uv[2]; float pad2[2]; float uv[2]; float pad2[2];
Expand All @@ -701,19 +701,19 @@ void DrawEngineD3D11::TessellationDataTransferD3D11::SendDataToShader(const Simp


if (prevSize < size) { if (prevSize < size) {
prevSize = size; prevSize = size;
if (buf) { if (buf[0]) {
buf->Release(); buf[0]->Release();
view->Release(); view[0]->Release();
} }
desc.ByteWidth = size * sizeof(TessData); desc.ByteWidth = size * sizeof(TessData);
desc.StructureByteStride = sizeof(TessData); desc.StructureByteStride = sizeof(TessData);


device_->CreateBuffer(&desc, nullptr, &buf); device_->CreateBuffer(&desc, nullptr, &buf[0]);
device_->CreateShaderResourceView(buf, 0, &view); device_->CreateShaderResourceView(buf[0], nullptr, &view[0]);
context_->VSSetShaderResources(0, 1, &view); context_->VSSetShaderResources(0, 1, &view[0]);
} }
D3D11_MAPPED_SUBRESOURCE map; D3D11_MAPPED_SUBRESOURCE map;
context_->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); context_->Map(buf[0], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
uint8_t *data = (uint8_t *)map.pData; uint8_t *data = (uint8_t *)map.pData;


float *pos = (float *)(data); float *pos = (float *)(data);
Expand All @@ -725,5 +725,42 @@ void DrawEngineD3D11::TessellationDataTransferD3D11::SendDataToShader(const Simp


CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType); CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);


context_->Unmap(buf, 0); context_->Unmap(buf[0], 0);


// Weights U
if (prevSizeWeights[0] < weights.size_u) {
prevSizeWeights[0] = weights.size_u;
if (buf[1]) {
buf[1]->Release();
view[1]->Release();
}
desc.ByteWidth = weights.size_u * sizeof(Weight);
desc.StructureByteStride = sizeof(Weight);

device_->CreateBuffer(&desc, nullptr, &buf[1]);
device_->CreateShaderResourceView(buf[1], nullptr, &view[1]);
context_->VSSetShaderResources(1, 1, &view[1]);
}
context_->Map(buf[1], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
memcpy(map.pData, weights.u, weights.size_u * sizeof(Weight));
context_->Unmap(buf[1], 0);

// Weights V
if (prevSizeWeights[1] < weights.size_v) {
prevSizeWeights[1] = weights.size_v;
if (buf[2]) {
buf[2]->Release();
view[2]->Release();
}
desc.ByteWidth = weights.size_v * sizeof(Weight);
desc.StructureByteStride = sizeof(Weight);

device_->CreateBuffer(&desc, nullptr, &buf[2]);
device_->CreateShaderResourceView(buf[2], nullptr, &view[2]);
context_->VSSetShaderResources(2, 1, &view[2]);
}
context_->Map(buf[2], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
memcpy(map.pData, weights.v, weights.size_v * sizeof(Weight));
context_->Unmap(buf[2], 0);
} }

0 comments on commit 3c0fb44

Please sign in to comment.