Permalink
Browse files

Merge pull request #9544 from hrydgard/state-optimizations-2

State optimizations 2
  • Loading branch information...
hrydgard committed Aug 14, 2017
2 parents 945f57a + ca40282 commit 00f57a5d4cac9132c2731c3892c0a48ad19fc343
@@ -302,7 +302,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
int drawing_width, drawing_height;
EstimateDrawingSize(params.fb_address, params.fmt, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, 4), drawing_width, drawing_height);
gstate_c.curRTOffsetX = 0;
gstate_c.SetCurRTOffsetX(0);
bool vfbFormatChanged = false;
// Find a matching framebuffer
@@ -335,7 +335,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
if (v->format == params.fmt && v->fb_stride == params.fb_stride && x_offset < params.fb_stride && v->height >= drawing_height) {
WARN_LOG_REPORT_ONCE(renderoffset, HLE, "Rendering to framebuffer offset: %08x +%dx%d", v->fb_address, x_offset, 0);
vfb = v;
gstate_c.curRTOffsetX = x_offset;
gstate_c.SetCurRTOffsetX(x_offset);
vfb->width = std::max((int)vfb->width, x_offset + drawing_width);
// To prevent the newSize code from being confused.
drawing_width += x_offset;
@@ -633,6 +633,8 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width,
DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, width);
const int bpp = fmt == GE_FORMAT_8888 ? 4 : 2;
ResizeFramebufFBO(vfb, width, size / (bpp * width));
// Resizing may change the viewport/etc.
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
vfb->fb_stride = width;
// This might be a bit wider than necessary, but we'll redetect on next render.
vfb->width = width;
@@ -674,6 +676,8 @@ void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
RebindFramebuffer();
}
// TODO: Necessary?
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
@@ -779,6 +783,8 @@ void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, GEBu
SetViewport2D(0, 0, pixelWidth_, pixelHeight_);
DrawActiveTexture(x, y, w, h, (float)pixelWidth_, (float)pixelHeight_, u0, v0, u1, v1, uvRotation, flags);
}
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE);
}
void FramebufferManagerCommon::DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb) {
@@ -1645,6 +1651,8 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
dstBuffer->newWidth = std::max(dstWidth, (int)dstBuffer->width);
dstBuffer->newHeight = std::max(dstHeight, (int)dstBuffer->height);
dstBuffer->lastFrameNewSize = gpuStats.numFlips;
// Resizing may change the viewport/etc.
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
}
DrawPixels(dstBuffer, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstBuffer->format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(dstWidth * dstXFactor), dstHeight);
SetColorUpdated(dstBuffer, skipDrawReason);
@@ -33,7 +33,6 @@
#include "GPU/Common/GPUStateUtils.h"
bool CanUseHardwareTransform(int prim) {
if (!g_Config.bHardwareTransform)
return false;
@@ -100,6 +100,14 @@ enum : uint64_t {
DIRTY_TEXTURE_IMAGE = 1ULL << 41,
DIRTY_TEXTURE_PARAMS = 1ULL << 42,
// Render State
DIRTY_BLEND_STATE = 1ULL << 43,
DIRTY_DEPTHSTENCIL_STATE = 1ULL << 44,
DIRTY_RASTER_STATE = 1ULL << 45,
DIRTY_VIEWPORTSCISSOR_STATE = 1ULL << 46,
DIRTY_VERTEXSHADER_STATE = 1ULL << 47,
DIRTY_FRAGMENTSHADER_STATE = 1ULL << 48,
DIRTY_ALL = 0xFFFFFFFFFFFFFFFF
};
View
@@ -236,6 +236,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(replaceBlend);
// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
// Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes.
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE)
doTextureAlpha = false;
@@ -272,6 +272,9 @@ void TextureCacheCommon::SetTexture(bool force) {
TexCacheEntry *entry = nullptr;
gstate_c.SetNeedShaderTexclamp(false);
gstate_c.skipDrawReason &= ~SKIPDRAW_BAD_FB_TEXTURE;
if (gstate_c.bgraTexture != isBgraBackend_) {
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
gstate_c.bgraTexture = isBgraBackend_;
if (iter != cache_.end()) {
@@ -759,6 +762,11 @@ void TextureCacheCommon::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFram
// We need to force it, since we may have set it on a texture before attaching.
gstate_c.curTextureWidth = framebuffer->bufferWidth;
gstate_c.curTextureHeight = framebuffer->bufferHeight;
if (gstate_c.bgraTexture) {
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
} else if ((gstate_c.curTextureXOffset == 0) != (fbInfo.xOffset == 0) || (gstate_c.curTextureYOffset == 0) != (fbInfo.yOffset == 0)) {
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
gstate_c.bgraTexture = false;
gstate_c.curTextureXOffset = fbInfo.xOffset;
gstate_c.curTextureYOffset = fbInfo.yOffset;
@@ -119,7 +119,8 @@ void DrawEngineD3D11::ClearTrackedVertexArrays() {
void DrawEngineD3D11::ClearInputLayoutMap() {
for (auto &decl : inputLayoutMap_) {
decl.second->Release();
if (decl.second)
decl.second->Release();
}
inputLayoutMap_.clear();
}
@@ -211,6 +211,7 @@ void FramebufferManagerD3D11::DisableState() {
context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], nullptr, 0xFFFFFFFF);
context_->RSSetState(stockD3D11.rasterStateNoCull);
context_->OMSetDepthStencilState(stockD3D11.depthStencilDisabled, 0xFF);
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE);
}
void FramebufferManagerD3D11::CompilePostShader() {
@@ -432,6 +433,7 @@ void FramebufferManagerD3D11::DrawActiveTexture(float x, float y, float w, float
UINT offset = 0;
context_->IASetVertexBuffers(0, 1, &quadBuffer_, &stride, &offset);
context_->Draw(4, 0);
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE);
}
void FramebufferManagerD3D11::Bind2DShader() {
@@ -465,6 +467,8 @@ void FramebufferManagerD3D11::BindPostShader(const PostShaderUniforms &uniforms)
context_->Unmap(postConstants_, 0);
context_->VSSetConstantBuffers(0, 1, &postConstants_); // Probably not necessary
context_->PSSetConstantBuffers(0, 1, &postConstants_);
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
}
void FramebufferManagerD3D11::RebindFramebuffer() {
@@ -505,9 +509,12 @@ void FramebufferManagerD3D11::ReformatFramebufferFrom(VirtualFramebuffer *vfb, G
context_->RSSetViewports(1, &vp);
context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
context_->Draw(4, 0);
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
}
RebindFramebuffer();
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE);
}
static void CopyPixelDepthOnly(u32 *dstp, const u32 *srcp, size_t c) {
@@ -709,6 +716,8 @@ void FramebufferManagerD3D11::SimpleBlit(
UINT offset = 0;
context_->IASetVertexBuffers(0, 1, &quadBuffer_, &stride, &offset);
context_->Draw(4, 0);
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE);
}
void FramebufferManagerD3D11::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp) {
View
@@ -77,7 +77,7 @@ static const D3D11CommandTableEntry commandTable[] = {
// Changes that dirty the current texture.
{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_D3D11::Execute_TexSize0 },
{ GE_CMD_STENCILTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_STENCILREPLACEVALUE | DIRTY_FOGCOEF }, // These are combined in D3D11
{ GE_CMD_STENCILTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_STENCILREPLACEVALUE | DIRTY_FOGCOEF | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE }, // These are combined in D3D11
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPU_D3D11::Execute_VertexType },
@@ -445,12 +445,18 @@ void GPU_D3D11::ExecuteOp(u32 op, u32 diff) {
}
void GPU_D3D11::Execute_VertexType(u32 op, u32 diff) {
if (diff)
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) {
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
if (diff & GE_VTYPE_THROUGH_MASK)
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE);
}
}
void GPU_D3D11::Execute_VertexTypeSkinning(u32 op, u32 diff) {
if (diff)
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
// Don't flush when weight count changes, unless morph is enabled.
if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
// Restore and flush
@@ -466,6 +472,8 @@ void GPU_D3D11::Execute_VertexTypeSkinning(u32 op, u32 diff) {
gstate_c.deferredVertTypeDirty = 0;
}
}
if (diff & GE_VTYPE_THROUGH_MASK)
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE);
}
void GPU_D3D11::Execute_Prim(u32 op, u32 diff) {
@@ -474,12 +482,11 @@ void GPU_D3D11::Execute_Prim(u32 op, u32 diff) {
u32 data = op & 0xFFFFFF;
u32 count = data & 0xFFFF;
// Upper bits are ignored.
GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
if (count == 0)
return;
// Upper bits are ignored.
GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
SetDrawType(DRAW_PRIM, prim);
// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
@@ -581,6 +588,7 @@ void GPU_D3D11::Execute_Bezier(u32 op, u32 diff) {
bool patchFacing = gstate.patchfacing & 1;
if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) {
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
gstate_c.bezier = true;
if (gstate_c.spline_count_u != bz_ucount) {
gstate_c.Dirty(DIRTY_BEZIERSPLINE);
@@ -591,6 +599,8 @@ void GPU_D3D11::Execute_Bezier(u32 op, u32 diff) {
int bytesRead = 0;
drawEngine_.SubmitBezier(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), bz_ucount, bz_vcount, patchPrim, computeNormals, patchFacing, gstate.vertType, &bytesRead);
if (gstate_c.bezier)
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
gstate_c.bezier = false;
// After drawing, we advance pointers - see SubmitPrim which does the same.
@@ -642,6 +652,7 @@ void GPU_D3D11::Execute_Spline(u32 op, u32 diff) {
u32 vertType = gstate.vertType;
if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) {
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
gstate_c.spline = true;
bool countsChanged = gstate_c.spline_count_u != sp_ucount || gstate_c.spline_count_v != sp_vcount;
bool typesChanged = gstate_c.spline_type_u != sp_utype || gstate_c.spline_type_v != sp_vtype;
@@ -656,6 +667,8 @@ void GPU_D3D11::Execute_Spline(u32 op, u32 diff) {
int bytesRead = 0;
drawEngine_.SubmitSpline(control_points, indices, gstate.getPatchDivisionU(), gstate.getPatchDivisionV(), sp_ucount, sp_vcount, sp_utype, sp_vtype, patchPrim, computeNormals, patchFacing, vertType, &bytesRead);
if (gstate_c.spline)
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
gstate_c.spline = false;
// After drawing, we advance pointers - see SubmitPrim which does the same.
@@ -139,6 +139,7 @@ void ShaderManagerD3D11::DirtyLastShader() {
lastVSID_.clear();
lastVShader_ = nullptr;
lastFShader_ = nullptr;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE|DIRTY_FRAGMENTSHADER_STATE);
}
uint64_t ShaderManagerD3D11::UpdateUniforms() {
@@ -178,8 +179,20 @@ void ShaderManagerD3D11::BindUniforms() {
void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform) {
ShaderID VSID;
ShaderID FSID;
ComputeVertexShaderID(&VSID, vertType, useHWTransform);
ComputeFragmentShaderID(&FSID);
if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
gstate_c.Clean(DIRTY_VERTEXSHADER_STATE);
ComputeVertexShaderID(&VSID, vertType, useHWTransform);
} else {
VSID = lastVSID_;
}
if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
ComputeFragmentShaderID(&FSID);
} else {
FSID = lastFSID_;
}
// Just update uniforms if this is the same shader as last time.
if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_) {
@@ -134,8 +134,15 @@ class ShaderManagerD3D11;
void DrawEngineD3D11::ApplyDrawState(int prim) {
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
dynState_.topology = primToD3D11[prim];
if (!gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE)) {
// nothing to do
return;
}
// Blend
{
if (gstate_c.IsDirty(DIRTY_BLEND_STATE)) {
gstate_c.SetAllowShaderBlend(!g_Config.bDisableSlowFramebufEffects);
if (gstate.isModeClear()) {
keys_.blend.value = 0; // full wipe
@@ -274,7 +281,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}
}
{
if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) {
keys_.raster.value = 0;
if (gstate.isModeClear()) {
keys_.raster.cullMode = D3D11_CULL_NONE;
@@ -300,8 +307,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
rasterState_ = rs;
}
{
// Set ColorMask/Stencil/Depth
if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE)) {
if (gstate.isModeClear()) {
keys_.depthStencil.value = 0;
keys_.depthStencil.depthTestEnable = true;
@@ -388,7 +394,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
depthStencilState_ = ds;
}
{
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
ViewportAndScissor vpAndScissor;
ConvertViewportAndScissor(useBufferedRendering,
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
@@ -429,8 +435,6 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}
}
dynState_.topology = primToD3D11[prim];
if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
textureCache_->SetTexture();
gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
@@ -458,13 +462,22 @@ void DrawEngineD3D11::ApplyDrawStateLate(bool applyStencilRef, uint8_t stencilRe
Uint8x4ToFloat4(blendColor, dynState_.blendColor);
// we go through Draw here because it automatically handles screen rotation, as needed in UWP on mobiles.
draw_->SetViewports(1, &dynState_.viewport);
draw_->SetScissorRect(dynState_.scissor.left, dynState_.scissor.top, dynState_.scissor.right - dynState_.scissor.left, dynState_.scissor.bottom - dynState_.scissor.top);
context_->RSSetState(rasterState_);
if (device1_) {
context1_->OMSetBlendState(blendState1_, blendColor, 0xFFFFFFFF);
} else {
context_->OMSetBlendState(blendState_, blendColor, 0xFFFFFFFF);
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
draw_->SetViewports(1, &dynState_.viewport);
draw_->SetScissorRect(dynState_.scissor.left, dynState_.scissor.top, dynState_.scissor.right - dynState_.scissor.left, dynState_.scissor.bottom - dynState_.scissor.top);
}
context_->OMSetDepthStencilState(depthStencilState_, applyStencilRef ? stencilRef : dynState_.stencilRef);
}
if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) {
context_->RSSetState(rasterState_);
}
if (gstate_c.IsDirty(DIRTY_BLEND_STATE)) {
if (device1_) {
context1_->OMSetBlendState(blendState1_, blendColor, 0xFFFFFFFF);
} else {
context_->OMSetBlendState(blendState_, blendColor, 0xFFFFFFFF);
}
}
if (gstate_c.IsDirty(DIRTY_DEPTHSTENCIL_STATE)) {
context_->OMSetDepthStencilState(depthStencilState_, applyStencilRef ? stencilRef : dynState_.stencilRef);
}
gstate_c.Clean(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_BLEND_STATE);
}
@@ -163,6 +163,8 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ
context_->IASetVertexBuffers(0, 1, &fsQuadBuffer_, &quadStride_, &quadOffset_);
context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
context_->Draw(4, 0);
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
return true;
}
@@ -200,6 +202,7 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ
}
D3D11_VIEWPORT vp{ 0.0f, 0.0f, (float)w, (float)h, 0.0f, 1.0f };
context_->RSSetViewports(1, &vp);
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
float coord[20] = {
-1.0f, 1.0f, 0.0f, 0.0f, 0.0f,
@@ -226,6 +229,7 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ
context_->IASetVertexBuffers(0, 1, &quadBuffer_, &quadStride_, &quadOffset_);
context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DClamp);
context_->OMSetDepthStencilState(stockD3D11.depthDisabledStencilWrite, 0xFF);
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE);
for (int i = 1; i < values; i += i) {
if (!(usedBits & i)) {
Oops, something went wrong.

0 comments on commit 00f57a5

Please sign in to comment.