Skip to content

Commit

Permalink
Merge pull request #16094 from unknownbrackets/softgpu-bloom
Browse files: browse the repository at this point in the history
softgpu: Optimize rectangle sampling/blending used in bloom
  • Loading branch information
hrydgard committed Sep 24, 2022
2 parents 7aee1f2 + c76d31d commit 895b916
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 53 deletions.
2 changes: 1 addition & 1 deletion GPU/Common/TextureCacheCommon.cpp
Expand Up @@ -2797,7 +2797,7 @@ CheckAlphaResult TextureCacheCommon::CheckCLUTAlpha(const uint8_t *pixelData, GE
// Never has any alpha.
return CHECKALPHA_FULL;
default:
return CheckAlpha32((const u32 *)pixelData, w, 0xFF000000); // note, the normal order here, unlike the 16-bit formats
return CheckAlpha32((const u32 *)pixelData, w, 0xFF000000);
}
}

Expand Down
14 changes: 0 additions & 14 deletions GPU/D3D11/TextureCacheD3D11.cpp
Expand Up @@ -453,20 +453,6 @@ DXGI_FORMAT TextureCacheD3D11::GetDestFormat(GETextureFormat format, GEPaletteFo
}
}

// Checks w pixels of pixelData for alpha, picking the scanner and the alpha
// mask according to the destination DXGI format.
CheckAlphaResult TextureCacheD3D11::CheckAlpha(const u32 *pixelData, u32 dstFmt, int w) {
	// This format never has any alpha, so no scan is needed.
	if (dstFmt == DXGI_FORMAT_B5G6R5_UNORM)
		return CHECKALPHA_FULL;

	// 16-bit formats: the data is reinterpreted as u16 for the 16-bit scanner.
	if (dstFmt == DXGI_FORMAT_B4G4R4A4_UNORM)
		return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
	if (dstFmt == DXGI_FORMAT_B5G5R5A1_UNORM)
		return CheckAlpha16((const u16 *)pixelData, w, 0x8000);

	// Every other format goes through the 32-bit scanner with the top byte as mask.
	return CheckAlpha32((const u32 *)pixelData, w, 0xFF000000);
}

bool TextureCacheD3D11::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) {
SetTexture();
if (!nextTexture_) {
Expand Down
1 change: 0 additions & 1 deletion GPU/D3D11/TextureCacheD3D11.h
Expand Up @@ -65,7 +65,6 @@ class TextureCacheD3D11 : public TextureCacheCommon {

private:
DXGI_FORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
static CheckAlphaResult CheckAlpha(const u32 *pixelData, u32 dstFmt, int w);
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;

void BuildTexture(TexCacheEntry *const entry) override;
Expand Down
14 changes: 0 additions & 14 deletions GPU/Directx9/TextureCacheDX9.cpp
Expand Up @@ -358,20 +358,6 @@ D3DFORMAT TextureCacheDX9::GetDestFormat(GETextureFormat format, GEPaletteFormat
}
}

// Checks w pixels of pixelData for alpha, picking the scanner and the alpha
// mask according to the destination D3D9 format.
CheckAlphaResult TextureCacheDX9::CheckAlpha(const u32 *pixelData, u32 dstFmt, int w) {
	// This format never has any alpha, so no scan is needed.
	if (dstFmt == D3DFMT_R5G6B5)
		return CHECKALPHA_FULL;

	// 16-bit formats: the data is reinterpreted as u16 for the 16-bit scanner.
	if (dstFmt == D3DFMT_A4R4G4B4)
		return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
	if (dstFmt == D3DFMT_A1R5G5B5)
		return CheckAlpha16((const u16 *)pixelData, w, 0x8000);

	// Every other format goes through the 32-bit scanner with the top byte as mask.
	return CheckAlpha32(pixelData, w, 0xFF000000);
}

bool TextureCacheDX9::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) {
SetTexture();
ApplyTexture();
Expand Down
1 change: 0 additions & 1 deletion GPU/Directx9/TextureCacheDX9.h
Expand Up @@ -56,7 +56,6 @@ class TextureCacheDX9 : public TextureCacheCommon {
void ApplySamplingParams(const SamplerCacheKey &key) override;

D3DFORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
static CheckAlphaResult CheckAlpha(const u32 *pixelData, u32 dstFmt, int w);
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;

void BuildTexture(TexCacheEntry *const entry) override;
Expand Down
6 changes: 4 additions & 2 deletions GPU/Software/DrawPixel.cpp
Expand Up @@ -673,11 +673,13 @@ void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id) {
state.srcColorAsFactor = true;
break;

case PixelBlendFactor::ZERO:
state.readsDstPixel = state.dstColorAsFactor || state.usesDstAlpha;
break;

default:
break;
}

state.dstColorAsFactor = state.dstColorAsFactor || state.usesDstAlpha;
}
}

Expand Down
1 change: 1 addition & 0 deletions GPU/Software/DrawPixel.h
Expand Up @@ -51,6 +51,7 @@ struct PixelBlendState {
bool dstFactorIsInverse = false;
bool srcColorAsFactor = false;
bool dstColorAsFactor = false;
bool readsDstPixel = true;
};
void ComputePixelBlendState(PixelBlendState &state, const PixelFuncID &id);

Expand Down
12 changes: 8 additions & 4 deletions GPU/Software/DrawPixelX86.cpp
Expand Up @@ -1047,7 +1047,13 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {

// Step 1: Load and expand dest color.
X64Reg dstReg = regCache_.Alloc(RegCache::VEC_TEMP0);
if (id.FBFormat() == GE_FORMAT_8888) {
if (!blendState.readsDstPixel) {
// Let's load colorOff just for registers to be consistent.
X64Reg colorOff = GetColorOff(id);
regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);

PXOR(dstReg, R(dstReg));
} else if (id.FBFormat() == GE_FORMAT_8888) {
X64Reg colorOff = GetColorOff(id);
Describe("AlphaBlend");
MOVD_xmm(dstReg, MatR(colorOff));
Expand All @@ -1073,7 +1079,6 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {

case GE_FORMAT_4444:
success = success && Jit_ConvertFrom4444(id, dstGenReg, temp1Reg, temp2Reg, blendState.usesDstAlpha);

break;

case GE_FORMAT_8888:
Expand Down Expand Up @@ -1115,7 +1120,7 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
// We also need to add a half bit later, so this gives us space.
if (multiplySrc || blendState.srcColorAsFactor)
PSLLW(argColorReg, 4);
if (multiplyDst || blendState.dstColorAsFactor)
if (multiplyDst || blendState.dstColorAsFactor || blendState.usesDstAlpha)
PSLLW(dstReg, 4);

// Okay, now grab our factors. Don't bother if they're known values.
Expand Down Expand Up @@ -1219,7 +1224,6 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
return success;
}


bool PixelJitCache::Jit_BlendFactor(const PixelFuncID &id, RegCache::Reg factorReg, RegCache::Reg dstReg, PixelBlendFactor factor) {
X64Reg idReg = INVALID_REG;
X64Reg tempReg = INVALID_REG;
Expand Down
8 changes: 8 additions & 0 deletions GPU/Software/FuncId.cpp
Expand Up @@ -168,6 +168,14 @@ void ComputePixelFuncID(PixelFuncID *id) {
id->alphaBlendDst = (uint8_t)OptimizeAlphaFactor(gstate.getFixB());
}

if (id->colorTest && gstate.getColorTestFunction() == GE_COMP_NOTEQUAL && gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF) {
if (!id->depthWrite && !id->stencilTest && id->alphaBlend && id->AlphaBlendEq() == GE_BLENDMODE_MUL_AND_ADD) {
// Might be a pointless color test (seen in Ridge Racer, for example.)
if (id->AlphaBlendDst() == PixelBlendFactor::ONE)
id->colorTest = false;
}
}

id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY;
id->applyFog = gstate.isFogEnabled() && !gstate.isModeThrough();

Expand Down
66 changes: 65 additions & 1 deletion GPU/Software/Rasterizer.cpp
Expand Up @@ -143,6 +143,68 @@ void ComputeRasterizerState(RasterizerState *state) {
#endif
}

// Returns a copy of state specialized for a primitive drawn with v1's color0.
//
// Pixel ID (skipped entirely in clear mode):
//  - With blending on, an SRCALPHA source factor or INVSRCALPHA dest factor is
//    replaced by a fixed ZERO / ONE factor when the color's alpha byte is 0x00
//    or 0xFF.  This is not done when textures are enabled and the sampler uses
//    texture alpha (presumably because the vertex alpha alone then doesn't
//    determine the blended alpha - confirm against the sampler).
//  - If the factors end up as src ONE / dst ZERO with the MUL_AND_ADD equation,
//    blending is switched off.
// Sampler ID:
//  - With textures enabled and color0 == 0xFFFFFFFF, a MODULATE texfunc is
//    swapped for REPLACE.
//
// The drawPixel / linear / nearest function pointers are looked up again only
// for the ID that was actually modified.
RasterizerState OptimizeFlatRasterizerState(RasterizerState state, const VertexData &v1) {
	const uint8_t alpha = v1.color0 >> 24;

	bool changedPixelID = false;
	bool changedSamplerID = false;

	if (!state.pixelID.clearMode) {
		auto &pixelID = state.pixelID;

		const bool useTextureAlpha = state.enableTextures && state.samplerID.useTextureAlpha;
		// Only the two extreme alpha values map onto a fixed blend factor.
		const bool alphaIsFixed = pixelID.alphaBlend && !useTextureAlpha && (alpha == 0 || alpha == 0xFF);

		if (alphaIsFixed && pixelID.AlphaBlendSrc() == PixelBlendFactor::SRCALPHA) {
			pixelID.alphaBlendSrc = (uint8_t)(alpha == 0 ? PixelBlendFactor::ZERO : PixelBlendFactor::ONE);
			changedPixelID = true;
		}
		if (alphaIsFixed && pixelID.AlphaBlendDst() == PixelBlendFactor::INVSRCALPHA) {
			// Inverse factor, so the mapping is flipped relative to the source factor.
			pixelID.alphaBlendDst = (uint8_t)(alpha == 0 ? PixelBlendFactor::ONE : PixelBlendFactor::ZERO);
			changedPixelID = true;
		}

		// Must run after the factor replacement above, since it may have produced ONE / ZERO.
		const bool blendIsNoOp = pixelID.alphaBlend
			&& pixelID.AlphaBlendSrc() == PixelBlendFactor::ONE
			&& pixelID.AlphaBlendDst() == PixelBlendFactor::ZERO
			&& pixelID.AlphaBlendEq() == GE_BLENDMODE_MUL_AND_ADD;
		if (blendIsNoOp) {
			pixelID.alphaBlend = false;
			changedPixelID = true;
		}
	}

	// Modulate is common, sometimes even with a fixed color. Replace is cheaper.
	if (state.enableTextures && v1.color0 == 0xFFFFFFFF && state.samplerID.TexFunc() == GE_TEXFUNC_MODULATE) {
		state.samplerID.texFunc = (uint8_t)GE_TEXFUNC_REPLACE;
		changedSamplerID = true;
	}

	if (changedPixelID)
		state.drawPixel = Rasterizer::GetSingleFunc(state.pixelID);

	if (changedSamplerID) {
		state.linear = Sampler::GetLinearFunc(state.samplerID);
		state.nearest = Sampler::GetNearestFunc(state.samplerID);

		// Since the definitions are the same, just force this setting using the func pointer.
		if (g_Config.iTexFiltering == TEX_FILTER_FORCE_LINEAR)
			state.nearest = state.linear;
		else if (g_Config.iTexFiltering == TEX_FILTER_FORCE_NEAREST)
			state.linear = state.nearest;
	}

	return state;
}

static inline u8 ClampFogDepth(float fogdepth) {
union FloatBits {
float f;
Expand Down Expand Up @@ -893,7 +955,7 @@ void DrawTriangle(const VertexData &v0, const VertexData &v1, const VertexData &
drawSlice(v0, v1, v2, range.x1, range.y1, range.x2, range.y2, state);
}

void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state) {
void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &rastState) {
int entireX1 = std::min(v0.screenpos.x, v1.screenpos.x);
int entireY1 = std::min(v0.screenpos.y, v1.screenpos.y);
int entireX2 = std::max(v0.screenpos.x, v1.screenpos.x) - 1;
Expand All @@ -903,6 +965,8 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords &
int maxX = std::min(entireX2, range.x2);
int maxY = std::min(entireY2, range.y2);

RasterizerState state = OptimizeFlatRasterizerState(rastState, v1);

Vec2f rowST(0.0f, 0.0f);
// Note: this is double the x or y movement.
Vec2f stx(0.0f, 0.0f);
Expand Down
14 changes: 0 additions & 14 deletions GPU/Vulkan/TextureCacheVulkan.cpp
Expand Up @@ -709,20 +709,6 @@ VkFormat TextureCacheVulkan::GetDestFormat(GETextureFormat format, GEPaletteForm
}
}

// Checks w pixels of pixelData for alpha, picking the scanner and the alpha
// mask according to the destination Vulkan format.
CheckAlphaResult TextureCacheVulkan::CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int w) {
	// This format never has any alpha, so no scan is needed.
	if (dstFmt == VULKAN_565_FORMAT)
		return CHECKALPHA_FULL;

	// 16-bit formats: the data is reinterpreted as u16 for the 16-bit scanner.
	if (dstFmt == VULKAN_4444_FORMAT)
		return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
	if (dstFmt == VULKAN_1555_FORMAT)
		return CheckAlpha16((const u16 *)pixelData, w, 0x8000);

	// Every other format goes through the 32-bit scanner with the top byte as mask.
	return CheckAlpha32(pixelData, w, 0xFF000000);
}

void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePtr, int rowPitch, int level, int scaleFactor, VkFormat dstFmt) {
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);
Expand Down
1 change: 0 additions & 1 deletion GPU/Vulkan/TextureCacheVulkan.h
Expand Up @@ -102,7 +102,6 @@ class TextureCacheVulkan : public TextureCacheCommon {
private:
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePtr, int rowPitch, int level, int scaleFactor, VkFormat dstFmt);
VkFormat GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
static CheckAlphaResult CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int w);
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;

void BuildTexture(TexCacheEntry *const entry) override;
Expand Down

0 comments on commit 895b916

Please sign in to comment.