Skip to content

Commit

Permalink
Merge pull request #7716 from unknownbrackets/clut
Browse files Browse the repository at this point in the history
Avoid accidental sign ext for > 24 bit clut shift, clut wrapping
  • Loading branch information
hrydgard committed Apr 26, 2015
2 parents aee69d4 + 4fa50a9 commit de525f3
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 23 deletions.
3 changes: 2 additions & 1 deletion GPU/Common/DepalettizeShaderCommon.cpp
Expand Up @@ -88,7 +88,7 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat) {
texturePixels = 512;

if (shift) {
WRITE(p, " index = ((index >> %i) & 0x%02x)", shift, mask);
WRITE(p, " index = (int(uint(index) >> %i) & 0x%02x)", shift, mask);
} else {
WRITE(p, " index = (index & 0x%02x)", mask);
}
Expand Down Expand Up @@ -213,6 +213,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
}

// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
// Technically, the clutBase should be |'d, not added, but that's hard with floats.
float texel_offset = ((float)clutBase + 0.5f) / texturePixels;
sprintf(offset, " + %f", texel_offset);

Expand Down
2 changes: 1 addition & 1 deletion GPU/Directx9/DepalettizeShaderDX9.cpp
Expand Up @@ -109,7 +109,7 @@ LPDIRECT3DTEXTURE9 DepalShaderCacheDX9::GetClutTexture(const u32 clutID, u32 *ra
memcpy(rect.pBits, rawClut, 1024);
tex->texture->UnlockRect(0);

pD3Ddevice->SetSamplerState(1, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
pD3Ddevice->SetSamplerState(1, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP);
pD3Ddevice->SetSamplerState(1, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
pD3Ddevice->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_POINT);
pD3Ddevice->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
Expand Down
22 changes: 13 additions & 9 deletions GPU/Directx9/TextureCacheDX9.cpp
Expand Up @@ -70,14 +70,14 @@ TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstim
tmpTexBuf16.resize(1024 * 512); // 1MB
tmpTexBufRearrange.resize(1024 * 512); // 2MB

// Aren't these way too big?
clutBufConverted_ = (u32 *)AllocateAlignedMemory(4096 * sizeof(u32), 16); // 16KB
clutBufRaw_ = (u32 *)AllocateAlignedMemory(4096 * sizeof(u32), 16); // 16KB
// TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut.
clutBufConverted_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
clutBufRaw_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB

// Zap these so that reads from uninitialized parts of the CLUT look the same in
// release and debug
memset(clutBufConverted_, 0, 4096 * sizeof(u32));
memset(clutBufRaw_, 0, 4096 * sizeof(u32));
memset(clutBufConverted_, 0, 1024 * sizeof(u32));
memset(clutBufRaw_, 0, 1024 * sizeof(u32));

D3DCAPS9 pCaps;
ZeroMemory(&pCaps, sizeof(pCaps));
Expand Down Expand Up @@ -865,7 +865,12 @@ void TextureCacheDX9::UpdateCurrentClut() {
const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));
// Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.
// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
const u32 clutExtendedBytes = clutTotalBytes_ + clutBaseBytes;
//
// TODO: Actually, this seems like a hack. The game can upload part of a CLUT and reference other data.
// clutTotalBytes_ is the last amount uploaded. We should hash clutMaxBytes_, but this will often hash
// unrelated old entries for small palettes.
// Adding clutBaseBytes may just be mitigating this for some usage patterns.
const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);

clutHash_ = DoReliableHash32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
clutBuf_ = clutBufRaw_;
Expand Down Expand Up @@ -973,11 +978,10 @@ void TextureCacheDX9::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebu
dxstate.viewport.restore();

const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
const u32 clutBase = gstate.getClutIndexStartPos();
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutExtendedColors = (clutTotalBytes_ / bytesPerColor) + clutBase;
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;

TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(gstate.getClutPaletteFormat()), clutExtendedColors, clutExtendedColors, 1);
TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(gstate.getClutPaletteFormat()), clutTotalColors, clutTotalColors, 1);
gstate_c.textureFullAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL;
gstate_c.textureSimpleAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE;
} else {
Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/DepalettizeShader.cpp
Expand Up @@ -152,7 +152,7 @@ GLuint DepalShaderCache::GetClutTexture(const u32 clutID, u32 *rawClut) {

glTexImage2D(GL_TEXTURE_2D, 0, components, texturePixels, 1, 0, components2, dstFmt, (void *)rawClut);

glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
Expand Down
24 changes: 14 additions & 10 deletions GPU/GLES/TextureCache.cpp
Expand Up @@ -75,14 +75,14 @@ TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0
tmpTexBuf16.resize(1024 * 512); // 1MB
tmpTexBufRearrange.resize(1024 * 512); // 2MB

// Aren't these way too big?
clutBufConverted_ = (u32 *)AllocateAlignedMemory(4096 * sizeof(u32), 16); // 16KB
clutBufRaw_ = (u32 *)AllocateAlignedMemory(4096 * sizeof(u32), 16); // 16KB
// TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut.
clutBufConverted_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
clutBufRaw_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB

// Zap these so that reads from uninitialized parts of the CLUT look the same in
// release and debug
memset(clutBufConverted_, 0, 4096 * sizeof(u32));
memset(clutBufRaw_, 0, 4096 * sizeof(u32));
memset(clutBufConverted_, 0, 1024 * sizeof(u32));
memset(clutBufRaw_, 0, 1024 * sizeof(u32));

glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyLevel);
SetupTextureDecoder();
Expand Down Expand Up @@ -992,13 +992,18 @@ void TextureCache::UpdateCurrentClut() {
const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));
// Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.
// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
const u32 clutExtendedBytes = clutTotalBytes_ + clutBaseBytes;
//
// TODO: Actually, this seems like a hack. The game can upload part of a CLUT and reference other data.
// clutTotalBytes_ is the last amount uploaded. We should hash clutMaxBytes_, but this will often hash
// unrelated old entries for small palettes.
// Adding clutBaseBytes may just be mitigating this for some usage patterns.
const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);

clutHash_ = DoReliableHash32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);

// Avoid a copy when we don't need to convert colors.
if (UseBGRA8888() || clutFormat != GE_CMODE_32BIT_ABGR8888) {
const int numColors = (clutMaxBytes_ + clutBaseBytes) / (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));
const int numColors = clutMaxBytes_ / (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));
ConvertColors(clutBufConverted_, clutBufRaw_, getClutDestFormat(clutFormat), numColors);
clutBuf_ = clutBufConverted_;
} else {
Expand Down Expand Up @@ -1147,11 +1152,10 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffe
framebufferManager_->RebindFramebuffer();

const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
const u32 clutBase = gstate.getClutIndexStartPos();
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutExtendedColors = (clutTotalBytes_ / bytesPerColor) + clutBase;
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;

TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(gstate.getClutPaletteFormat()), clutExtendedColors, clutExtendedColors, 1);
TexCacheEntry::Status alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(gstate.getClutPaletteFormat()), clutTotalColors, clutTotalColors, 1);
gstate_c.textureFullAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL;
gstate_c.textureSimpleAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE;
} else {
Expand Down
6 changes: 5 additions & 1 deletion GPU/GPUState.h
Expand Up @@ -309,7 +309,11 @@ struct GPUgstate
int getClutIndexShift() const { return (clutformat >> 2) & 0x1F; }
int getClutIndexMask() const { return (clutformat >> 8) & 0xFF; }
int getClutIndexStartPos() const { return ((clutformat >> 16) & 0x1F) << 4; }
int transformClutIndex(int index) const { return ((index >> getClutIndexShift()) & getClutIndexMask()) | getClutIndexStartPos(); }
u32 transformClutIndex(u32 index) const {
// We need to wrap any entries beyond the first 1024 bytes.
u32 mask = getClutPaletteFormat() == GE_CMODE_32BIT_ABGR8888 ? 0xFF : 0x1FF;
return ((index >> getClutIndexShift()) & getClutIndexMask()) | (getClutIndexStartPos() & mask);
}
bool isClutIndexSimple() const { return (clutformat & ~3) == 0xC500FF00; } // Meaning, no special mask, shift, or start pos.
bool isTextureSwizzled() const { return texmode & 1; }
bool isClutSharedForMipmaps() const { return (texmode & 0x100) == 0; }
Expand Down

0 comments on commit de525f3

Please sign in to comment.