Skip to content

Commit

Permalink
Merge pull request #18416 from hrydgard/drawpixels-16-bit-opt
Browse files Browse the repository at this point in the history
Optimize DrawPixels for 16-bit RGB565 copies
  • Loading branch information
hrydgard committed Nov 12, 2023
2 parents cc6f9a7 + 7782548 commit bfd1115
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 21 deletions.
1 change: 1 addition & 0 deletions Common/Data/Convert/ColorConv.cpp
Expand Up @@ -617,6 +617,7 @@ void ConvertRGB565ToBGR565(u16 *dst, const u16 *src, u32 numPixels) {
u32 i = 0;
#endif

// TODO: Add a 64-bit loop too.
const u32 *src32 = (const u32 *)src;
u32 *dst32 = (u32 *)dst;
for (; i < numPixels / 2; i++) {
Expand Down
56 changes: 35 additions & 21 deletions GPU/Common/FramebufferManagerCommon.cpp
Expand Up @@ -1310,18 +1310,6 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer

Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
Draw::DataFormat depthFormat = Draw::DataFormat::UNDEFINED;
if (srcPixelFormat == GE_FORMAT_DEPTH16) {
if ((draw_->GetDataFormatSupport(Draw::DataFormat::R16_UNORM) & Draw::FMT_TEXTURE) != 0) {
depthFormat = Draw::DataFormat::R16_UNORM;
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R8_UNORM) & Draw::FMT_TEXTURE) != 0) {
// This could be improved by using specific draw shaders to pack full precision in two channels.
// However, not really worth the trouble until we find a game that requires it.
depthFormat = Draw::DataFormat::R8_UNORM;
} else {
// No usable single channel format. Can't be bothered.
return nullptr;
}
}

int bpp = BufferFormatBytesPerPixel(srcPixelFormat);
int srcStrideInBytes = srcStride * bpp;
Expand All @@ -1341,6 +1329,29 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
XXH3_freeState(hashState);
}

Draw::DataFormat texFormat = preferredPixelsFormat_;

if (srcPixelFormat == GE_FORMAT_DEPTH16) {
if ((draw_->GetDataFormatSupport(Draw::DataFormat::R16_UNORM) & Draw::FMT_TEXTURE) != 0) {
texFormat = Draw::DataFormat::R16_UNORM;
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R8_UNORM) & Draw::FMT_TEXTURE) != 0) {
// This could be improved by using specific draw shaders to pack full precision in two channels.
// However, not really worth the trouble until we find a game that requires it.
texFormat = Draw::DataFormat::R8_UNORM;
} else {
// No usable single channel format. Can't be bothered.
return nullptr;
}
} else if (srcPixelFormat == GE_FORMAT_565) {
// Check for supported matching formats.
// This mainly benefits the redundant copies in God of War on low-end platforms.
if ((draw_->GetDataFormatSupport(Draw::DataFormat::B5G6R5_UNORM_PACK16) & Draw::FMT_TEXTURE) != 0) {
texFormat = Draw::DataFormat::B5G6R5_UNORM_PACK16;
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R5G6B5_UNORM_PACK16) & Draw::FMT_TEXTURE) != 0) {
texFormat = Draw::DataFormat::R5G6B5_UNORM_PACK16;
}
}

// TODO: We can just change the texture format and flip some bits around instead of this.
// Could share code with the texture cache perhaps.
auto generateTexture = [&](uint8_t *data, const uint8_t *initData, uint32_t w, uint32_t h, uint32_t d, uint32_t byteStride, uint32_t sliceByteStride) {
Expand All @@ -1352,28 +1363,33 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
u8 *dst8 = (u8 *)(data + byteStride * y);
switch (srcPixelFormat) {
case GE_FORMAT_565:
if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
if (texFormat == Draw::DataFormat::B5G6R5_UNORM_PACK16) {
memcpy(dst16, src16, w * sizeof(uint16_t));
} else if (texFormat == Draw::DataFormat::R5G6B5_UNORM_PACK16) {
ConvertRGB565ToBGR565(dst16, src16, width); // Fast!
} else if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM) {
ConvertRGB565ToBGRA8888(dst, src16, width);
else
} else {
ConvertRGB565ToRGBA8888(dst, src16, width);
}
break;

case GE_FORMAT_5551:
if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
ConvertRGBA5551ToBGRA8888(dst, src16, width);
else
ConvertRGBA5551ToRGBA8888(dst, src16, width);
break;

case GE_FORMAT_4444:
if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
ConvertRGBA4444ToBGRA8888(dst, src16, width);
else
ConvertRGBA4444ToRGBA8888(dst, src16, width);
break;

case GE_FORMAT_8888:
if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
ConvertRGBA8888ToBGRA8888(dst, src32, width);
// This means use original pointer as-is. May avoid or optimize a copy.
else if (srcStride == width)
Expand All @@ -1388,10 +1404,10 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
// to do one of two different swizzle operations. However, for the only use of this so far,
// the Burnout lens flare trickery, swizzle doesn't matter since it's just a 0, 7fff, 0, 7fff pattern
// which comes out the same.
if (depthFormat == Draw::DataFormat::R16_UNORM) {
if (texFormat == Draw::DataFormat::R16_UNORM) {
// We just use this format straight.
memcpy(dst16, src16, w * 2);
} else if (depthFormat == Draw::DataFormat::R8_UNORM) {
} else if (texFormat == Draw::DataFormat::R8_UNORM) {
// We fall back to R8_UNORM. Precision is enough for most cases of depth clearing and initialization we've seen,
// but hardly ideal.
for (int i = 0; i < width; i++) {
Expand All @@ -1409,8 +1425,6 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
return true;
};

Draw::DataFormat texFormat = srcPixelFormat == GE_FORMAT_DEPTH16 ? depthFormat : preferredPixelsFormat_;

int frameNumber = draw_->GetFrameCount();

// First look for an exact match (including contents hash) that we can re-use.
Expand Down

0 comments on commit bfd1115

Please sign in to comment.