Permalink
Browse files

Use the CPU color conversion from GLES for readbacks in Vulkan (and D3D11). Fix bug.
  • Loading branch information...
hrydgard committed Oct 28, 2017
1 parent 34b65c0 commit a9f01c45e07d730adfadf6636da1c5851135c5ac
@@ -1974,6 +1974,63 @@ bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
return true;
}
// TODO: SSE/NEON
// Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :)
//
// Converts `height` rows of `width` RGBA8888 pixels read from `src` into `format`,
// writing the result to `dst`.
//
// srcStride is added to a 32-bit source pointer after every row. dstStride is added
// to a 32-bit destination pointer for GE_FORMAT_8888 and to a 16-bit destination
// pointer for the other formats, so both strides count pixels, not bytes.
// For GE_FORMAT_8888 with src == dst the call is a no-op; for GE_FORMAT_INVALID
// nothing is written.
void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) {
	// Only `width` pixels of each row are touched; the remainder of the stride must be
	// skipped. Some games pack data into the cracks, like MotoGP.
	const u32 *srcRow = reinterpret_cast<const u32 *>(src);

	if (format == GE_FORMAT_8888) {
		// Same format and same buffer: there is nothing to convert or move.
		if (src == dst) {
			return;
		}

		// Plain row-by-row copy. The two buffers are assumed not to intersect.
		u32 *dstRow = reinterpret_cast<u32 *>(dst);
		for (u32 rowsLeft = height; rowsLeft != 0; --rowsLeft) {
			memcpy(dstRow, srcRow, width * 4);
			srcRow += srcStride;
			dstRow += dstStride;
		}
		return;
	}

	// 16-bit destination formats. Per the original note, it shouldn't matter on this
	// path if src and dst intersect.
	u16 *dstRow = reinterpret_cast<u16 *>(dst);
	switch (format) {
	case GE_FORMAT_565: // BGR 565
		for (u32 rowsLeft = height; rowsLeft != 0; --rowsLeft) {
			ConvertRGBA8888ToRGB565(dstRow, srcRow, width);
			srcRow += srcStride;
			dstRow += dstStride;
		}
		break;

	case GE_FORMAT_5551: // ABGR 1555
		for (u32 rowsLeft = height; rowsLeft != 0; --rowsLeft) {
			ConvertRGBA8888ToRGBA5551(dstRow, srcRow, width);
			srcRow += srcStride;
			dstRow += dstStride;
		}
		break;

	case GE_FORMAT_4444: // ABGR 4444
		for (u32 rowsLeft = height; rowsLeft != 0; --rowsLeft) {
			ConvertRGBA8888ToRGBA4444(dstRow, srcRow, width);
			srcRow += srcStride;
			dstRow += dstStride;
		}
		break;

	case GE_FORMAT_8888:
	case GE_FORMAT_INVALID:
		// Not possible: 8888 was handled above, and INVALID has no pixel layout.
		break;
	}
}
// This function takes an already correctly-sized framebuffer and packs it into RAM.
// Does not need to account for scaling.
// Color conversion is currently done on CPU but should theoretically be done on GPU.
@@ -1986,19 +2043,43 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int
return;
}
const u32 fb_address = (0x04000000) | vfb->fb_address;
int possibleH = std::max(vfb->height - y, 0);
if (h > possibleH) {
h = possibleH;
}
Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format);
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
// Pixel size always 4 here because we always request RGBA8888
u32 bufSize = vfb->fb_stride * h * 4;
u32 fb_address = 0x04000000 | vfb->fb_address;
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
u8 *destPtr = Memory::GetPointer(fb_address + dstByteOffset);
bool convert = vfb->format != GE_FORMAT_8888;
const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
const int packWidth = std::min(vfb->fb_stride, std::min(x + w, (int)vfb->width));
// We always need to convert from the framebuffer native format.
// Right now that's always 8888.
DEBUG_LOG(G3D, "Reading framebuffer to mem, fb_address = %08x", fb_address);
int dstByteOffset = y * vfb->fb_stride * dstBpp;
u8 *dst = Memory::GetPointer(fb_address + dstByteOffset);
draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, x, y, w, h, destFormat, destPtr, vfb->fb_stride);
u8 *packed = nullptr;
if (!convert) {
packed = (u8 *)dst;
} else {
// End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address
if (!convBuf_ || convBufSize_ < bufSize) {
delete[] convBuf_;
convBuf_ = new u8[bufSize];
convBufSize_ = bufSize;
}
packed = convBuf_;
}
if (packed) {
DEBUG_LOG(FRAMEBUF, "Reading framebuffer to mem, bufSize = %u, fb_address = %08x", bufSize, fb_address);
int packW = h == 1 ? packWidth : vfb->fb_stride; // TODO: What's this about?
draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, y, w, h, Draw::DataFormat::R8G8B8A8_UNORM, packed, packW);
if (convert) {
ConvertFromRGBA8888(dst, packed, vfb->fb_stride, vfb->fb_stride, packWidth, h, vfb->format);
}
}
}
void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) {
@@ -402,6 +402,10 @@ class FramebufferManagerCommon {
int bloomHack_ = 0;
bool trueColor_ = false;
// Used to convert readbacks.
u8 *convBuf_ = nullptr;
u32 convBufSize_ = 0;
// Used by post-processing shaders
std::vector<Draw::Framebuffer *> extraFBOs_;
@@ -204,7 +204,6 @@ FramebufferManagerGLES::FramebufferManagerGLES(Draw::DrawContext *draw) :
FramebufferManagerCommon(draw),
drawPixelsTex_(0),
drawPixelsTexFormat_(GE_FORMAT_INVALID),
convBuf_(nullptr),
draw2dprogram_(nullptr),
postShaderProgram_(nullptr),
stencilUploadProgram_(nullptr),
@@ -721,63 +720,6 @@ void FramebufferManagerGLES::BlitFramebuffer(VirtualFramebuffer *dst, int dstX,
CHECK_GL_ERROR_IF_DEBUG();
}
// TODO: SSE/NEON
// Could also make C fake-simd for 64-bit, two 8888 pixels fit in a register :)
//
// Row-wise conversion of an RGBA8888 image at `src` into `format` at `dst`.
// `width` pixels are processed on each of `height` rows. After each row the source
// pointer (32-bit elements) advances by srcStride and the destination pointer
// (32-bit elements for GE_FORMAT_8888, 16-bit elements otherwise) advances by
// dstStride, i.e. strides are expressed in pixels.
void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) {
	// Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP.
	const u32 *in = (const u32 *)src;

	if (format != GE_FORMAT_8888) {
		// 16-bit outputs. Per the original note, it shouldn't matter here if the
		// buffers intersect.
		u16 *out = (u16 *)dst;
		switch (format) {
		case GE_FORMAT_565: // BGR 565
			for (u32 row = 0; row < height; ++row, in += srcStride, out += dstStride) {
				ConvertRGBA8888ToRGB565(out, in, width);
			}
			break;
		case GE_FORMAT_5551: // ABGR 1555
			for (u32 row = 0; row < height; ++row, in += srcStride, out += dstStride) {
				ConvertRGBA8888ToRGBA5551(out, in, width);
			}
			break;
		case GE_FORMAT_4444: // ABGR 4444
			for (u32 row = 0; row < height; ++row, in += srcStride, out += dstStride) {
				ConvertRGBA8888ToRGBA4444(out, in, width);
			}
			break;
		case GE_FORMAT_8888:
		case GE_FORMAT_INVALID:
			// Not possible.
			break;
		}
		return;
	}

	// Same format: converting a buffer onto itself requires no work at all.
	if (src == dst) {
		return;
	}

	// Otherwise copy each row's visible pixels; here let's assume the buffers
	// don't intersect.
	u32 *out = (u32 *)dst;
	for (u32 row = 0; row < height; ++row, in += srcStride, out += dstStride) {
		memcpy(out, in, width * 4);
	}
}
void FramebufferManagerGLES::PackFramebufferAsync_(VirtualFramebuffer *vfb) {
CHECK_GL_ERROR_IF_DEBUG();
const int MAX_PBO = 2;
@@ -900,48 +842,7 @@ void FramebufferManagerGLES::PackFramebufferAsync_(VirtualFramebuffer *vfb) {
}
void FramebufferManagerGLES::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
if (!vfb->fbo) {
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0");
return;
}
int possibleH = std::max(vfb->height - y, 0);
if (h > possibleH) {
h = possibleH;
}
// Pixel size always 4 here because we always request RGBA8888
u32 bufSize = vfb->fb_stride * h * 4;
u32 fb_address = 0x04000000 | vfb->fb_address;
bool convert = vfb->format != GE_FORMAT_8888;
const int dstBpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
const int packWidth = std::min(vfb->fb_stride, std::min(x + w, (int)vfb->width));
int dstByteOffset = y * vfb->fb_stride * dstBpp;
u8 *dst = Memory::GetPointer(fb_address + dstByteOffset);
u8 *packed = nullptr;
if (!convert) {
packed = (u8 *)dst;
} else {
// End result may be 16-bit but we are reading 32-bit, so there may not be enough space at fb_address
if (!convBuf_ || convBufSize_ < bufSize) {
delete [] convBuf_;
convBuf_ = new u8[bufSize];
convBufSize_ = bufSize;
}
packed = convBuf_;
}
if (packed) {
DEBUG_LOG(FRAMEBUF, "Reading framebuffer to mem, bufSize = %u, fb_address = %08x", bufSize, fb_address);
int packW = h == 1 ? packWidth : vfb->fb_stride; // TODO: What's this about?
draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, y, packW, h, Draw::DataFormat::R8G8B8A8_UNORM, packed, packW);
if (convert) {
ConvertFromRGBA8888(dst, packed, vfb->fb_stride, vfb->fb_stride, packWidth, h, vfb->format);
}
}
FramebufferManagerCommon::PackFramebufferSync_(vfb, x, y, w, h);
// TODO: Move this into Thin3d.
if (gl_extensions.GLES3 && glInvalidateFramebuffer != nullptr) {
@@ -112,8 +112,6 @@ class FramebufferManagerGLES : public FramebufferManagerCommon {
int drawPixelsTexW_;
int drawPixelsTexH_;
u8 *convBuf_;
u32 convBufSize_;
GLSLProgram *draw2dprogram_;
GLSLProgram *plainColorProgram_;
GLSLProgram *postShaderProgram_;
@@ -753,7 +753,7 @@ void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, int pixelStrid
assert(res == VK_SUCCESS);
for (int y = 0; y < height; y++) {
const uint8_t *src = (const uint8_t *)mappedData + width * y;
const uint8_t *src = (const uint8_t *)mappedData + width * pixelSize * y;
uint8_t *dst = pixels + pixelStride * pixelSize * y;
memcpy(dst, src, width * pixelSize);
}

0 comments on commit a9f01c4

Please sign in to comment.