Skip to content

Commit

Permalink
Merge pull request #9321 from hrydgard/d3d11-readback
Browse files Browse the repository at this point in the history
D3D11 framebuffer readback
  • Loading branch information
hrydgard committed Feb 17, 2017
2 parents 522ac5c + e8396b1 commit 741a61e
Show file tree
Hide file tree
Showing 21 changed files with 237 additions and 199 deletions.
2 changes: 2 additions & 0 deletions GPU/Common/DepalettizeShaderCommon.cpp
Expand Up @@ -26,6 +26,8 @@

#define WRITE p+=sprintf

// TODO: Add a compute shader path. Complete waste of time to set up a graphics state.

// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) {
char *p = buffer;
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/FramebufferCommon.cpp
Expand Up @@ -1325,6 +1325,7 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst
}
}

// 1:1 pixel sides buffers, we resize buffers to these before we read them back.
VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFramebuffer *vfb) {
// For now we'll keep these on the same struct as the ones that can get displayed
// (and blatantly copy work already done above while at it).
Expand Down
7 changes: 7 additions & 0 deletions GPU/Common/FramebufferCommon.h
Expand Up @@ -152,6 +152,7 @@ namespace Draw {
class DrawContext;
}

struct GPUDebugBuffer;
class TextureCacheCommon;
class ShaderManagerCommon;

Expand Down Expand Up @@ -264,6 +265,12 @@ class FramebufferManagerCommon {

Draw::Framebuffer *GetTempFBO(u16 w, u16 h, Draw::FBColorDepth depth = Draw::FBO_8888);

// Debug features
virtual bool GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) = 0;
virtual bool GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) = 0;
virtual bool GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) = 0;
virtual bool GetOutputFramebuffer(GPUDebugBuffer &buffer) = 0;

protected:
virtual void SetViewport2D(int x, int y, int w, int h) = 0;
void CalculatePostShaderUniforms(int bufferWidth, int bufferHeight, int renderWidth, int renderHeight, PostShaderUniforms *uniforms);
Expand Down
223 changes: 144 additions & 79 deletions GPU/D3D11/FramebufferManagerD3D11.cpp
Expand Up @@ -105,9 +105,22 @@ FramebufferManagerD3D11::FramebufferManagerD3D11(Draw::DrawContext *draw)
vb.Usage = D3D11_USAGE_DYNAMIC;
vb.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
device_->CreateBuffer(&vb, nullptr, &quadBuffer_);

D3D11_TEXTURE2D_DESC packDesc{};
packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
packDesc.BindFlags = 0;
packDesc.Width = 512; // 512x512 is the maximum size of a framebuffer on the PSP.
packDesc.Height = 512;
packDesc.ArraySize = 1;
packDesc.MipLevels = 1;
packDesc.Usage = D3D11_USAGE_STAGING;
packDesc.SampleDesc.Count = 1;
packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
device_->CreateTexture2D(&packDesc, nullptr, &packTexture_);
}

FramebufferManagerD3D11::~FramebufferManagerD3D11() {
packTexture_->Release();
// Drawing cleanup
if (quadVertexShader_)
quadVertexShader_->Release();
Expand Down Expand Up @@ -635,7 +648,7 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid
return;
} else {
for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA8888(dst32, src32, width);
memcpy(dst32, src32, width * 4);
src32 += srcStride;
dst32 += dstStride;
}
Expand All @@ -646,21 +659,21 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid
switch (format) {
case GE_FORMAT_565: // BGR 565
for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGB565(dst16, src32, width);
ConvertRGBA8888ToRGB565(dst16, src32, width);
src32 += srcStride;
dst16 += dstStride;
}
break;
case GE_FORMAT_5551: // ABGR 1555
for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA5551(dst16, src32, width);
ConvertRGBA8888ToRGBA5551(dst16, src32, width);
src32 += srcStride;
dst16 += dstStride;
}
break;
case GE_FORMAT_4444: // ABGR 4444
for (u32 y = 0; y < height; ++y) {
ConvertBGRA8888ToRGBA4444(dst16, src32, width);
ConvertRGBA8888ToRGBA4444(dst16, src32, width);
src32 += srcStride;
dst16 += dstStride;
}
Expand All @@ -673,6 +686,9 @@ void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 wid
}
}

// This function takes an already correctly-sized framebuffer and packs it into RAM.
// Does not need to account for scaling.
// Color conversion is currently done on CPU but should be done on GPU.
void FramebufferManagerD3D11::PackFramebufferD3D11_(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
if (!vfb->fbo) {
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferD3D11_: vfb->fbo == 0");
Expand All @@ -686,85 +702,38 @@ void FramebufferManagerD3D11::PackFramebufferD3D11_(VirtualFramebuffer *vfb, int
// We always need to convert from the framebuffer native format.
// Right now that's always 8888.
DEBUG_LOG(HLE, "Reading framebuffer to mem, fb_address = %08x", fb_address);
ID3D11Texture2D *colorTex = (ID3D11Texture2D *)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT, 0);

/*
LPDIRECT3DSURFACE9 renderTarget = (LPDIRECT3DSURFACE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_COLOR_BIT | Draw::FB_SURFACE_BIT, 0);
D3DSURFACE_DESC desc;
renderTarget->GetDesc(&desc);
LPDIRECT3DSURFACE9 offscreen = GetOffscreenSurface(renderTarget, vfb);
if (offscreen) {
HRESULT hr = pD3Ddevice->GetRenderTargetData(renderTarget, offscreen);
if (SUCCEEDED(hr)) {
D3DLOCKED_RECT locked;
u32 widthFactor = vfb->renderWidth / vfb->bufferWidth;
u32 heightFactor = vfb->renderHeight / vfb->bufferHeight;
RECT rect = { (LONG)(x * widthFactor), (LONG)(y * heightFactor), (LONG)((x + w) * widthFactor), (LONG)((y + h) * heightFactor) };
hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);
if (SUCCEEDED(hr)) {
// TODO: Handle the other formats? We don't currently create them, I think.
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
// Pixel size always 4 here because we always request BGRA8888.
ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)locked.pBits, vfb->fb_stride, locked.Pitch / 4, w, h, vfb->format);
offscreen->UnlockRect();
} else {
ERROR_LOG_REPORT(G3D, "Unable to lock rect from %08x: %d,%d %dx%d of %dx%d", fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight);
}
} else {
ERROR_LOG_REPORT(G3D, "Unable to download render target data from %08x", fb_address);
}
D3D11_BOX srcBox{ 0, 0, 0, vfb->width, vfb->height, 1 };
context_->CopySubresourceRegion(packTexture_, 0, 0, 0, 0, colorTex, 0, &srcBox);

// Ideally, we'd round robin between two packTexture_, and simply use the other one. Though if the game
// does a once-off copy, that won't work at all.

// BIG GPU STALL
D3D11_MAPPED_SUBRESOURCE map;
HRESULT result = context_->Map(packTexture_, 0, D3D11_MAP_READ, 0, &map);
if (FAILED(result)) {
return;
}
*/

// TODO: Handle the other formats? We don't currently create them, I think.
const int srcByteOffset = y * map.RowPitch + x * 4;
const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
// Pixel size always 4 here because we always request BGRA8888.
ConvertFromRGBA8888(Memory::GetPointer(fb_address + dstByteOffset), (u8 *)map.pData, vfb->fb_stride, map.RowPitch/4, w, h, vfb->format);
context_->Unmap(packTexture_, 0);
}

// Nobody calls this yet.
void FramebufferManagerD3D11::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
if (!vfb->fbo) {
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackDepthbuffer: vfb->fbo == 0");
return;
}

// We always read the depth buffer in 24_8 format.
const u32 z_address = (0x04000000) | vfb->z_address;

/*
DEBUG_LOG(SCEGE, "Reading depthbuffer to mem at %08x for vfb=%08x", z_address, vfb->fb_address);
LPDIRECT3DTEXTURE9 tex = (LPDIRECT3DTEXTURE9)draw_->GetFramebufferAPITexture(vfb->fbo, Draw::FB_DEPTH_BIT, 0);
if (tex) {
D3DSURFACE_DESC desc;
D3DLOCKED_RECT locked;
tex->GetLevelDesc(0, &desc);
RECT rect = { 0, 0, (LONG)desc.Width, (LONG)desc.Height };
HRESULT hr = tex->LockRect(0, &locked, &rect, D3DLOCK_READONLY);
if (SUCCEEDED(hr)) {
const int dstByteOffset = y * vfb->fb_stride * sizeof(s16);
const u32 *packed = (const u32 *)locked.pBits;
u16 *depth = (u16 *)Memory::GetPointer(z_address);
// TODO: Optimize.
for (int yp = 0; yp < h; ++yp) {
for (int xp = 0; xp < w; ++xp) {
const int offset = (yp + y) & vfb->z_stride + x + xp;
float scaled = FromScaledDepth((packed[offset] & 0x00FFFFFF) * (1.0f / 16777215.0f));
if (scaled <= 0.0f) {
depth[offset] = 0;
} else if (scaled >= 65535.0f) {
depth[offset] = 65535;
} else {
depth[offset] = (int)scaled;
}
}
}
tex->UnlockRect(0);
} else {
ERROR_LOG_REPORT(G3D, "Unable to lock rect from depth %08x: %d,%d %dx%d of %dx%d", vfb->fb_address, rect.left, rect.top, rect.right, rect.bottom, vfb->renderWidth, vfb->renderHeight);
}
} else {
ERROR_LOG_REPORT(G3D, "Unable to download render target depth from %08x", vfb->fb_address);
}*/
// TODO
}

void FramebufferManagerD3D11::EndFrame() {
Expand Down Expand Up @@ -868,18 +837,114 @@ void FramebufferManagerD3D11::Resized() {
resized_ = true;
}

bool FramebufferManagerD3D11::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) {
return false;
// Lots of this code could be shared (like the downsampling).
bool FramebufferManagerD3D11::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) {
VirtualFramebuffer *vfb = currentRenderVfb_;
if (!vfb) {
vfb = GetVFBAt(fb_address);
}

if (!vfb) {
// If there's no vfb and we're drawing there, must be memory?
buffer = GPUDebugBuffer(Memory::GetPointer(fb_address | 0x04000000), fb_stride, 512, format);
return true;
}

int w = vfb->renderWidth, h = vfb->renderHeight;
Draw::Framebuffer *fboForRead = nullptr;
if (vfb->fbo) {
if (maxRes > 0 && vfb->renderWidth > vfb->width * maxRes) {
w = vfb->width * maxRes;
h = vfb->height * maxRes;

Draw::Framebuffer *tempFBO = GetTempFBO(w, h);
VirtualFramebuffer tempVfb = *vfb;
tempVfb.fbo = tempFBO;
tempVfb.bufferWidth = vfb->width;
tempVfb.bufferHeight = vfb->height;
tempVfb.renderWidth = w;
tempVfb.renderHeight = h;
BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0);

fboForRead = tempFBO;
} else {
fboForRead = vfb->fbo;
}
}

buffer.Allocate(w, h, GE_FORMAT_8888, !useBufferedRendering_, true);

ID3D11Texture2D *packTex;
D3D11_TEXTURE2D_DESC packDesc{};
packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
packDesc.BindFlags = 0;
packDesc.Width = w;
packDesc.Height = h;
packDesc.ArraySize = 1;
packDesc.MipLevels = 1;
packDesc.Usage = D3D11_USAGE_STAGING;
packDesc.SampleDesc.Count = 1;
packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
device_->CreateTexture2D(&packDesc, nullptr, &packTex);

ID3D11Texture2D *nativeTex = (ID3D11Texture2D *)draw_->GetFramebufferAPITexture(fboForRead, Draw::FB_COLOR_BIT, 0);
context_->CopyResource(packTex, nativeTex);

D3D11_MAPPED_SUBRESOURCE map;
context_->Map(packTex, 0, D3D11_MAP_READ, 0, &map);

for (int y = 0; y < h; y++) {
uint8_t *dest = (uint8_t *)buffer.GetData() + y * w * 4;
const uint8_t *src = ((const uint8_t *)map.pData) + map.RowPitch * y;
memcpy(dest, src, 4 * w);
}

context_->Unmap(packTex, 0);
packTex->Release();
return true;
}

bool FramebufferManagerD3D11::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
bool FramebufferManagerD3D11::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) {
return false;
}

bool FramebufferManagerD3D11::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) {
bool FramebufferManagerD3D11::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) {
return false;
}

bool FramebufferManagerD3D11::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) {
return false;
}
bool FramebufferManagerD3D11::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
ID3D11Texture2D *backbuffer = (ID3D11Texture2D *)draw_->GetNativeObject(Draw::NativeObject::BACKBUFFER_COLOR_TEX);
D3D11_TEXTURE2D_DESC desc;
backbuffer->GetDesc(&desc);
int w = desc.Width;
int h = desc.Height;
buffer.Allocate(w, h, GE_FORMAT_8888, !useBufferedRendering_, true);

ID3D11Texture2D *packTex;
D3D11_TEXTURE2D_DESC packDesc{};
packDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
packDesc.BindFlags = 0;
packDesc.Width = w;
packDesc.Height = h;
packDesc.ArraySize = 1;
packDesc.MipLevels = 1;
packDesc.Usage = D3D11_USAGE_STAGING;
packDesc.SampleDesc.Count = 1;
packDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
device_->CreateTexture2D(&packDesc, nullptr, &packTex);

context_->CopyResource(packTex, backbuffer);

D3D11_MAPPED_SUBRESOURCE map;
context_->Map(packTex, 0, D3D11_MAP_READ, 0, &map);

for (int y = 0; y < h; y++) {
uint8_t *dest = (uint8_t *)buffer.GetData() + y * w * 4;
const uint8_t *src = ((const uint8_t *)map.pData) + map.RowPitch * y;
memcpy(dest, src, 4 * w);
}

context_->Unmap(packTex, 0);
packTex->Release();
return true;
}
14 changes: 9 additions & 5 deletions GPU/D3D11/FramebufferManagerD3D11.h
Expand Up @@ -69,10 +69,10 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {

virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) override;

bool GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes);
bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer);
bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer);
bool GetOutputFramebuffer(GPUDebugBuffer &buffer);
bool GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) override;
bool GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) override;
bool GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) override;
bool GetOutputFramebuffer(GPUDebugBuffer &buffer) override;

virtual void RebindFramebuffer() override;

Expand Down Expand Up @@ -136,6 +136,10 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {
ShaderManagerD3D11 *shaderManagerD3D11_;
DrawEngineD3D11 *drawEngine_;

// 1:1 Readback texture, 512x512 fixed
// For larger debug readbacks, we create/destroy textures on the fly.
ID3D11Texture2D *packTexture_;

// Used by post-processing shader
std::vector<Draw::Framebuffer *> extraFBOs_;

Expand All @@ -145,4 +149,4 @@ class FramebufferManagerD3D11 : public FramebufferManagerCommon {
AsyncPBO *pixelBufObj_; //this isn't that large
u8 currentPBO_;
#endif
};
};
16 changes: 0 additions & 16 deletions GPU/D3D11/GPU_D3D11.cpp
Expand Up @@ -981,18 +981,6 @@ void GPU_D3D11::DoState(PointerWrap &p) {
}
}

bool GPU_D3D11::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) {
return framebufferManagerD3D11_->GetCurrentFramebuffer(buffer, type, maxRes);
}

bool GPU_D3D11::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) {
return framebufferManagerD3D11_->GetCurrentDepthbuffer(buffer);
}

bool GPU_D3D11::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) {
return framebufferManagerD3D11_->GetCurrentStencilbuffer(buffer);
}

bool GPU_D3D11::GetCurrentTexture(GPUDebugBuffer &buffer, int level) {
if (!gstate.isTextureMapEnabled()) {
return false;
Expand All @@ -1007,10 +995,6 @@ bool GPU_D3D11::GetCurrentClut(GPUDebugBuffer &buffer) {
return textureCacheD3D11_->GetCurrentClutBuffer(buffer);
}

bool GPU_D3D11::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
return framebufferManagerD3D11_->GetOutputFramebuffer(buffer);
}

bool GPU_D3D11::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
return drawEngine_.GetCurrentSimpleVertices(count, vertices, indices);
}
Expand Down

0 comments on commit 741a61e

Please sign in to comment.