Skip to content

Commit

Permalink
Merge pull request #10093 from unknownbrackets/softgpu
Browse files (browse the repository at this point in the history)
Speed up software clears, softgpu crash fix
  • Loading branch information
hrydgard committed Nov 12, 2017
2 parents 4f00190 + ed34cf0 commit 98191f0
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 34 deletions.
14 changes: 9 additions & 5 deletions GPU/Software/Clipper.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -218,11 +218,15 @@ void ProcessRect(const VertexData& v0, const VertexData& v1)


RotateUVThrough(v0, v1, *topright, *bottomleft); RotateUVThrough(v0, v1, *topright, *bottomleft);


// Four triangles to do backfaces as well. Two of them will get backface culled. if (gstate.isModeClear()) {
Rasterizer::DrawTriangle(*topleft, *topright, *bottomright); Rasterizer::ClearRectangle(v0, v1);
Rasterizer::DrawTriangle(*bottomright, *topright, *topleft); } else {
Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft); // Four triangles to do backfaces as well. Two of them will get backface culled.
Rasterizer::DrawTriangle(*topleft, *bottomleft, *bottomright); Rasterizer::DrawTriangle(*topleft, *topright, *bottomright);
Rasterizer::DrawTriangle(*bottomright, *topright, *topleft);
Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft);
Rasterizer::DrawTriangle(*topleft, *bottomleft, *bottomright);
}
} }
} }


Expand Down
188 changes: 168 additions & 20 deletions GPU/Software/Rasterizer.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -1183,7 +1183,7 @@ template <bool clearMode>
void DrawTriangleSlice( void DrawTriangleSlice(
const VertexData& v0, const VertexData& v1, const VertexData& v2, const VertexData& v0, const VertexData& v1, const VertexData& v2,
int minX, int minY, int maxX, int maxY, int minX, int minY, int maxX, int maxY,
int hy1, int hy2) bool byY, int h1, int h2)
{ {
Vec4<int> bias0 = Vec4<int>::AssignToAll(IsRightSideOrFlatBottomLine(v0.screenpos.xy(), v1.screenpos.xy(), v2.screenpos.xy()) ? -1 : 0); Vec4<int> bias0 = Vec4<int>::AssignToAll(IsRightSideOrFlatBottomLine(v0.screenpos.xy(), v1.screenpos.xy(), v2.screenpos.xy()) ? -1 : 0);
Vec4<int> bias1 = Vec4<int>::AssignToAll(IsRightSideOrFlatBottomLine(v1.screenpos.xy(), v2.screenpos.xy(), v0.screenpos.xy()) ? -1 : 0); Vec4<int> bias1 = Vec4<int>::AssignToAll(IsRightSideOrFlatBottomLine(v1.screenpos.xy(), v2.screenpos.xy(), v0.screenpos.xy()) ? -1 : 0);
Expand Down Expand Up @@ -1215,23 +1215,26 @@ void DrawTriangleSlice(
TriangleEdge e1; TriangleEdge e1;
TriangleEdge e2; TriangleEdge e2;


if (byY) {
maxY = std::min(maxY, minY + h2 * 16 * 2);
minY += h1 * 16 * 2;
} else {
maxX = std::min(maxX, minX + h2 * 16 * 2);
minX += h1 * 16 * 2;
}

ScreenCoords pprime(minX, minY, 0); ScreenCoords pprime(minX, minY, 0);
Vec4<int> w0_base = e0.Start(v1.screenpos, v2.screenpos, pprime); Vec4<int> w0_base = e0.Start(v1.screenpos, v2.screenpos, pprime);
Vec4<int> w1_base = e1.Start(v2.screenpos, v0.screenpos, pprime); Vec4<int> w1_base = e1.Start(v2.screenpos, v0.screenpos, pprime);
Vec4<int> w2_base = e2.Start(v0.screenpos, v1.screenpos, pprime); Vec4<int> w2_base = e2.Start(v0.screenpos, v1.screenpos, pprime);


// Step forward to y1 (slice..)
w0_base += e0.stepY * hy1;
w1_base += e1.stepY * hy1;
w2_base += e2.stepY * hy1;

// All the z values are the same, no interpolation required. // All the z values are the same, no interpolation required.
// This is common, and when we interpolate, we lose accuracy. // This is common, and when we interpolate, we lose accuracy.
const bool flatZ = v0.screenpos.z == v1.screenpos.z && v0.screenpos.z == v2.screenpos.z; const bool flatZ = v0.screenpos.z == v1.screenpos.z && v0.screenpos.z == v2.screenpos.z;


Sampler::Funcs sampler = Sampler::GetFuncs(); Sampler::Funcs sampler = Sampler::GetFuncs();


for (pprime.y = minY + hy1 * 32; pprime.y < minY + hy2 * 32; pprime.y += 32, for (pprime.y = minY; pprime.y < maxY; pprime.y += 32,
w0_base = e0.StepY(w0_base), w0_base = e0.StepY(w0_base),
w1_base = e1.StepY(w1_base), w1_base = e1.StepY(w1_base),
w2_base = e2.StepY(w2_base)) { w2_base = e2.StepY(w2_base)) {
Expand Down Expand Up @@ -1351,8 +1354,8 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData&


int minX = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) & ~0xF; int minX = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) & ~0xF;
int minY = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) & ~0xF; int minY = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) & ~0xF;
int maxX = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) & ~0xF; int maxX = (std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) + 0xF) & ~0xF;
int maxY = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) & ~0xF; int maxY = (std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) + 0xF) & ~0xF;


DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0); DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0);
DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0); DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0);
Expand All @@ -1362,24 +1365,37 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData&
maxY = std::min(maxY, (int)TransformUnit::DrawingToScreen(scissorBR).y); maxY = std::min(maxY, (int)TransformUnit::DrawingToScreen(scissorBR).y);


// 32 because we do two pixels at once, and we don't want overlap. // 32 because we do two pixels at once, and we don't want overlap.
int range = (maxY - minY) / 32 + 1; int rangeY = (maxY - minY) / 32 + 1;
if (gstate.isModeClear()) { int rangeX = (maxX - minX) / 32 + 1;
if (range >= 12 && (maxX - minX) >= 24 * 16) { if (rangeY >= 12 && rangeX >= rangeY * 4) {
if (gstate.isModeClear()) {
auto bound = [&](int a, int b) -> void { auto bound = [&](int a, int b) -> void {
DrawTriangleSlice<true>(v0, v1, v2, minX, minY, maxX, maxY, a, b); DrawTriangleSlice<true>(v0, v1, v2, minX, minY, maxX, maxY, false, a, b);
}; };
GlobalThreadPool::Loop(bound, 0, range); GlobalThreadPool::Loop(bound, 0, rangeX);
} else { } else {
DrawTriangleSlice<true>(v0, v1, v2, minX, minY, maxX, maxY, 0, range); auto bound = [&](int a, int b) -> void {
DrawTriangleSlice<false>(v0, v1, v2, minX, minY, maxX, maxY, false, a, b);
};
GlobalThreadPool::Loop(bound, 0, rangeX);
} }
} else { } else if (rangeY >= 12 && rangeX >= 12) {
if (range >= 12 && (maxX - minX) >= 24 * 16) { if (gstate.isModeClear()) {
auto bound = [&](int a, int b) -> void {
DrawTriangleSlice<true>(v0, v1, v2, minX, minY, maxX, maxY, true, a, b);
};
GlobalThreadPool::Loop(bound, 0, rangeY);
} else {
auto bound = [&](int a, int b) -> void { auto bound = [&](int a, int b) -> void {
DrawTriangleSlice<false>(v0, v1, v2, minX, minY, maxX, maxY, a, b); DrawTriangleSlice<false>(v0, v1, v2, minX, minY, maxX, maxY, true, a, b);
}; };
GlobalThreadPool::Loop(bound, 0, range); GlobalThreadPool::Loop(bound, 0, rangeY);
}
} else {
if (gstate.isModeClear()) {
DrawTriangleSlice<true>(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY);
} else { } else {
DrawTriangleSlice<false>(v0, v1, v2, minX, minY, maxX, maxY, 0, range); DrawTriangleSlice<false>(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY);
} }
} }
} }
Expand Down Expand Up @@ -1460,6 +1476,138 @@ void DrawPoint(const VertexData &v0)
} }
} }


// Fast path for rectangles drawn in clear mode: fills the depth buffer and/or
// the color buffer directly over the rectangle's bounding box instead of
// rasterizing triangles.
//
// v0, v1: the two corners of the rectangle. Only their screen positions are
//         used for the bounds; the clear depth and clear color are both taken
//         from v1 (v1.screenpos.z and v1.color0).
//
// The bounds are clamped against the scissor rectangle. Which channels are
// written is decided by the clear-mode depth/color/alpha masks in gstate.
void ClearRectangle(const VertexData &v0, const VertexData &v1)
{
	// Snap the bounding box outward to multiples of 16 screen units.
	// NOTE(review): screen coords appear to be 16 units per pixel (rows step by 16,
	// width is divided by 16) - confirm against ScreenCoords.
	int minX = std::min(v0.screenpos.x, v1.screenpos.x) & ~0xF;
	int minY = std::min(v0.screenpos.y, v1.screenpos.y) & ~0xF;
	int maxX = (std::max(v0.screenpos.x, v1.screenpos.x) + 0xF) & ~0xF;
	int maxY = (std::max(v0.screenpos.y, v1.screenpos.y) + 0xF) & ~0xF;

	// Clamp to the scissor rectangle (converted to screen coordinates).
	DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0);
	DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0);
	minX = std::max(minX, (int)TransformUnit::DrawingToScreen(scissorTL).x);
	maxX = std::max(0, std::min(maxX, (int)TransformUnit::DrawingToScreen(scissorBR).x));
	minY = std::max(minY, (int)TransformUnit::DrawingToScreen(scissorTL).y);
	maxY = std::max(0, std::min(maxY, (int)TransformUnit::DrawingToScreen(scissorBR).y));

	// Width of each row in pixels.  Nothing to do for an empty rectangle.
	const int w = (maxX - minX) / 16;
	if (w <= 0)
		return;

	if (gstate.isClearModeDepthMask()) {
		ScreenCoords pprime(minX, minY, 0);
		const u16 z = v1.screenpos.z;
		const int stride = gstate.DepthBufStride();
		// memset() writes a single repeated byte, so it is only usable when
		// both bytes of the 16-bit depth value are the same.
		const bool zBytesEqual = (z & 0xFF) == (z >> 8);

		for (pprime.y = minY; pprime.y < maxY; pprime.y += 16) {
			DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);

			if (zBytesEqual) {
				u16 *row = &depthbuf.as16[p.x + p.y * stride];
				memset(row, z, w * 2);
			} else {
				for (int x = 0; x < w; ++x) {
					SetPixelDepth(p.x + x, p.y, z);
				}
			}
		}
	}

	const u32 new_color = v1.color0.ToRGBA();
	// Only meaningful for the 16-bit formats; initialized so it is never read indeterminate.
	u16 new_color16 = 0;

	// Bits set here are preserved from the existing framebuffer contents.
	// Note: this stays 0xFFFFFFFF if keeping color and alpha, even for 16-bit.
	u32 keepOldMask = 0xFFFFFFFF;
	switch (gstate.FrameBufFormat()) {
	case GE_FORMAT_565:
		new_color16 = RGBA8888ToRGB565(new_color);
		// No alpha bits in this format, so clearing color clears everything.
		if (gstate.isClearModeColorMask())
			keepOldMask = 0;
		break;

	case GE_FORMAT_5551:
		new_color16 = RGBA8888ToRGBA5551(new_color);
		if (gstate.isClearModeColorMask())
			keepOldMask &= 0x00008000;
		if (gstate.isClearModeAlphaMask())
			keepOldMask &= 0x00007FFF;
		break;

	case GE_FORMAT_4444:
		new_color16 = RGBA8888ToRGBA4444(new_color);
		if (gstate.isClearModeColorMask())
			keepOldMask &= 0x0000F000;
		if (gstate.isClearModeAlphaMask())
			keepOldMask &= 0x00000FFF;
		break;

	case GE_FORMAT_8888:
		if (gstate.isClearModeColorMask())
			keepOldMask &= 0xFF000000;
		if (gstate.isClearModeAlphaMask())
			keepOldMask &= 0x00FFFFFF;
		break;

	case GE_FORMAT_INVALID:
		_dbg_assert_msg_(G3D, false, "Software: invalid framebuf format.");
		break;
	}

	if (keepOldMask == 0) {
		// Nothing is preserved: overwrite every pixel with the new color.
		ScreenCoords pprime(minX, minY, 0);
		const int stride = gstate.FrameBufStride();

		if (gstate.FrameBufFormat() == GE_FORMAT_8888) {
			// memset() is only valid when all four bytes of the color match.
			// (Previously this used a logical && and an unmasked shift, which
			// restricted the fast path to new_color == 0.)
			const bool bytesEqual32 = (new_color & 0xFF) == ((new_color >> 8) & 0xFF) &&
				(new_color & 0xFFFF) == (new_color >> 16);

			for (pprime.y = minY; pprime.y < maxY; pprime.y += 16) {
				DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);
				if (bytesEqual32) {
					u32 *row = &fb.as32[p.x + p.y * stride];
					memset(row, new_color, w * 4);
				} else {
					for (int x = 0; x < w; ++x) {
						fb.Set32(p.x + x, p.y, stride, new_color);
					}
				}
			}
		} else {
			// Same idea for 16-bit formats: both bytes must match to use memset().
			const bool bytesEqual16 = (new_color16 & 0xFF) == (new_color16 >> 8);

			for (pprime.y = minY; pprime.y < maxY; pprime.y += 16) {
				DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);
				if (bytesEqual16) {
					u16 *row = &fb.as16[p.x + p.y * stride];
					memset(row, new_color16, w * 2);
				} else {
					for (int x = 0; x < w; ++x) {
						fb.Set16(p.x + x, p.y, stride, new_color16);
					}
				}
			}
		}
	} else if (keepOldMask != 0xFFFFFFFF) {
		// Partial clear: merge the new color into the bits that are not preserved.
		ScreenCoords pprime(minX, minY, 0);
		const int stride = gstate.FrameBufStride();

		if (gstate.FrameBufFormat() == GE_FORMAT_8888) {
			for (pprime.y = minY; pprime.y < maxY; pprime.y += 16) {
				DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);
				for (int x = 0; x < w; ++x) {
					const u32 old_color = fb.Get32(p.x + x, p.y, stride);
					const u32 c = (old_color & keepOldMask) | (new_color & ~keepOldMask);
					fb.Set32(p.x + x, p.y, stride, c);
				}
			}
		} else {
			for (pprime.y = minY; pprime.y < maxY; pprime.y += 16) {
				DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);
				for (int x = 0; x < w; ++x) {
					const u16 old_color = fb.Get16(p.x + x, p.y, stride);
					const u16 c = (old_color & keepOldMask) | (new_color16 & ~keepOldMask);
					fb.Set16(p.x + x, p.y, stride, c);
				}
			}
		}
	}
	// keepOldMask == 0xFFFFFFFF: neither color nor alpha is being cleared, so the
	// color buffer is left untouched.
}

void DrawLine(const VertexData &v0, const VertexData &v1) void DrawLine(const VertexData &v0, const VertexData &v1)
{ {
// TODO: Use a proper line drawing algorithm that handles fractional endpoints correctly. // TODO: Use a proper line drawing algorithm that handles fractional endpoints correctly.
Expand Down
1 change: 1 addition & 0 deletions GPU/Software/Rasterizer.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace Rasterizer {
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2); void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2);
void DrawPoint(const VertexData &v0); void DrawPoint(const VertexData &v0);
void DrawLine(const VertexData &v0, const VertexData &v1); void DrawLine(const VertexData &v0, const VertexData &v1);
void ClearRectangle(const VertexData &v0, const VertexData &v1);


bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer);
bool GetCurrentTexture(GPUDebugBuffer &buffer, int level); bool GetCurrentTexture(GPUDebugBuffer &buffer, int level);
Expand Down
15 changes: 12 additions & 3 deletions GPU/Software/SoftGpu.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -155,22 +155,31 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
fbTex = nullptr; fbTex = nullptr;
} }


// For accuracy, try to handle 0 stride - sometimes used.
if (displayStride_ == 0) {
srcheight = 1;
}

Draw::TextureDesc desc{}; Draw::TextureDesc desc{};
desc.type = Draw::TextureType::LINEAR2D; desc.type = Draw::TextureType::LINEAR2D;
desc.format = Draw::DataFormat::R8G8B8A8_UNORM; desc.format = Draw::DataFormat::R8G8B8A8_UNORM;
desc.depth = 1; desc.depth = 1;
desc.mipLevels = 1; desc.mipLevels = 1;
bool hasImage = true; bool hasImage = true;
if (!Memory::IsValidAddress(displayFramebuf_)) { if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) {
hasImage = false; hasImage = false;
u1 = 1.0f; u1 = 1.0f;
} else if (displayFormat_ == GE_FORMAT_8888) { } else if (displayFormat_ == GE_FORMAT_8888) {
u8 *data = Memory::GetPointer(displayFramebuf_); u8 *data = Memory::GetPointer(displayFramebuf_);
desc.width = displayStride_; desc.width = displayStride_ == 0 ? srcwidth : displayStride_;
desc.height = srcheight; desc.height = srcheight;
desc.initData.push_back(data); desc.initData.push_back(data);
desc.format = Draw::DataFormat::R8G8B8A8_UNORM; desc.format = Draw::DataFormat::R8G8B8A8_UNORM;
u1 = (float)srcwidth / displayStride_; if (displayStride_ != 0) {
u1 = (float)srcwidth / displayStride_;
} else {
u1 = 1.0f;
}
} else { } else {
// TODO: This should probably be converted in a shader instead.. // TODO: This should probably be converted in a shader instead..
fbTexBuffer.resize(srcwidth * srcheight); fbTexBuffer.resize(srcwidth * srcheight);
Expand Down
15 changes: 12 additions & 3 deletions UI/EmuScreen.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -965,7 +965,11 @@ void EmuScreen::preRender() {
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
if ((!useBufferedRendering && !g_Config.bSoftwareRendering) || Core_IsStepping()) { if ((!useBufferedRendering && !g_Config.bSoftwareRendering) || Core_IsStepping()) {
// We need to clear here already so that drawing during the frame is done on a clean slate. // We need to clear here already so that drawing during the frame is done on a clean slate.
draw->BindFramebufferAsRenderTarget(nullptr, { RPAction::CLEAR, RPAction::CLEAR, 0xFF000000 }); if (Core_IsStepping()) {
draw->BindFramebufferAsRenderTarget(nullptr, { RPAction::KEEP, RPAction::DONT_CARE });
} else {
draw->BindFramebufferAsRenderTarget(nullptr, { RPAction::CLEAR, RPAction::CLEAR, 0xFF000000 });
}


Viewport viewport; Viewport viewport;
viewport.TopLeftX = 0; viewport.TopLeftX = 0;
Expand Down Expand Up @@ -1029,6 +1033,9 @@ void EmuScreen::render() {
if (coreState == CORE_NEXTFRAME) { if (coreState == CORE_NEXTFRAME) {
// set back to running for the next frame // set back to running for the next frame
coreState = CORE_RUNNING; coreState = CORE_RUNNING;
} else if (coreState == CORE_STEPPING) {
// If we're stepping, it's convenient not to clear the screen.
thin3d->BindFramebufferAsRenderTarget(nullptr, { RPAction::KEEP, RPAction::DONT_CARE });
} else { } else {
// Didn't actually reach the end of the frame, ran out of the blockTicks cycles. // Didn't actually reach the end of the frame, ran out of the blockTicks cycles.
// In this case we need to bind and wipe the backbuffer, at least. // In this case we need to bind and wipe the backbuffer, at least.
Expand All @@ -1040,8 +1047,10 @@ void EmuScreen::render() {
PSP_EndHostFrame(); PSP_EndHostFrame();
if (invalid_) if (invalid_)
return; return;


if (!osm.IsEmpty() || g_Config.bShowDebugStats || g_Config.iShowFPSCounter || g_Config.bShowTouchControls || g_Config.bShowDeveloperMenu || g_Config.bShowAudioDebug || saveStatePreview_->GetVisibility() != UI::V_GONE || g_Config.bShowFrameProfiler) { const bool hasVisibileUI = !osm.IsEmpty() || saveStatePreview_->GetVisibility() != UI::V_GONE || g_Config.bShowTouchControls;
const bool showDebugUI = g_Config.bShowDebugStats || g_Config.bShowDeveloperMenu || g_Config.bShowAudioDebug || g_Config.bShowFrameProfiler;
if (hasVisibileUI || showDebugUI || g_Config.iShowFPSCounter != 0) {
// This sets up some important states but not the viewport. // This sets up some important states but not the viewport.
screenManager()->getUIContext()->Begin(); screenManager()->getUIContext()->Begin();


Expand Down
7 changes: 4 additions & 3 deletions ext/native/thin3d/thin3d_vulkan.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -312,8 +312,9 @@ struct DescriptorSetKey {
class VKTexture : public Texture { class VKTexture : public Texture {
public: public:
VKTexture(VulkanContext *vulkan, VkCommandBuffer cmd, const TextureDesc &desc) VKTexture(VulkanContext *vulkan, VkCommandBuffer cmd, const TextureDesc &desc)
: vulkan_(vulkan), format_(desc.format), mipLevels_(desc.mipLevels) { : vulkan_(vulkan), mipLevels_(desc.mipLevels), format_(desc.format) {
Create(cmd, desc); bool result = Create(cmd, desc);
assert(result);
} }


~VKTexture() { ~VKTexture() {
Expand All @@ -336,7 +337,7 @@ class VKTexture : public Texture {
} }


VulkanContext *vulkan_; VulkanContext *vulkan_;
VulkanTexture *vkTex_; VulkanTexture *vkTex_ = nullptr;


int mipLevels_; int mipLevels_;


Expand Down

0 comments on commit 98191f0

Please sign in to comment.