diff --git a/.gitignore b/.gitignore index 2beadcfc838b..918734392741 100644 --- a/.gitignore +++ b/.gitignore @@ -68,6 +68,7 @@ build.ios versionname.txt versioncode.txt build*/ +android/.cxx # Temp file used by jenkins windows build (TODO: remove) desc.txt diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index ff2d6d23fadb..b0064470724c 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -67,6 +67,8 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "JitInvalidationHack", &flags_.JitInvalidationHack); CheckSetting(iniFile, gameID, "HideISOFiles", &flags_.HideISOFiles); CheckSetting(iniFile, gameID, "MoreAccurateVMMUL", &flags_.MoreAccurateVMMUL); + CheckSetting(iniFile, gameID, "ForceSoftwareRenderer", &flags_.ForceSoftwareRenderer); + CheckSetting(iniFile, gameID, "DarkStalkersPresentHack", &flags_.DarkStalkersPresentHack); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 0938b4c731fb..0baf9db5fac6 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -67,6 +67,8 @@ struct CompatFlags { bool JitInvalidationHack; bool HideISOFiles; bool MoreAccurateVMMUL; + bool ForceSoftwareRenderer; + bool DarkStalkersPresentHack; }; class IniFile; diff --git a/Core/HLE/sceUtility.cpp b/Core/HLE/sceUtility.cpp index 1a9c9947015d..3eb751436b6e 100644 --- a/Core/HLE/sceUtility.cpp +++ b/Core/HLE/sceUtility.cpp @@ -130,7 +130,7 @@ enum UtilityDialogType { // Only a single dialog is allowed at a time. static UtilityDialogType currentDialogType; -static bool currentDialogActive; +bool currentDialogActive; static PSPSaveDialog saveDialog; static PSPMsgDialog msgDialog; static PSPOskDialog oskDialog; diff --git a/Core/System.cpp b/Core/System.cpp index 266eab30e3a8..f36cbcec8359 100644 --- a/Core/System.cpp +++ b/Core/System.cpp @@ -349,6 +349,11 @@ bool PSP_InitStart(const CoreParameter &coreParam, std::string *error_string) { CPU_Init(); + // Compat flags get loaded in CPU_Init (which is a bit of a misnomer) so we check for SW renderer here. + if (g_Config.bSoftwareRendering || PSP_CoreParameter().compat.flags().ForceSoftwareRenderer) { + coreParameter.gpuCore = GPUCORE_SOFTWARE; + } + *error_string = coreParameter.errorString; bool success = coreParameter.fileToStart != ""; if (!success) { diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 53903c75c132..44da1289345f 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -197,6 +197,7 @@ MultiThreadedDebug Common/DbgNew.h ProgramDatabase + false true diff --git a/GPU/Math3D.h b/GPU/Math3D.h index 292c63be0529..ada1a6931eda 100644 --- a/GPU/Math3D.h +++ b/GPU/Math3D.h @@ -625,6 +625,10 @@ class Vec4 *this = *this / f; } + bool operator ==(const Vec4 &other) const { + return x == other.x && y == other.y && z == other.z && w == other.w; + } + T Length2() const { return x*x + y*y + z*z + w*w; diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp index eb181ce6131b..6d22d2e7ee03 100644 --- a/GPU/Software/Clipper.cpp +++ b/GPU/Software/Clipper.cpp @@ -17,6 +17,8 @@ #include +#include "Core/System.h" + #include "GPU/GPUState.h" #include "GPU/Software/Clipper.h" @@ -24,6 +26,11 @@ #include "profiler/profiler.h" + +extern bool g_DarkStalkerStretch; +// For Darkstalkers hack. Ugh. +extern bool currentDialogActive; + namespace Clipper { enum { @@ -49,39 +56,36 @@ static inline int CalcClipMask(const ClipCoords& v) return mask; } -#define AddInterpolatedVertex(t, out, in, numVertices) \ -{ \ - Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \ - numVertices++; \ +inline bool different_signs(float x, float y) { + return ((x <= 0 && y > 0) || (x > 0 && y <= 0)); } -#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0)) - -#define CLIP_DOTPROD(I, A, B, C, D) \ - (Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D) +inline float clip_dotprod(const VertexData &vert, float A, float B, float C, float D) { + return (vert.clippos.x * A + vert.clippos.y * B + vert.clippos.z * C + vert.clippos.w * D); +} #define POLY_CLIP( PLANE_BIT, A, B, C, D ) \ { \ if (mask & PLANE_BIT) { \ int idxPrev = inlist[0]; \ - float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \ + float dpPrev = clip_dotprod(*Vertices[idxPrev], A, B, C, D );\ int outcount = 0; \ \ inlist[n] = inlist[0]; \ for (int j = 1; j <= n; j++) { \ int idx = inlist[j]; \ - float dp = CLIP_DOTPROD(idx, A, B, C, D ); \ + float dp = clip_dotprod(*Vertices[idx], A, B, C, D ); \ if (dpPrev >= 0) { \ outlist[outcount++] = idxPrev; \ } \ \ - if (DIFFERENT_SIGNS(dp, dpPrev)) { \ + if (different_signs(dp, dpPrev)) { \ if (dp < 0) { \ float t = dp / (dp - dpPrev); \ - AddInterpolatedVertex(t, idx, idxPrev, numVertices); \ + Vertices[numVertices++]->Lerp(t, *Vertices[idx], *Vertices[idxPrev]); \ } else { \ float t = dpPrev / (dpPrev - dp); \ - AddInterpolatedVertex(t, idxPrev, idx, numVertices); \ + Vertices[numVertices++]->Lerp(t, *Vertices[idxPrev], *Vertices[idx]); \ } \ outlist[outcount++] = numVertices - 1; \ } \ @@ -104,25 +108,23 @@ static inline int CalcClipMask(const ClipCoords& v) #define CLIP_LINE(PLANE_BIT, A, B, C, D) \ { \ - if (mask & PLANE_BIT) { \ - float dp0 = CLIP_DOTPROD(0, A, B, C, D ); \ - float dp1 = CLIP_DOTPROD(1, A, B, C, D ); \ - int i = 0; \ + if (mask & PLANE_BIT) { \ + float dp0 = clip_dotprod(*Vertices[0], A, B, C, D ); \ + float dp1 = clip_dotprod(*Vertices[1], A, B, C, D ); \ + int numVertices = 0; \ \ if (mask0 & PLANE_BIT) { \ if (dp0 < 0) { \ float t = dp1 / (dp1 - dp0); \ - i = 0; \ - AddInterpolatedVertex(t, 1, 0, i); \ + Vertices[0]->Lerp(t, *Vertices[1], *Vertices[0]); \ } \ } \ - dp0 = CLIP_DOTPROD(0, A, B, C, D ); \ + dp0 = clip_dotprod(*Vertices[0], A, B, C, D ); \ \ if (mask1 & PLANE_BIT) { \ if (dp1 < 0) { \ float t = dp1 / (dp1- dp0); \ - i = 1; \ - AddInterpolatedVertex(t, 1, 0, i); \ + Vertices[1]->Lerp(t, *Vertices[1], *Vertices[0]); \ } \ } \ } \ @@ -139,8 +141,11 @@ static void RotateUVThrough(const VertexData &tl, const VertexData &br, VertexDa } } +bool needsClear = false; + void ProcessRect(const VertexData& v0, const VertexData& v1) { + g_DarkStalkerStretch = false; if (!gstate.isModeThrough()) { VertexData buf[4]; buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w); @@ -182,6 +187,44 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) ProcessTriangle(*topleft, *bottomleft, *bottomright, buf[3]); } else { // through mode handling + + // Check for 1:1 texture mapping. In that case we can call DrawSprite. + int xdiff = v1.screenpos.x - v0.screenpos.x; + int ydiff = v1.screenpos.y - v0.screenpos.y; + int udiff = (v1.texturecoords.x - v0.texturecoords.x) * 16.0f; + int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * 16.0f; + bool coord_check = + (xdiff == udiff || xdiff == -udiff) && + (ydiff == vdiff || ydiff == -vdiff); + bool state_check = !gstate.isModeClear(); // TODO: Add support for clear modes in Rasterizer::DrawSprite. + if ((coord_check || !gstate.isTextureMapEnabled()) && state_check) { + Rasterizer::DrawSprite(v0, v1); + return; + } + + // Eliminate the stretch blit in DarkStalkers. + // We compensate for that when blitting the framebuffer in SoftGpu.cpp. + if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) { + if (v0.screenpos.x == 0x7100 && v0.screenpos.y == 0x7780 && v1.screenpos.x == 0x8f00 && v1.screenpos.y == 0x8880) { + // Also check for save/load dialog. + if (!currentDialogActive) { + g_DarkStalkerStretch = true; + if (needsClear) { + needsClear = false; + // Afterwards, we also need to clear the actual destination. Can do a fast rectfill. + gstate.textureMapEnable &= ~1; + VertexData newV0 = v0; + newV0.color0 = Vec4(0, 0, 0, 255); + Rasterizer::DrawSprite(newV0, v1); + gstate.textureMapEnable |= 1; + } + return; + } else { + needsClear = true; + } + } // else, handle the Capcom screen stretch, or the non-wide stretch? Or let's just not bother. + } + VertexData buf[4]; buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z); buf[0].texturecoords = v0.texturecoords; @@ -196,7 +239,7 @@ void ProcessRect(const VertexData& v0, const VertexData& v1) // Color and depth values of second vertex are used for the whole rectangle buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; - buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; + buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; // is color1 ever used in through mode? buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f; buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth = 1.0f; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 5db77b9407ff..71560776b852 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -1287,6 +1287,200 @@ void DrawTriangleSlice( } } +// Through mode, with the specific Darkstalker settings. +inline void DrawSinglePixel5551(u16 *pixel, const Vec4 &color_in) { + u32 new_color; + if (color_in.a() == 255) { + new_color = color_in.ToRGBA() & 0xFFFFFF; + } else { + const u32 old_color = RGBA5551ToRGBA8888(*pixel); + const Vec4 dst = Vec4::FromRGBA(old_color); + Vec3 blended = AlphaBlendingResult(color_in, dst); + // ToRGB() always automatically clamps. + new_color = blended.ToRGB(); + } + + new_color |= (*pixel & 0x8000) ? 0xff000000 : 0x00000000; + *pixel = RGBA8888ToRGBA5551(new_color); +} + +static inline Vec4 ModulateRGBA(const Vec4& prim_color, const Vec4& texcolor) { + Vec3 out_rgb; + int out_a; + +#if defined(_M_SSE) + // We can be accurate up to 24 bit integers, should be enough. + const __m128 p = _mm_cvtepi32_ps(prim_color.ivec); + const __m128 t = _mm_cvtepi32_ps(texcolor.ivec); + const __m128 b = _mm_mul_ps(p, t); + if (gstate.isColorDoublingEnabled()) { + // We double right here, only for modulate. Other tex funcs do not color double. + const __m128 doubleColor = _mm_setr_ps(2.0f / 255.0f, 2.0f / 255.0f, 2.0f / 255.0f, 1.0f / 255.0f); + out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, doubleColor)); + } else { + out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f))); + } + return Vec4(out_rgb.ivec); +#else + if (gstate.isColorDoublingEnabled()) { + out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255; + } else { + out_rgb = prim_color.rgb() * texcolor.rgb() / 255; + } + out_a = (prim_color.a() * texcolor.a() / 255); +#endif + + return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a); + +} + +void DrawSprite(const VertexData& v0, const VertexData& v1) { + const u8 *texptr = nullptr; + + GETextureFormat texfmt = gstate.getTextureFormat(); + u32 texaddr = gstate.getTextureAddress(0); + int texbufw = GetTextureBufw(0, texaddr, texfmt); + if (Memory::IsValidAddress(texaddr)) + texptr = Memory::GetPointerUnchecked(texaddr); + + ScreenCoords pprime(v0.screenpos.x, v0.screenpos.y, 0); + Sampler::NearestFunc nearestFunc = Sampler::GetNearestFunc(); // Looks at gstate. + + DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos); + DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos); + + DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0); + DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0); + + int z = pos0.z; + float fog = 1.0f; + + bool isWhite = v0.color0 == Vec4(255, 255, 255, 255); + + if (gstate.isTextureMapEnabled()) { + // 1:1 (but with mirror support) texture mapping! + int s_start = v0.texturecoords.x; + int t_start = v0.texturecoords.y; + int ds = v1.texturecoords.x > v0.texturecoords.x ? 1 : -1; + int dt = v1.texturecoords.y > v0.texturecoords.y ? 1 : -1; + + if (ds < 0) { + s_start += ds; + } + if (dt < 0) { + t_start += dt; + } + + // First clip the right and bottom sides, since we don't need to adjust the deltas. + if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1; + if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1; + // Now clip the other sides. + if (pos0.x < scissorTL.x) { + s_start += (scissorTL.x - pos0.x) * ds; + pos0.x = scissorTL.x; + } + if (pos0.y < scissorTL.y) { + t_start += (scissorTL.y - pos0.y) * dt; + pos0.y = scissorTL.y; + } + + if (!gstate.isStencilTestEnabled() && + !gstate.isDepthTestEnabled() && + !gstate.isLogicOpEnabled() && + !gstate.isColorTestEnabled() && + !gstate.isDitherEnabled() && + gstate.isAlphaTestEnabled() && + gstate.getAlphaTestRef() == 0 && + gstate.getAlphaTestMask() == 0xFF && + gstate.isAlphaBlendEnabled() && + gstate.isTextureAlphaUsed() && + gstate.getTextureFunction() == GE_TEXFUNC_MODULATE && + gstate.getColorMask() == 0x000000 && + gstate.FrameBufFormat() == GE_FORMAT_5551) { + int t = t_start; + for (int y = pos0.y; y < pos1.y; y++) { + int s = s_start; + u16 *pixel = fb.Get16Ptr(pos0.x, y, gstate.FrameBufStride()); + if (isWhite) { + for (int x = pos0.x; x < pos1.x; x++) { + u32 tex_color = nearestFunc(s, t, texptr, texbufw, 0); + if (tex_color & 0xFF000000) { + DrawSinglePixel5551(pixel, Vec4::FromRGBA(tex_color)); + } + s += ds; + pixel++; + } + } else { + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); + prim_color = ModulateRGBA(prim_color, tex_color); + if (prim_color.a() > 0) { + DrawSinglePixel5551(pixel, prim_color); + } + s += ds; + pixel++; + } + } + t += dt; + } + } else { + int t = t_start; + for (int y = pos0.y; y < pos1.y; y++) { + int s = s_start; + // Not really that fast but faster than triangle. + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); + prim_color = GetTextureFunctionOutput(prim_color, tex_color); + DrawingCoords pos(x, y, z); + DrawSinglePixel(pos, (u16)z, 1.0f, prim_color); + s += ds; + } + t += dt; + } + } + } else { + if (pos1.x > scissorBR.x) pos1.x = scissorBR.x; + if (pos1.y > scissorBR.y) pos1.y = scissorBR.y; + if (pos0.x < scissorTL.x) pos0.x = scissorTL.x; + if (pos0.y < scissorTL.y) pos0.y = scissorTL.y; + if (!gstate.isStencilTestEnabled() && + !gstate.isDepthTestEnabled() && + !gstate.isLogicOpEnabled() && + !gstate.isColorTestEnabled() && + !gstate.isDitherEnabled() && + gstate.isAlphaTestEnabled() && + gstate.getAlphaTestRef() == 0 && + gstate.getAlphaTestMask() == 0xFF && + gstate.isAlphaBlendEnabled() && + gstate.isTextureAlphaUsed() && + gstate.getTextureFunction() == GE_TEXFUNC_MODULATE && + gstate.getColorMask() == 0x000000 && + gstate.FrameBufFormat() == GE_FORMAT_5551) { + if (v0.color0.a() == 0) + return; + + for (int y = pos0.y; y < pos1.y; y++) { + u16 *pixel = fb.Get16Ptr(pos0.x, y, gstate.FrameBufStride()); + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + DrawSinglePixel5551(pixel, prim_color); + pixel++; + } + } + } else { + for (int y = pos0.y; y < pos1.y; y++) { + for (int x = pos0.x; x < pos1.x; x++) { + Vec4 prim_color = v0.color0; + DrawingCoords pos(x, y, z); + DrawSinglePixel(pos, (u16)z, fog, prim_color); + } + } + } + } +} + // Draws triangle, vertices specified in counter-clockwise direction void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2) { diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 53d44e8af0a3..df3075e3a067 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -27,6 +27,7 @@ namespace Rasterizer { void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2); void DrawPoint(const VertexData &v0); void DrawLine(const VertexData &v0, const VertexData &v1); +void DrawSprite(const VertexData &v0, const VertexData &v1); void ClearRectangle(const VertexData &v0, const VertexData &v1); bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 76f4a8d7b945..7254c2ce11c1 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -73,8 +73,6 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw) }, }; - ShaderModule *vshader = draw_->GetVshaderPreset(VS_TEXTURE_COLOR_2D); - vdata = draw_->CreateBuffer(sizeof(Vertex) * 4, BufferUsageFlag::DYNAMIC | BufferUsageFlag::VERTEXDATA); idata = draw_->CreateBuffer(sizeof(int) * 6, BufferUsageFlag::DYNAMIC | BufferUsageFlag::INDEXDATA); @@ -92,6 +90,14 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw) inputLayout, depth, blendstateOff, rasterNoCull, &vsTexColBufDesc }; texColor = draw_->CreateGraphicsPipeline(pipelineDesc); + + PipelineDesc pipelineDescRBSwizzle{ + Primitive::TRIANGLE_LIST, + { draw_->GetVshaderPreset(VS_TEXTURE_COLOR_2D), draw_->GetFshaderPreset(FS_TEXTURE_COLOR_2D_RB_SWIZZLE) }, + inputLayout, depth, blendstateOff, rasterNoCull, &vsTexColBufDesc + }; + texColorRBSwizzle = draw_->CreateGraphicsPipeline(pipelineDescRBSwizzle); + inputLayout->Release(); depth->Release(); blendstateOff->Release(); @@ -122,6 +128,8 @@ void SoftGPU::DeviceRestore() { SoftGPU::~SoftGPU() { texColor->Release(); texColor = nullptr; + texColorRBSwizzle->Release(); + texColorRBSwizzle = nullptr; if (fbTex) { fbTex->Release(); @@ -148,12 +156,16 @@ void SoftGPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for GPURecord::NotifyDisplay(framebuf, stride, format); } +bool g_DarkStalkerStretch; + // Copies RGBA8 data from RAM to the currently bound render target. void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { if (!draw_) return; float u0 = 0.0f; float u1; + float v0 = 1.0f; + float v1 = 0.0f; if (fbTex) { fbTex->Release(); @@ -163,6 +175,9 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { // For accuracy, try to handle 0 stride - sometimes used. if (displayStride_ == 0) { srcheight = 1; + u1 = 1.0f; + } else { + u1 = (float)srcwidth / displayStride_; } Draw::TextureDesc desc{}; @@ -172,7 +187,26 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { desc.mipLevels = 1; desc.tag = "SoftGPU"; bool hasImage = true; - if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) { + + Draw::Pipeline *pipeline = texColor; + if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && displayFormat_ == GE_FORMAT_5551 && g_DarkStalkerStretch) { + u8 *data = Memory::GetPointer(0x04088000); + if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) { + // The perfect one. + desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16; + } else if (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE) { + // RB swapped, compensate with a shader. + desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + pipeline = texColorRBSwizzle; + } + desc.width = displayStride_ == 0 ? srcwidth : displayStride_; + desc.height = srcheight; + desc.initData.push_back(data); + u0 = 64.5f / 512.0f; + u1 = 447.5f / 512.0f; + v1 = 16.0f / 272.0f; + v0 = 240.0f / 272.0f; + } else if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) { hasImage = false; u1 = 1.0f; } else if (displayFormat_ == GE_FORMAT_8888) { @@ -181,11 +215,20 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { desc.height = srcheight; desc.initData.push_back(data); desc.format = Draw::DataFormat::R8G8B8A8_UNORM; - if (displayStride_ != 0) { - u1 = (float)srcwidth / displayStride_; - } else { - u1 = 1.0f; + } else if (displayFormat_ == GE_FORMAT_5551) { + u8 *data = Memory::GetPointer(displayFramebuf_); + desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) { + // The perfect one. + desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16; + } else if (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE) { + // RB swapped, compensate with a shader. + desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16; + pipeline = texColorRBSwizzle; } + desc.width = displayStride_ == 0 ? srcwidth : displayStride_; + desc.height = srcheight; + desc.initData.push_back(data); } else { // TODO: This should probably be converted in a shader instead.. fbTexBuffer.resize(srcwidth * srcheight); @@ -247,12 +290,10 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { x2 -= 1.0f; y2 -= 1.0f; - float v0 = 1.0f; - float v1 = 0.0f; - if (GetGPUBackend() == GPUBackend::VULKAN) { std::swap(v0, v1); } + draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }); Draw::Viewport viewport = { 0.0f, 0.0f, dstwidth, dstheight, 0.0f, 1.0f }; draw_->SetViewports(1, &viewport); @@ -288,7 +329,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) { Draw::VsTexColUB ub{}; memcpy(ub.WorldViewProj, g_display_rot_matrix.m, sizeof(float) * 16); - draw_->BindPipeline(texColor); + draw_->BindPipeline(pipeline); draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub)); draw_->BindVertexBuffers(0, 1, &vdata, nullptr); draw_->BindIndexBuffer(idata, 0); diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index f92344742e99..02660b557665 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -44,6 +44,10 @@ struct FormatBuffer { inline u32 Get32(int x, int y, int stride) { return as32[x + y * stride]; } + + inline u16 *Get16Ptr(int x, int y, int stride) { + return &as16[x + y * stride]; + } }; class SoftwareDrawEngine; @@ -108,6 +112,7 @@ class SoftGPU : public GPUCommon { Draw::Texture *fbTex; Draw::Pipeline *texColor; + Draw::Pipeline *texColorRBSwizzle; std::vector fbTexBuffer; Draw::SamplerState *samplerNearest = nullptr; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 3b0603fd79b8..82a84976845a 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -319,8 +319,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy VertexReader vreader(buf, vtxfmt, vertex_type); - const int max_vtcs_per_prim = 3; - static VertexData data[max_vtcs_per_prim]; + static VertexData data[4]; // Normally max verts per prim is 3, but we temporarily need 4 to detect rectangles from strips. // This is the index of the next vert in data (or higher, may need modulus.) static int data_index = 0; @@ -439,6 +438,62 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy // Don't draw a triangle when loading the first two vertices. int skip_count = data_index >= 2 ? 0 : 2 - data_index; + // If index count == 4, check if we can convert to a rectangle. + // This is for Darkstalkers (and should speed up many 2D games). + if (vertex_count == 4 && gstate.isModeThrough()) { + for (int vtx = 0; vtx < 4; ++vtx) { + if (indices) { + vreader.Goto(ConvertIndex(vtx) - index_lower_bound); + } + else { + vreader.Goto(vtx); + } + data[vtx] = ReadVertex(vreader); + } + + // OK, now let's look at data to detect rectangles. There are a few possibilities + // but we focus on Darkstalkers for now. + if (data[0].screenpos.x == data[1].screenpos.x && + data[0].screenpos.y == data[2].screenpos.y && + data[2].screenpos.x == data[3].screenpos.x && + data[1].screenpos.y == data[3].screenpos.y && + data[1].screenpos.y > data[0].screenpos.y && // Avoid rotation handling + data[2].screenpos.x > data[0].screenpos.x && + data[0].texturecoords.x == data[1].texturecoords.x && + data[0].texturecoords.y == data[2].texturecoords.y && + data[2].texturecoords.x == data[3].texturecoords.x && + data[1].texturecoords.y == data[3].texturecoords.y && + data[1].texturecoords.y > data[0].texturecoords.y && + data[2].texturecoords.x > data[0].texturecoords.x && + data[0].color0 == data[1].color0 && + data[1].color0 == data[2].color0 && + data[2].color0 == data[3].color0) { + // It's a rectangle! + Clipper::ProcessRect(data[0], data[3]); + break; + } + // There's the other vertex order too... + if (data[0].screenpos.x == data[2].screenpos.x && + data[0].screenpos.y == data[1].screenpos.y && + data[1].screenpos.x == data[3].screenpos.x && + data[2].screenpos.y == data[3].screenpos.y && + data[2].screenpos.y > data[0].screenpos.y && // Avoid rotation handling + data[1].screenpos.x > data[0].screenpos.x && + data[0].texturecoords.x == data[2].texturecoords.x && + data[0].texturecoords.y == data[1].texturecoords.y && + data[1].texturecoords.x == data[3].texturecoords.x && + data[2].texturecoords.y == data[3].texturecoords.y && + data[2].texturecoords.y > data[0].texturecoords.y && + data[1].texturecoords.x > data[0].texturecoords.x && + data[0].color0 == data[1].color0 && + data[1].color0 == data[2].color0 && + data[2].color0 == data[3].color0) { + // It's a rectangle! + Clipper::ProcessRect(data[0], data[3]); + break; + } + } + for (int vtx = 0; vtx < vertex_count; ++vtx) { if (indices) { vreader.Goto(ConvertIndex(vtx) - index_lower_bound); diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 4a7a8a466c62..6c062d6a529d 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -232,9 +232,6 @@ void EmuScreen::bootGame(const std::string &filename) { break; #endif } - if (g_Config.bSoftwareRendering) { - coreParam.gpuCore = GPUCORE_SOFTWARE; - } // Preserve the existing graphics context. coreParam.graphicsContext = PSP_CoreParameter().graphicsContext; diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp index 09f9f16a0856..7797b58289a5 100644 --- a/Windows/GPU/WindowsVulkanContext.cpp +++ b/Windows/GPU/WindowsVulkanContext.cpp @@ -116,6 +116,7 @@ bool WindowsVulkanContext::Init(HINSTANCE hInst, HWND hWnd, std::string *error_m if (!g_Config.sVulkanDevice.empty()) g_Config.sVulkanDevice = g_Vulkan->GetPhysicalDeviceProperties(deviceNum).properties.deviceName; } + g_Vulkan->ChooseDevice(deviceNum); if (g_Vulkan->CreateDevice() != VK_SUCCESS) { *error_message = g_Vulkan->InitError(); diff --git a/assets/compat.ini b/assets/compat.ini index 662e3b0e2562..c67f0b8360c6 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -683,3 +683,13 @@ UCET00844 = true UCUS98705 = true UCED00971 = true UCUS98713 = true + +[ForceSoftwareRenderer] +# Darkstalkers +ULES00016 = true +ULUS10005 = true + +[DarkStalkersPresentHack] +# Darkstalkers +ULES00016 = true +ULUS10005 = true diff --git a/ext/native/base/display.cpp b/ext/native/base/display.cpp index 3e912788db7f..c6736395519d 100644 --- a/ext/native/base/display.cpp +++ b/ext/native/base/display.cpp @@ -17,7 +17,7 @@ float pixel_in_dps_y = 1.0f; float display_hz = 60.0f; DisplayRotation g_display_rotation; -Lin::Matrix4x4 g_display_rot_matrix; +Lin::Matrix4x4 g_display_rot_matrix = Lin::Matrix4x4::identity(); template void RotateRectToDisplayImpl(DisplayRect &rect, T curRTWidth, T curRTHeight) { diff --git a/ext/native/math/lin/matrix4x4.h b/ext/native/math/lin/matrix4x4.h index c9464384ad58..a9e312de7009 100644 --- a/ext/native/math/lin/matrix4x4.h +++ b/ext/native/math/lin/matrix4x4.h @@ -57,7 +57,11 @@ class Matrix4x4 { empty(); xx=yy=zz=f; ww=1.0f; } - + static Matrix4x4 identity() { + Matrix4x4 id; + id.setIdentity(); + return id; + } void setIdentity() { setScaling(1.0f); } diff --git a/ext/native/thin3d/DataFormat.h b/ext/native/thin3d/DataFormat.h index 467f9e4a0048..f1e168763019 100644 --- a/ext/native/thin3d/DataFormat.h +++ b/ext/native/thin3d/DataFormat.h @@ -30,6 +30,7 @@ enum class DataFormat : uint8_t { R5G5B5A1_UNORM_PACK16, // A1 in the LOWER bit B5G5R5A1_UNORM_PACK16, // A1 in the LOWER bit A1R5G5B5_UNORM_PACK16, // A1 in the UPPER bit. + A1B5G5R5_UNORM_PACK16, // A1 in the UPPER bit. OpenGL-only. R16_FLOAT, R16G16_FLOAT, diff --git a/ext/native/thin3d/thin3d.cpp b/ext/native/thin3d/thin3d.cpp index d142638e07fc..340157708dc6 100644 --- a/ext/native/thin3d/thin3d.cpp +++ b/ext/native/thin3d/thin3d.cpp @@ -146,6 +146,50 @@ static const std::vector fsTexCol = { } }; +static const std::vector fsTexColRBSwizzle = { + {ShaderLanguage::GLSL_ES_200, + "#ifdef GL_ES\n" + "precision lowp float;\n" + "#endif\n" + "#if __VERSION__ >= 130\n" + "#define varying in\n" + "#define texture2D texture\n" + "#define gl_FragColor fragColor0\n" + "out vec4 fragColor0;\n" + "#endif\n" + "varying vec4 oColor0;\n" + "varying vec2 oTexCoord0;\n" + "uniform sampler2D Sampler0;\n" + "void main() { gl_FragColor = texture2D(Sampler0, oTexCoord0).zyxw * oColor0; }\n" + }, + {ShaderLanguage::HLSL_D3D9, + "struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n" + "sampler2D Sampler0 : register(s0);\n" + "float4 main(PS_INPUT input) : COLOR0 {\n" + " return input.color * tex2D(Sampler0, input.uv).zyxw;\n" + "}\n" + }, + {ShaderLanguage::HLSL_D3D11, + "struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n" + "SamplerState samp : register(s0);\n" + "Texture2D tex : register(t0);\n" + "float4 main(PS_INPUT input) : SV_Target {\n" + " float4 col = input.color * tex.Sample(samp, input.uv).bgra;\n" + " return col;\n" + "}\n" + }, + {ShaderLanguage::GLSL_VULKAN, + "#version 140\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "#extension GL_ARB_shading_language_420pack : enable\n" + "layout(location = 0) in vec4 oColor0;\n" + "layout(location = 1) in vec2 oTexCoord0;\n" + "layout(location = 0) out vec4 fragColor0\n;" + "layout(set = 0, binding = 1) uniform sampler2D Sampler0;\n" + "void main() { fragColor0 = texture(Sampler0, oTexCoord0).bgra * oColor0; }\n" + } +}; + static const std::vector fsCol = { { ShaderLanguage::GLSL_ES_200, "#ifdef GL_ES\n" @@ -330,8 +374,9 @@ bool DrawContext::CreatePresets() { fsPresets_[FS_TEXTURE_COLOR_2D] = CreateShader(this, ShaderStage::FRAGMENT, fsTexCol); fsPresets_[FS_COLOR_2D] = CreateShader(this, ShaderStage::FRAGMENT, fsCol); + fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE] = CreateShader(this, ShaderStage::FRAGMENT, fsTexColRBSwizzle); - return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D]; + return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE]; } void DrawContext::DestroyPresets() { diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 7488e5ef4710..146ea138c05c 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -146,6 +146,7 @@ enum VertexShaderPreset : int { enum FragmentShaderPreset : int { FS_COLOR_2D, FS_TEXTURE_COLOR_2D, + FS_TEXTURE_COLOR_2D_RB_SWIZZLE, FS_MAX_PRESET, }; diff --git a/ext/native/thin3d/thin3d_d3d9.cpp b/ext/native/thin3d/thin3d_d3d9.cpp index cdd784b5f1c2..e9c2a8d17123 100644 --- a/ext/native/thin3d/thin3d_d3d9.cpp +++ b/ext/native/thin3d/thin3d_d3d9.cpp @@ -348,6 +348,10 @@ bool D3D9Texture::Create(const TextureDesc &desc) { format_ = desc.format; tex_ = NULL; d3dfmt_ = FormatToD3DFMT(desc.format); + + if (d3dfmt_ == D3DFMT_UNKNOWN) { + return false; + } HRESULT hr = E_FAIL; D3DPOOL pool = D3DPOOL_MANAGED; @@ -424,6 +428,7 @@ void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int d } break; case DataFormat::A4R4G4B4_UNORM_PACK16: + case DataFormat::A1R5G5B5_UNORM_PACK16: // Native memcpy(dest, source, width * sizeof(uint16_t)); break; @@ -437,6 +442,10 @@ void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int d case DataFormat::B8G8R8A8_UNORM: memcpy(dest, source, sizeof(uint32_t) * width); break; + default: + // Unhandled data format copy. + DebugBreak(); + break; } } tex_->UnlockRect(level); diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index d31e63f892a0..9b164ec2abb0 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -277,7 +277,7 @@ bool OpenGLShaderModule::Compile(GLRenderManager *render, ShaderLanguage languag class OpenGLInputLayout : public InputLayout { public: - OpenGLInputLayout(GLRenderManager *render) : render_(render), stride(0) {} + OpenGLInputLayout(GLRenderManager *render) : render_(render) {} ~OpenGLInputLayout(); void Compile(const InputLayoutDesc &desc); @@ -286,7 +286,7 @@ class OpenGLInputLayout : public InputLayout { } GLRInputLayout *inputLayout_ = nullptr; - int stride; + int stride = 0; private: GLRenderManager *render_; }; @@ -718,6 +718,15 @@ class OpenGLFramebuffer : public Framebuffer { FBColorDepth colorDepth = FBO_8888; }; +// TODO: SSE/NEON optimize, and move to ColorConv.cpp. +void MoveABit(u16 *dest, const u16 *src, size_t count) { + for (int i = 0; i < count; i++) { + u16 data = src[i]; + data = (data >> 15) | (data << 1); + dest[i] = data; + } +} + void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data) { if (width != width_ || height != height_ || depth != depth_) { // When switching to texStorage we need to handle this correctly. @@ -729,12 +738,20 @@ void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int if (stride == 0) stride = width; - size_t alignment = DataFormatSizeInBytes(format_); // Make a copy of data with stride eliminated. uint8_t *texData = new uint8_t[(size_t)(width * height * alignment)]; - for (int y = 0; y < height; y++) { - memcpy(texData + y * width * alignment, data + y * stride * alignment, width * alignment); + + // Emulate support for DataFormat::A1R5G5B5_UNORM_PACK16. + if (format_ == DataFormat::A1R5G5B5_UNORM_PACK16) { + format_ = DataFormat::R5G5B5A1_UNORM_PACK16; + for (int y = 0; y < height; y++) { + MoveABit((u16 *)(texData + y * width * alignment), (const u16 *)(data + y * stride * alignment), width); + } + } else { + for (int y = 0; y < height; y++) { + memcpy(texData + y * width * alignment, data + y * stride * alignment, width * alignment); + } } render_->TextureImage(tex_, level, width, height, format_, texData); } @@ -1220,6 +1237,9 @@ uint32_t OpenGLContext::GetDataFormatSupport(DataFormat fmt) const { case DataFormat::R8G8B8A8_UNORM: return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT | FMT_AUTOGEN_MIPS; + case DataFormat::A1R5G5B5_UNORM_PACK16: + return FMT_TEXTURE; // we will emulate this! Very fast to convert from R5G5B5A1_UNORM_PACK16 during upload. + case DataFormat::R32_FLOAT: case DataFormat::R32G32_FLOAT: case DataFormat::R32G32B32_FLOAT: diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 3de9e59db4dd..3222abf2c04e 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -699,6 +699,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur // Gonna have to generate some, which requires TRANSFER_SRC usageBits |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; } + if (!vkTex_->CreateDirect(cmd, alloc, width_, height_, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits)) { ELOG("Failed to create VulkanTexture: %dx%dx%d fmt %d, %d levels", width_, height_, depth_, (int)vulkanFormat, mipLevels_); return false; @@ -1358,6 +1359,8 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const { return 0; case DataFormat::A4R4G4B4_UNORM_PACK16: return 0; + case DataFormat::A1R5G5B5_UNORM_PACK16: + return FMT_RENDERTARGET | FMT_TEXTURE; case DataFormat::R8G8B8A8_UNORM: return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT;