diff --git a/.gitignore b/.gitignore
index 2beadcfc838b..918734392741 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,6 +68,7 @@ build.ios
versionname.txt
versioncode.txt
build*/
+android/.cxx
# Temp file used by jenkins windows build (TODO: remove)
desc.txt
diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp
index ff2d6d23fadb..b0064470724c 100644
--- a/Core/Compatibility.cpp
+++ b/Core/Compatibility.cpp
@@ -67,6 +67,8 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "JitInvalidationHack", &flags_.JitInvalidationHack);
CheckSetting(iniFile, gameID, "HideISOFiles", &flags_.HideISOFiles);
CheckSetting(iniFile, gameID, "MoreAccurateVMMUL", &flags_.MoreAccurateVMMUL);
+ CheckSetting(iniFile, gameID, "ForceSoftwareRenderer", &flags_.ForceSoftwareRenderer);
+ CheckSetting(iniFile, gameID, "DarkStalkersPresentHack", &flags_.DarkStalkersPresentHack);
}
void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {
diff --git a/Core/Compatibility.h b/Core/Compatibility.h
index 0938b4c731fb..0baf9db5fac6 100644
--- a/Core/Compatibility.h
+++ b/Core/Compatibility.h
@@ -67,6 +67,8 @@ struct CompatFlags {
bool JitInvalidationHack;
bool HideISOFiles;
bool MoreAccurateVMMUL;
+ bool ForceSoftwareRenderer;
+ bool DarkStalkersPresentHack;
};
class IniFile;
diff --git a/Core/HLE/sceUtility.cpp b/Core/HLE/sceUtility.cpp
index 1a9c9947015d..3eb751436b6e 100644
--- a/Core/HLE/sceUtility.cpp
+++ b/Core/HLE/sceUtility.cpp
@@ -130,7 +130,7 @@ enum UtilityDialogType {
// Only a single dialog is allowed at a time.
static UtilityDialogType currentDialogType;
-static bool currentDialogActive;
+bool currentDialogActive;
static PSPSaveDialog saveDialog;
static PSPMsgDialog msgDialog;
static PSPOskDialog oskDialog;
diff --git a/Core/System.cpp b/Core/System.cpp
index 266eab30e3a8..f36cbcec8359 100644
--- a/Core/System.cpp
+++ b/Core/System.cpp
@@ -349,6 +349,11 @@ bool PSP_InitStart(const CoreParameter &coreParam, std::string *error_string) {
CPU_Init();
+ // Compat flags get loaded in CPU_Init (which is a bit of a misnomer) so we check for SW renderer here.
+ if (g_Config.bSoftwareRendering || PSP_CoreParameter().compat.flags().ForceSoftwareRenderer) {
+ coreParameter.gpuCore = GPUCORE_SOFTWARE;
+ }
+
*error_string = coreParameter.errorString;
bool success = coreParameter.fileToStart != "";
if (!success) {
diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj
index 53903c75c132..44da1289345f 100644
--- a/GPU/GPU.vcxproj
+++ b/GPU/GPU.vcxproj
@@ -197,6 +197,7 @@
MultiThreadedDebug
Common/DbgNew.h
ProgramDatabase
+ false
true
diff --git a/GPU/Math3D.h b/GPU/Math3D.h
index 292c63be0529..ada1a6931eda 100644
--- a/GPU/Math3D.h
+++ b/GPU/Math3D.h
@@ -625,6 +625,10 @@ class Vec4
*this = *this / f;
}
+	// Exact component-wise comparison of all four members (no tolerance is applied).
+	bool operator ==(const Vec4 &other) const {
+		if (x != other.x || y != other.y)
+			return false;
+		return z == other.z && w == other.w;
+	}
+
T Length2() const
{
return x*x + y*y + z*z + w*w;
diff --git a/GPU/Software/Clipper.cpp b/GPU/Software/Clipper.cpp
index eb181ce6131b..6d22d2e7ee03 100644
--- a/GPU/Software/Clipper.cpp
+++ b/GPU/Software/Clipper.cpp
@@ -17,6 +17,8 @@
#include
+#include "Core/System.h"
+
#include "GPU/GPUState.h"
#include "GPU/Software/Clipper.h"
@@ -24,6 +26,11 @@
#include "profiler/profiler.h"
+
+extern bool g_DarkStalkerStretch;
+// For Darkstalkers hack. Ugh.
+extern bool currentDialogActive;
+
namespace Clipper {
enum {
@@ -49,39 +56,36 @@ static inline int CalcClipMask(const ClipCoords& v)
return mask;
}
-#define AddInterpolatedVertex(t, out, in, numVertices) \
-{ \
- Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \
- numVertices++; \
+inline bool different_signs(float x, float y) { // True when exactly one of x and y is > 0 (zero counts as non-positive).
+ return ((x <= 0 && y > 0) || (x > 0 && y <= 0));
}
-#define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0))
-
-#define CLIP_DOTPROD(I, A, B, C, D) \
- (Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D)
+// Dot product of the vertex's clip-space position with the plane coefficients (A, B, C, D).
+inline float clip_dotprod(const VertexData &vert, float A, float B, float C, float D) {
+	const ClipCoords &pos = vert.clippos;
+	return (pos.x * A + pos.y * B + pos.z * C + pos.w * D);
+}
#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \
{ \
if (mask & PLANE_BIT) { \
int idxPrev = inlist[0]; \
- float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \
+ float dpPrev = clip_dotprod(*Vertices[idxPrev], A, B, C, D );\
int outcount = 0; \
\
inlist[n] = inlist[0]; \
for (int j = 1; j <= n; j++) { \
int idx = inlist[j]; \
- float dp = CLIP_DOTPROD(idx, A, B, C, D ); \
+ float dp = clip_dotprod(*Vertices[idx], A, B, C, D ); \
if (dpPrev >= 0) { \
outlist[outcount++] = idxPrev; \
} \
\
- if (DIFFERENT_SIGNS(dp, dpPrev)) { \
+ if (different_signs(dp, dpPrev)) { \
if (dp < 0) { \
float t = dp / (dp - dpPrev); \
- AddInterpolatedVertex(t, idx, idxPrev, numVertices); \
+ Vertices[numVertices++]->Lerp(t, *Vertices[idx], *Vertices[idxPrev]); \
} else { \
float t = dpPrev / (dpPrev - dp); \
- AddInterpolatedVertex(t, idxPrev, idx, numVertices); \
+ Vertices[numVertices++]->Lerp(t, *Vertices[idxPrev], *Vertices[idx]); \
} \
outlist[outcount++] = numVertices - 1; \
} \
@@ -104,25 +108,23 @@ static inline int CalcClipMask(const ClipCoords& v)
#define CLIP_LINE(PLANE_BIT, A, B, C, D) \
{ \
- if (mask & PLANE_BIT) { \
- float dp0 = CLIP_DOTPROD(0, A, B, C, D ); \
- float dp1 = CLIP_DOTPROD(1, A, B, C, D ); \
- int i = 0; \
+ if (mask & PLANE_BIT) { \
+ float dp0 = clip_dotprod(*Vertices[0], A, B, C, D ); \
+ float dp1 = clip_dotprod(*Vertices[1], A, B, C, D ); \
+ int numVertices = 0; \
\
if (mask0 & PLANE_BIT) { \
if (dp0 < 0) { \
float t = dp1 / (dp1 - dp0); \
- i = 0; \
- AddInterpolatedVertex(t, 1, 0, i); \
+ Vertices[0]->Lerp(t, *Vertices[1], *Vertices[0]); \
} \
} \
- dp0 = CLIP_DOTPROD(0, A, B, C, D ); \
+ dp0 = clip_dotprod(*Vertices[0], A, B, C, D ); \
\
if (mask1 & PLANE_BIT) { \
if (dp1 < 0) { \
float t = dp1 / (dp1- dp0); \
- i = 1; \
- AddInterpolatedVertex(t, 1, 0, i); \
+ Vertices[1]->Lerp(t, *Vertices[1], *Vertices[0]); \
} \
} \
} \
@@ -139,8 +141,11 @@ static void RotateUVThrough(const VertexData &tl, const VertexData &br, VertexDa
}
}
+bool needsClear = false; // Darkstalkers hack state: set by ProcessRect when the stretch rect is seen while a utility dialog is active; the next stretch seen without a dialog then fills the destination rect with black once and resets this.
+
void ProcessRect(const VertexData& v0, const VertexData& v1)
{
+ g_DarkStalkerStretch = false;
if (!gstate.isModeThrough()) {
VertexData buf[4];
buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w);
@@ -182,6 +187,44 @@ void ProcessRect(const VertexData& v0, const VertexData& v1)
ProcessTriangle(*topleft, *bottomleft, *bottomright, buf[3]);
} else {
// through mode handling
+
+ // Check for 1:1 texture mapping. In that case we can call DrawSprite.
+ int xdiff = v1.screenpos.x - v0.screenpos.x;
+ int ydiff = v1.screenpos.y - v0.screenpos.y;
+ int udiff = (v1.texturecoords.x - v0.texturecoords.x) * 16.0f;
+ int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * 16.0f;
+ bool coord_check =
+ (xdiff == udiff || xdiff == -udiff) &&
+ (ydiff == vdiff || ydiff == -vdiff);
+ bool state_check = !gstate.isModeClear(); // TODO: Add support for clear modes in Rasterizer::DrawSprite.
+ if ((coord_check || !gstate.isTextureMapEnabled()) && state_check) {
+ Rasterizer::DrawSprite(v0, v1);
+ return;
+ }
+
+ // Eliminate the stretch blit in DarkStalkers.
+ // We compensate for that when blitting the framebuffer in SoftGpu.cpp.
+ if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) {
+ if (v0.screenpos.x == 0x7100 && v0.screenpos.y == 0x7780 && v1.screenpos.x == 0x8f00 && v1.screenpos.y == 0x8880) {
+ // Also check for save/load dialog.
+ if (!currentDialogActive) {
+ g_DarkStalkerStretch = true;
+ if (needsClear) {
+ needsClear = false;
+ // Afterwards, we also need to clear the actual destination. Can do a fast rectfill.
+ gstate.textureMapEnable &= ~1;
+ VertexData newV0 = v0;
+ newV0.color0 = Vec4(0, 0, 0, 255);
+ Rasterizer::DrawSprite(newV0, v1);
+ gstate.textureMapEnable |= 1;
+ }
+ return;
+ } else {
+ needsClear = true;
+ }
+ } // else, handle the Capcom screen stretch, or the non-wide stretch? Or let's just not bother.
+ }
+
VertexData buf[4];
buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z);
buf[0].texturecoords = v0.texturecoords;
@@ -196,7 +239,7 @@ void ProcessRect(const VertexData& v0, const VertexData& v1)
// Color and depth values of second vertex are used for the whole rectangle
buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
- buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
+ buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; // is color1 ever used in through mode?
buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f;
buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth = 1.0f;
diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp
index 5db77b9407ff..71560776b852 100644
--- a/GPU/Software/Rasterizer.cpp
+++ b/GPU/Software/Rasterizer.cpp
@@ -1287,6 +1287,200 @@ void DrawTriangleSlice(
}
}
+// Through mode, with the specific Darkstalker settings.
+// Writes color_in to a single 5551 framebuffer pixel. Fully opaque input (alpha 255)
+// replaces the RGB directly; anything else goes through AlphaBlendingResult against
+// the current pixel. The destination's top bit (0x8000) is carried over unchanged.
+inline void DrawSinglePixel5551(u16 *pixel, const Vec4 &color_in) {
+	const u16 dst16 = *pixel;
+
+	u32 rgb;
+	if (color_in.a() != 255) {
+		const Vec4 dst = Vec4::FromRGBA(RGBA5551ToRGBA8888(dst16));
+		// ToRGB() always automatically clamps.
+		rgb = AlphaBlendingResult(color_in, dst).ToRGB();
+	} else {
+		rgb = color_in.ToRGBA() & 0xFFFFFF;
+	}
+
+	// Keep whatever the destination had in its top bit.
+	const u32 keptAlpha = (dst16 & 0x8000) ? 0xff000000 : 0x00000000;
+	*pixel = RGBA8888ToRGBA5551(rgb | keptAlpha);
+}
+
+// Modulate texture function: multiplies the primitive color by the texel color per
+// channel and divides by 255. When color doubling is enabled in gstate, the RGB
+// channels (but not alpha) are additionally doubled.
+static inline Vec4 ModulateRGBA(const Vec4& prim_color, const Vec4& texcolor) {
+#if defined(_M_SSE)
+	// We can be accurate up to 24 bit integers, should be enough.
+	const __m128 product = _mm_mul_ps(_mm_cvtepi32_ps(prim_color.ivec), _mm_cvtepi32_ps(texcolor.ivec));
+	// We double right here, only for modulate. Other tex funcs do not color double.
+	const __m128 scale = gstate.isColorDoublingEnabled()
+		? _mm_setr_ps(2.0f / 255.0f, 2.0f / 255.0f, 2.0f / 255.0f, 1.0f / 255.0f)
+		: _mm_set_ps1(1.0f / 255.0f);
+	return Vec4(_mm_cvtps_epi32(_mm_mul_ps(product, scale)));
+#else
+	Vec3 out_rgb = prim_color.rgb() * texcolor.rgb();
+	if (gstate.isColorDoublingEnabled()) {
+		out_rgb = out_rgb * 2;
+	}
+	out_rgb = out_rgb / 255;
+	const int out_a = (prim_color.a() * texcolor.a() / 255);
+	return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a);
+#endif
+}
+
+// Returns true when the current GE state is the one combination DrawSinglePixel5551
+// can be used for directly: no stencil/depth/color test, no logic op, no dithering,
+// alpha test enabled with ref 0 and mask 0xFF, alpha blending enabled, texture alpha
+// used with the modulate texture function, color mask 0, and a 5551 framebuffer.
+static inline bool IsFast5551SpriteState() {
+	return !gstate.isStencilTestEnabled() &&
+		!gstate.isDepthTestEnabled() &&
+		!gstate.isLogicOpEnabled() &&
+		!gstate.isColorTestEnabled() &&
+		!gstate.isDitherEnabled() &&
+		gstate.isAlphaTestEnabled() &&
+		gstate.getAlphaTestRef() == 0 &&
+		gstate.getAlphaTestMask() == 0xFF &&
+		gstate.isAlphaBlendEnabled() &&
+		gstate.isTextureAlphaUsed() &&
+		gstate.getTextureFunction() == GE_TEXFUNC_MODULATE &&
+		gstate.getColorMask() == 0x000000 &&
+		gstate.FrameBufFormat() == GE_FORMAT_5551;
+}
+
+// Draws an axis-aligned rectangle spanning from v0's drawing position up to (but not
+// including) v1's, clipped to the scissor rectangle from gstate.
+//
+// With texturing enabled, texels are stepped exactly one per pixel (in either
+// direction, so mirrored sprites work); the caller is expected to have verified the
+// 1:1 mapping. Without texturing the rectangle is filled with a flat color.
+//
+// Pixels go through DrawSinglePixel5551 when IsFast5551SpriteState() holds, and
+// through the generic DrawSinglePixel otherwise.
+//
+// NOTE(review): color and depth are taken from v0 here, while the triangle fallback
+// in Clipper::ProcessRect documents the second vertex as the source. The Darkstalkers
+// clear in ProcessRect passes its fill color through v0.color0, so this is left as is.
+void DrawSprite(const VertexData& v0, const VertexData& v1) {
+	const u8 *texptr = nullptr;
+
+	GETextureFormat texfmt = gstate.getTextureFormat();
+	u32 texaddr = gstate.getTextureAddress(0);
+	int texbufw = GetTextureBufw(0, texaddr, texfmt);
+	// texptr stays null when the texture address is not valid.
+	if (Memory::IsValidAddress(texaddr))
+		texptr = Memory::GetPointerUnchecked(texaddr);
+
+	Sampler::NearestFunc nearestFunc = Sampler::GetNearestFunc();  // Looks at gstate.
+
+	DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos);
+	DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos);
+
+	DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0);
+	DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0);
+
+	int z = pos0.z;
+	float fog = 1.0f;
+
+	// These do not change per pixel, so evaluate them once up front.
+	const Vec4 prim_color = v0.color0;
+	const bool isWhite = v0.color0 == Vec4(255, 255, 255, 255);
+	const bool fastPath = IsFast5551SpriteState();
+	const int fbStride = gstate.FrameBufStride();
+
+	if (gstate.isTextureMapEnabled()) {
+		// 1:1 (but with mirror support) texture mapping!
+		int s_start = v0.texturecoords.x;
+		int t_start = v0.texturecoords.y;
+		int ds = v1.texturecoords.x > v0.texturecoords.x ? 1 : -1;
+		int dt = v1.texturecoords.y > v0.texturecoords.y ? 1 : -1;
+
+		// When stepping backwards, start one texel before the given coordinate.
+		if (ds < 0) {
+			s_start += ds;
+		}
+		if (dt < 0) {
+			t_start += dt;
+		}
+
+		// First clip the right and bottom sides, since we don't need to adjust the deltas.
+		if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1;
+		if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1;
+		// Now clip the other sides, skipping the texels that fall outside.
+		if (pos0.x < scissorTL.x) {
+			s_start += (scissorTL.x - pos0.x) * ds;
+			pos0.x = scissorTL.x;
+		}
+		if (pos0.y < scissorTL.y) {
+			t_start += (scissorTL.y - pos0.y) * dt;
+			pos0.y = scissorTL.y;
+		}
+
+		if (fastPath) {
+			int t = t_start;
+			for (int y = pos0.y; y < pos1.y; y++) {
+				int s = s_start;
+				u16 *pixel = fb.Get16Ptr(pos0.x, y, fbStride);
+				if (isWhite) {
+					// White primitive color: the texel is used as-is, without calling ModulateRGBA.
+					// NOTE(review): this also bypasses color doubling - confirm that is intended.
+					for (int x = pos0.x; x < pos1.x; x++) {
+						u32 tex_color = nearestFunc(s, t, texptr, texbufw, 0);
+						// Skip texels whose alpha byte is zero.
+						if (tex_color & 0xFF000000) {
+							DrawSinglePixel5551(pixel, Vec4::FromRGBA(tex_color));
+						}
+						s += ds;
+						pixel++;
+					}
+				} else {
+					for (int x = pos0.x; x < pos1.x; x++) {
+						Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0));
+						Vec4 out_color = ModulateRGBA(prim_color, tex_color);
+						// Skip pixels whose modulated alpha is zero.
+						if (out_color.a() > 0) {
+							DrawSinglePixel5551(pixel, out_color);
+						}
+						s += ds;
+						pixel++;
+					}
+				}
+				t += dt;
+			}
+		} else {
+			int t = t_start;
+			for (int y = pos0.y; y < pos1.y; y++) {
+				int s = s_start;
+				// Not really that fast but faster than triangle.
+				for (int x = pos0.x; x < pos1.x; x++) {
+					Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0));
+					Vec4 out_color = GetTextureFunctionOutput(prim_color, tex_color);
+					DrawingCoords pos(x, y, z);
+					DrawSinglePixel(pos, (u16)z, fog, out_color);
+					s += ds;
+				}
+				t += dt;
+			}
+		}
+	} else {
+		// Clip to the scissor. The textured path treats scissorBR as inclusive (+ 1 with
+		// an exclusive loop bound); do the same here so that a clipped fill does not
+		// drop its last row/column.
+		if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1;
+		if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1;
+		if (pos0.x < scissorTL.x) pos0.x = scissorTL.x;
+		if (pos0.y < scissorTL.y) pos0.y = scissorTL.y;
+
+		if (fastPath) {
+			// A flat fill with zero alpha draws nothing on this path.
+			if (prim_color.a() == 0)
+				return;
+
+			for (int y = pos0.y; y < pos1.y; y++) {
+				u16 *pixel = fb.Get16Ptr(pos0.x, y, fbStride);
+				for (int x = pos0.x; x < pos1.x; x++) {
+					DrawSinglePixel5551(pixel, prim_color);
+					pixel++;
+				}
+			}
+		} else {
+			for (int y = pos0.y; y < pos1.y; y++) {
+				for (int x = pos0.x; x < pos1.x; x++) {
+					Vec4 fill_color = prim_color;
+					DrawingCoords pos(x, y, z);
+					DrawSinglePixel(pos, (u16)z, fog, fill_color);
+				}
+			}
+		}
+	}
+}
+
// Draws triangle, vertices specified in counter-clockwise direction
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2)
{
diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h
index 53d44e8af0a3..df3075e3a067 100644
--- a/GPU/Software/Rasterizer.h
+++ b/GPU/Software/Rasterizer.h
@@ -27,6 +27,7 @@ namespace Rasterizer {
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2);
void DrawPoint(const VertexData &v0);
void DrawLine(const VertexData &v0, const VertexData &v1);
+void DrawSprite(const VertexData &v0, const VertexData &v1);
void ClearRectangle(const VertexData &v0, const VertexData &v1);
bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer);
diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp
index 76f4a8d7b945..7254c2ce11c1 100644
--- a/GPU/Software/SoftGpu.cpp
+++ b/GPU/Software/SoftGpu.cpp
@@ -73,8 +73,6 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
},
};
- ShaderModule *vshader = draw_->GetVshaderPreset(VS_TEXTURE_COLOR_2D);
-
vdata = draw_->CreateBuffer(sizeof(Vertex) * 4, BufferUsageFlag::DYNAMIC | BufferUsageFlag::VERTEXDATA);
idata = draw_->CreateBuffer(sizeof(int) * 6, BufferUsageFlag::DYNAMIC | BufferUsageFlag::INDEXDATA);
@@ -92,6 +90,14 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
inputLayout, depth, blendstateOff, rasterNoCull, &vsTexColBufDesc
};
texColor = draw_->CreateGraphicsPipeline(pipelineDesc);
+
+ PipelineDesc pipelineDescRBSwizzle{
+ Primitive::TRIANGLE_LIST,
+ { draw_->GetVshaderPreset(VS_TEXTURE_COLOR_2D), draw_->GetFshaderPreset(FS_TEXTURE_COLOR_2D_RB_SWIZZLE) },
+ inputLayout, depth, blendstateOff, rasterNoCull, &vsTexColBufDesc
+ };
+ texColorRBSwizzle = draw_->CreateGraphicsPipeline(pipelineDescRBSwizzle);
+
inputLayout->Release();
depth->Release();
blendstateOff->Release();
@@ -122,6 +128,8 @@ void SoftGPU::DeviceRestore() {
SoftGPU::~SoftGPU() {
texColor->Release();
texColor = nullptr;
+ texColorRBSwizzle->Release();
+ texColorRBSwizzle = nullptr;
if (fbTex) {
fbTex->Release();
@@ -148,12 +156,16 @@ void SoftGPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for
GPURecord::NotifyDisplay(framebuf, stride, format);
}
+bool g_DarkStalkerStretch; // Darkstalkers hack flag: reset and possibly set by Clipper::ProcessRect when it swallows the game's stretch blit; read in CopyToCurrentFboFromDisplayRam to pick the alternate source buffer and texture coordinates.
+
// Copies RGBA8 data from RAM to the currently bound render target.
void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
if (!draw_)
return;
float u0 = 0.0f;
float u1;
+ float v0 = 1.0f;
+ float v1 = 0.0f;
if (fbTex) {
fbTex->Release();
@@ -163,6 +175,9 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
// For accuracy, try to handle 0 stride - sometimes used.
if (displayStride_ == 0) {
srcheight = 1;
+ u1 = 1.0f;
+ } else {
+ u1 = (float)srcwidth / displayStride_;
}
Draw::TextureDesc desc{};
@@ -172,7 +187,26 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
desc.mipLevels = 1;
desc.tag = "SoftGPU";
bool hasImage = true;
- if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) {
+
+ Draw::Pipeline *pipeline = texColor;
+ if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && displayFormat_ == GE_FORMAT_5551 && g_DarkStalkerStretch) {
+ u8 *data = Memory::GetPointer(0x04088000);
+ if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
+ // The perfect one.
+ desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16;
+ } else if (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
+ // RB swapped, compensate with a shader.
+ desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;
+ pipeline = texColorRBSwizzle;
+ }
+ desc.width = displayStride_ == 0 ? srcwidth : displayStride_;
+ desc.height = srcheight;
+ desc.initData.push_back(data);
+ u0 = 64.5f / 512.0f;
+ u1 = 447.5f / 512.0f;
+ v1 = 16.0f / 272.0f;
+ v0 = 240.0f / 272.0f;
+ } else if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) {
hasImage = false;
u1 = 1.0f;
} else if (displayFormat_ == GE_FORMAT_8888) {
@@ -181,11 +215,20 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
desc.height = srcheight;
desc.initData.push_back(data);
desc.format = Draw::DataFormat::R8G8B8A8_UNORM;
- if (displayStride_ != 0) {
- u1 = (float)srcwidth / displayStride_;
- } else {
- u1 = 1.0f;
+ } else if (displayFormat_ == GE_FORMAT_5551) {
+ u8 *data = Memory::GetPointer(displayFramebuf_);
+ desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;
+ if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
+ // The perfect one.
+ desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16;
+ } else if (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE) {
+ // RB swapped, compensate with a shader.
+ desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;
+ pipeline = texColorRBSwizzle;
}
+ desc.width = displayStride_ == 0 ? srcwidth : displayStride_;
+ desc.height = srcheight;
+ desc.initData.push_back(data);
} else {
// TODO: This should probably be converted in a shader instead..
fbTexBuffer.resize(srcwidth * srcheight);
@@ -247,12 +290,10 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
x2 -= 1.0f;
y2 -= 1.0f;
- float v0 = 1.0f;
- float v1 = 0.0f;
-
if (GetGPUBackend() == GPUBackend::VULKAN) {
std::swap(v0, v1);
}
+
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE });
Draw::Viewport viewport = { 0.0f, 0.0f, dstwidth, dstheight, 0.0f, 1.0f };
draw_->SetViewports(1, &viewport);
@@ -288,7 +329,7 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
Draw::VsTexColUB ub{};
memcpy(ub.WorldViewProj, g_display_rot_matrix.m, sizeof(float) * 16);
- draw_->BindPipeline(texColor);
+ draw_->BindPipeline(pipeline);
draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
draw_->BindVertexBuffers(0, 1, &vdata, nullptr);
draw_->BindIndexBuffer(idata, 0);
diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h
index f92344742e99..02660b557665 100644
--- a/GPU/Software/SoftGpu.h
+++ b/GPU/Software/SoftGpu.h
@@ -44,6 +44,10 @@ struct FormatBuffer {
inline u32 Get32(int x, int y, int stride) {
return as32[x + y * stride];
}
+
+	// Returns the address of the 16-bit element at column x of row y, where each
+	// row is `stride` elements apart.
+	inline u16 *Get16Ptr(int x, int y, int stride) {
+		const int offset = y * stride + x;
+		return &as16[offset];
+	}
};
class SoftwareDrawEngine;
@@ -108,6 +112,7 @@ class SoftGPU : public GPUCommon {
Draw::Texture *fbTex;
Draw::Pipeline *texColor;
+ Draw::Pipeline *texColorRBSwizzle;
std::vector fbTexBuffer;
Draw::SamplerState *samplerNearest = nullptr;
diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp
index 3b0603fd79b8..82a84976845a 100644
--- a/GPU/Software/TransformUnit.cpp
+++ b/GPU/Software/TransformUnit.cpp
@@ -319,8 +319,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
VertexReader vreader(buf, vtxfmt, vertex_type);
- const int max_vtcs_per_prim = 3;
- static VertexData data[max_vtcs_per_prim];
+ static VertexData data[4]; // Normally max verts per prim is 3, but we temporarily need 4 to detect rectangles from strips.
// This is the index of the next vert in data (or higher, may need modulus.)
static int data_index = 0;
@@ -439,6 +438,62 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
// Don't draw a triangle when loading the first two vertices.
int skip_count = data_index >= 2 ? 0 : 2 - data_index;
+ // If index count == 4, check if we can convert to a rectangle.
+ // This is for Darkstalkers (and should speed up many 2D games).
+ if (vertex_count == 4 && gstate.isModeThrough()) {
+ for (int vtx = 0; vtx < 4; ++vtx) {
+ if (indices) {
+ vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
+ }
+ else {
+ vreader.Goto(vtx);
+ }
+ data[vtx] = ReadVertex(vreader);
+ }
+
+ // OK, now let's look at data to detect rectangles. There are a few possibilities
+ // but we focus on Darkstalkers for now.
+ if (data[0].screenpos.x == data[1].screenpos.x &&
+ data[0].screenpos.y == data[2].screenpos.y &&
+ data[2].screenpos.x == data[3].screenpos.x &&
+ data[1].screenpos.y == data[3].screenpos.y &&
+ data[1].screenpos.y > data[0].screenpos.y && // Avoid rotation handling
+ data[2].screenpos.x > data[0].screenpos.x &&
+ data[0].texturecoords.x == data[1].texturecoords.x &&
+ data[0].texturecoords.y == data[2].texturecoords.y &&
+ data[2].texturecoords.x == data[3].texturecoords.x &&
+ data[1].texturecoords.y == data[3].texturecoords.y &&
+ data[1].texturecoords.y > data[0].texturecoords.y &&
+ data[2].texturecoords.x > data[0].texturecoords.x &&
+ data[0].color0 == data[1].color0 &&
+ data[1].color0 == data[2].color0 &&
+ data[2].color0 == data[3].color0) {
+ // It's a rectangle!
+ Clipper::ProcessRect(data[0], data[3]);
+ break;
+ }
+ // There's the other vertex order too...
+ if (data[0].screenpos.x == data[2].screenpos.x &&
+ data[0].screenpos.y == data[1].screenpos.y &&
+ data[1].screenpos.x == data[3].screenpos.x &&
+ data[2].screenpos.y == data[3].screenpos.y &&
+ data[2].screenpos.y > data[0].screenpos.y && // Avoid rotation handling
+ data[1].screenpos.x > data[0].screenpos.x &&
+ data[0].texturecoords.x == data[2].texturecoords.x &&
+ data[0].texturecoords.y == data[1].texturecoords.y &&
+ data[1].texturecoords.x == data[3].texturecoords.x &&
+ data[2].texturecoords.y == data[3].texturecoords.y &&
+ data[2].texturecoords.y > data[0].texturecoords.y &&
+ data[1].texturecoords.x > data[0].texturecoords.x &&
+ data[0].color0 == data[1].color0 &&
+ data[1].color0 == data[2].color0 &&
+ data[2].color0 == data[3].color0) {
+ // It's a rectangle!
+ Clipper::ProcessRect(data[0], data[3]);
+ break;
+ }
+ }
+
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp
index 4a7a8a466c62..6c062d6a529d 100644
--- a/UI/EmuScreen.cpp
+++ b/UI/EmuScreen.cpp
@@ -232,9 +232,6 @@ void EmuScreen::bootGame(const std::string &filename) {
break;
#endif
}
- if (g_Config.bSoftwareRendering) {
- coreParam.gpuCore = GPUCORE_SOFTWARE;
- }
// Preserve the existing graphics context.
coreParam.graphicsContext = PSP_CoreParameter().graphicsContext;
diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp
index 09f9f16a0856..7797b58289a5 100644
--- a/Windows/GPU/WindowsVulkanContext.cpp
+++ b/Windows/GPU/WindowsVulkanContext.cpp
@@ -116,6 +116,7 @@ bool WindowsVulkanContext::Init(HINSTANCE hInst, HWND hWnd, std::string *error_m
if (!g_Config.sVulkanDevice.empty())
g_Config.sVulkanDevice = g_Vulkan->GetPhysicalDeviceProperties(deviceNum).properties.deviceName;
}
+
g_Vulkan->ChooseDevice(deviceNum);
if (g_Vulkan->CreateDevice() != VK_SUCCESS) {
*error_message = g_Vulkan->InitError();
diff --git a/assets/compat.ini b/assets/compat.ini
index 662e3b0e2562..c67f0b8360c6 100644
--- a/assets/compat.ini
+++ b/assets/compat.ini
@@ -683,3 +683,13 @@ UCET00844 = true
UCUS98705 = true
UCED00971 = true
UCUS98713 = true
+
+[ForceSoftwareRenderer]
+# Darkstalkers
+ULES00016 = true
+ULUS10005 = true
+
+[DarkStalkersPresentHack]
+# Darkstalkers
+ULES00016 = true
+ULUS10005 = true
diff --git a/ext/native/base/display.cpp b/ext/native/base/display.cpp
index 3e912788db7f..c6736395519d 100644
--- a/ext/native/base/display.cpp
+++ b/ext/native/base/display.cpp
@@ -17,7 +17,7 @@ float pixel_in_dps_y = 1.0f;
float display_hz = 60.0f;
DisplayRotation g_display_rotation;
-Lin::Matrix4x4 g_display_rot_matrix;
+Lin::Matrix4x4 g_display_rot_matrix = Lin::Matrix4x4::identity();
template
void RotateRectToDisplayImpl(DisplayRect &rect, T curRTWidth, T curRTHeight) {
diff --git a/ext/native/math/lin/matrix4x4.h b/ext/native/math/lin/matrix4x4.h
index c9464384ad58..a9e312de7009 100644
--- a/ext/native/math/lin/matrix4x4.h
+++ b/ext/native/math/lin/matrix4x4.h
@@ -57,7 +57,11 @@ class Matrix4x4 {
empty();
xx=yy=zz=f; ww=1.0f;
}
-
+	// Factory: returns a new matrix that has had setIdentity() applied to it.
+	static Matrix4x4 identity() {
+		Matrix4x4 result;
+		result.setIdentity();
+		return result;
+	}
void setIdentity() {
setScaling(1.0f);
}
diff --git a/ext/native/thin3d/DataFormat.h b/ext/native/thin3d/DataFormat.h
index 467f9e4a0048..f1e168763019 100644
--- a/ext/native/thin3d/DataFormat.h
+++ b/ext/native/thin3d/DataFormat.h
@@ -30,6 +30,7 @@ enum class DataFormat : uint8_t {
R5G5B5A1_UNORM_PACK16, // A1 in the LOWER bit
B5G5R5A1_UNORM_PACK16, // A1 in the LOWER bit
A1R5G5B5_UNORM_PACK16, // A1 in the UPPER bit.
+ A1B5G5R5_UNORM_PACK16, // A1 in the UPPER bit. OpenGL-only.
R16_FLOAT,
R16G16_FLOAT,
diff --git a/ext/native/thin3d/thin3d.cpp b/ext/native/thin3d/thin3d.cpp
index d142638e07fc..340157708dc6 100644
--- a/ext/native/thin3d/thin3d.cpp
+++ b/ext/native/thin3d/thin3d.cpp
@@ -146,6 +146,50 @@ static const std::vector fsTexCol = {
}
};
+static const std::vector fsTexColRBSwizzle = { // Textured fragment shader variants (one per shader language) that swap the red and blue channels of the sampled texel before multiplying by the vertex color.
+ {ShaderLanguage::GLSL_ES_200, // GLSL variant; the __VERSION__ >= 130 block remaps the legacy names to their newer equivalents.
+ "#ifdef GL_ES\n"
+ "precision lowp float;\n"
+ "#endif\n"
+ "#if __VERSION__ >= 130\n"
+ "#define varying in\n"
+ "#define texture2D texture\n"
+ "#define gl_FragColor fragColor0\n"
+ "out vec4 fragColor0;\n"
+ "#endif\n"
+ "varying vec4 oColor0;\n"
+ "varying vec2 oTexCoord0;\n"
+ "uniform sampler2D Sampler0;\n"
+ "void main() { gl_FragColor = texture2D(Sampler0, oTexCoord0).zyxw * oColor0; }\n"
+ },
+ {ShaderLanguage::HLSL_D3D9, // D3D9 variant; .zyxw performs the red/blue swap.
+ "struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n"
+ "sampler2D Sampler0 : register(s0);\n"
+ "float4 main(PS_INPUT input) : COLOR0 {\n"
+ " return input.color * tex2D(Sampler0, input.uv).zyxw;\n"
+ "}\n"
+ },
+ {ShaderLanguage::HLSL_D3D11, // D3D11 variant; .bgra performs the red/blue swap.
+ "struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n"
+ "SamplerState samp : register(s0);\n"
+ "Texture2D tex : register(t0);\n"
+ "float4 main(PS_INPUT input) : SV_Target {\n"
+ " float4 col = input.color * tex.Sample(samp, input.uv).bgra;\n"
+ " return col;\n"
+ "}\n"
+ },
+ {ShaderLanguage::GLSL_VULKAN, // Vulkan GLSL variant; .bgra performs the red/blue swap.
+ "#version 140\n"
+ "#extension GL_ARB_separate_shader_objects : enable\n"
+ "#extension GL_ARB_shading_language_420pack : enable\n"
+ "layout(location = 0) in vec4 oColor0;\n"
+ "layout(location = 1) in vec2 oTexCoord0;\n"
+ "layout(location = 0) out vec4 fragColor0\n;" // NOTE(review): the newline sits before the ';' here; the terminating ';' then starts the next source line.
+ "layout(set = 0, binding = 1) uniform sampler2D Sampler0;\n"
+ "void main() { fragColor0 = texture(Sampler0, oTexCoord0).bgra * oColor0; }\n"
+ }
+};
+
static const std::vector fsCol = {
{ ShaderLanguage::GLSL_ES_200,
"#ifdef GL_ES\n"
@@ -330,8 +374,9 @@ bool DrawContext::CreatePresets() {
fsPresets_[FS_TEXTURE_COLOR_2D] = CreateShader(this, ShaderStage::FRAGMENT, fsTexCol);
fsPresets_[FS_COLOR_2D] = CreateShader(this, ShaderStage::FRAGMENT, fsCol);
+ fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE] = CreateShader(this, ShaderStage::FRAGMENT, fsTexColRBSwizzle);
- return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D];
+ return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE];
}
void DrawContext::DestroyPresets() {
diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h
index 7488e5ef4710..146ea138c05c 100644
--- a/ext/native/thin3d/thin3d.h
+++ b/ext/native/thin3d/thin3d.h
@@ -146,6 +146,7 @@ enum VertexShaderPreset : int {
enum FragmentShaderPreset : int {
FS_COLOR_2D,
FS_TEXTURE_COLOR_2D,
+ FS_TEXTURE_COLOR_2D_RB_SWIZZLE,
FS_MAX_PRESET,
};
diff --git a/ext/native/thin3d/thin3d_d3d9.cpp b/ext/native/thin3d/thin3d_d3d9.cpp
index cdd784b5f1c2..e9c2a8d17123 100644
--- a/ext/native/thin3d/thin3d_d3d9.cpp
+++ b/ext/native/thin3d/thin3d_d3d9.cpp
@@ -348,6 +348,10 @@ bool D3D9Texture::Create(const TextureDesc &desc) {
format_ = desc.format;
tex_ = NULL;
d3dfmt_ = FormatToD3DFMT(desc.format);
+
+ if (d3dfmt_ == D3DFMT_UNKNOWN) {
+ return false;
+ }
HRESULT hr = E_FAIL;
D3DPOOL pool = D3DPOOL_MANAGED;
@@ -424,6 +428,7 @@ void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int d
}
break;
case DataFormat::A4R4G4B4_UNORM_PACK16:
+ case DataFormat::A1R5G5B5_UNORM_PACK16:
// Native
memcpy(dest, source, width * sizeof(uint16_t));
break;
@@ -437,6 +442,10 @@ void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int d
case DataFormat::B8G8R8A8_UNORM:
memcpy(dest, source, sizeof(uint32_t) * width);
break;
+ default:
+ // Unhandled data format copy.
+ DebugBreak();
+ break;
}
}
tex_->UnlockRect(level);
diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp
index d31e63f892a0..9b164ec2abb0 100644
--- a/ext/native/thin3d/thin3d_gl.cpp
+++ b/ext/native/thin3d/thin3d_gl.cpp
@@ -277,7 +277,7 @@ bool OpenGLShaderModule::Compile(GLRenderManager *render, ShaderLanguage languag
class OpenGLInputLayout : public InputLayout {
public:
- OpenGLInputLayout(GLRenderManager *render) : render_(render), stride(0) {}
+ OpenGLInputLayout(GLRenderManager *render) : render_(render) {}
~OpenGLInputLayout();
void Compile(const InputLayoutDesc &desc);
@@ -286,7 +286,7 @@ class OpenGLInputLayout : public InputLayout {
}
GLRInputLayout *inputLayout_ = nullptr;
- int stride;
+ int stride = 0;
private:
GLRenderManager *render_;
};
@@ -718,6 +718,15 @@ class OpenGLFramebuffer : public Framebuffer {
FBColorDepth colorDepth = FBO_8888;
};
+// TODO: SSE/NEON optimize, and move to ColorConv.cpp.
+// Copies `count` 16-bit pixels from src to dest, rotating each one left by one bit:
+// the top bit moves down to bit 0 and the remaining 15 bits shift up by one.
+// This is used to turn A1R5G5B5 data (alpha in the upper bit) into R5G5B5A1
+// (alpha in the lower bit) during texture upload.
+void MoveABit(u16 *dest, const u16 *src, size_t count) {
+	// Index with size_t so the comparison against count is not a signed/unsigned mix.
+	for (size_t i = 0; i < count; i++) {
+		const u16 data = src[i];
+		// The shifts are done in int; narrowing back to 16 bits drops the bit shifted out the top.
+		dest[i] = (u16)((data >> 15) | (data << 1));
+	}
+}
+
void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data) {
if (width != width_ || height != height_ || depth != depth_) {
// When switching to texStorage we need to handle this correctly.
@@ -729,12 +738,20 @@ void OpenGLTexture::SetImageData(int x, int y, int z, int width, int height, int
if (stride == 0)
stride = width;
-
size_t alignment = DataFormatSizeInBytes(format_);
// Make a copy of data with stride eliminated.
uint8_t *texData = new uint8_t[(size_t)(width * height * alignment)];
- for (int y = 0; y < height; y++) {
- memcpy(texData + y * width * alignment, data + y * stride * alignment, width * alignment);
+
+ // Emulate support for DataFormat::A1R5G5B5_UNORM_PACK16.
+ if (format_ == DataFormat::A1R5G5B5_UNORM_PACK16) {
+ format_ = DataFormat::R5G5B5A1_UNORM_PACK16;
+ for (int y = 0; y < height; y++) {
+ MoveABit((u16 *)(texData + y * width * alignment), (const u16 *)(data + y * stride * alignment), width);
+ }
+ } else {
+ for (int y = 0; y < height; y++) {
+ memcpy(texData + y * width * alignment, data + y * stride * alignment, width * alignment);
+ }
}
render_->TextureImage(tex_, level, width, height, format_, texData);
}
@@ -1220,6 +1237,9 @@ uint32_t OpenGLContext::GetDataFormatSupport(DataFormat fmt) const {
case DataFormat::R8G8B8A8_UNORM:
return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT | FMT_AUTOGEN_MIPS;
+ case DataFormat::A1R5G5B5_UNORM_PACK16:
+ return FMT_TEXTURE; // Emulated: the data is converted to R5G5B5A1_UNORM_PACK16 during upload, which is a cheap per-pixel bit rotate.
+
case DataFormat::R32_FLOAT:
case DataFormat::R32G32_FLOAT:
case DataFormat::R32G32B32_FLOAT:
diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp
index 3de9e59db4dd..3222abf2c04e 100644
--- a/ext/native/thin3d/thin3d_vulkan.cpp
+++ b/ext/native/thin3d/thin3d_vulkan.cpp
@@ -699,6 +699,7 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushBuffer *push, const Textur
// Gonna have to generate some, which requires TRANSFER_SRC
usageBits |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
}
+
if (!vkTex_->CreateDirect(cmd, alloc, width_, height_, mipLevels_, vulkanFormat, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, usageBits)) {
ELOG("Failed to create VulkanTexture: %dx%dx%d fmt %d, %d levels", width_, height_, depth_, (int)vulkanFormat, mipLevels_);
return false;
@@ -1358,6 +1359,8 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const {
return 0;
case DataFormat::A4R4G4B4_UNORM_PACK16:
return 0;
+ case DataFormat::A1R5G5B5_UNORM_PACK16:
+ return FMT_RENDERTARGET | FMT_TEXTURE;
case DataFormat::R8G8B8A8_UNORM:
return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT;