Skip to content

Commit

Permalink
Merge pull request #13421 from hrydgard/darkstalkers-opt
Browse files Browse the repository at this point in the history
Darkstalkers minor optimizations
  • Loading branch information
unknownbrackets committed Sep 12, 2020
2 parents 9753e31 + 7f27f56 commit a3c17ee
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 27 deletions.
25 changes: 9 additions & 16 deletions Common/ColorConv.h
Expand Up @@ -66,25 +66,18 @@ inline u32 RGB565ToRGBA8888(u16 src) {
}

// Packs a 32-bit color (R in the low byte, then G, then B) into 16-bit RGB565:
// R in bits 0-4, G in bits 5-10, B in bits 11-15. The top byte (alpha) is discarded.
// NOTE(review): this listing is a diff rendered without +/- markers. The u8 statements
// appear to be the removed implementation and the u32 statements its replacement;
// confirm against the commit before treating either half as the live code.
inline u16 RGBA8888ToRGB565(u32 value) {
// Byte-wise form: isolate each 8-bit channel first.
u8 r = value & 0xFF;
u8 g = (value >> 8) & 0xFF;
u8 b = (value >> 16) & 0xFF;
// Keep only the top 5 / 6 / 5 bits of R / G / B.
r >>= 3;
g >>= 2;
b >>= 3;
// Move each reduced channel to its field in the 16-bit result.
return (u16)r | ((u16)g << 5) | ((u16)b << 11);
// Shift-and-mask form: one shift puts each channel's top bits directly on their
// destination bits, and the mask is applied at the destination position.
u32 r = (value >> 3) & 0x1F;
// G's top 6 bits (source bits 10-15) land on bits 5-10.
u32 g = (value >> 5) & (0x3F << 5);
// B's top 5 bits (source bits 19-23) land on bits 11-15.
u32 b = (value >> 8) & (0x1F << 11);
return (u16)(r | g | b);
}

// Packs a 32-bit color (R in the low byte, then G, B, and A in the top byte) into
// 16-bit RGBA5551: R in bits 0-4, G in bits 5-9, B in bits 10-14, A in bit 15.
// Only the most significant bit of the source alpha survives.
// NOTE(review): this listing is a diff rendered without +/- markers. The u8 statements
// appear to be the removed implementation and the u32 statements its replacement;
// confirm against the commit before treating either half as the live code.
inline u16 RGBA8888ToRGBA5551(u32 value) {
// Byte-wise form: isolate each 8-bit channel first.
u8 r = value & 0xFF;
u8 g = (value >> 8) & 0xFF;
u8 b = (value >> 16) & 0xFF;
u8 a = (value >> 24) & 0xFF;
// Keep the top 5 bits of each color channel and the top bit of alpha.
r >>= 3;
g >>= 3;
b >>= 3;
a >>= 7;
// Move each reduced channel to its field in the 16-bit result.
return (u16)r | ((u16)g << 5) | ((u16)b << 10) | ((u16)a << 15);
// Shift-and-mask form: one shift puts each channel's top bits directly on their
// destination bits, and the mask is applied at the destination position.
u32 r = (value >> 3) & 0x1F;
// G's top 5 bits (source bits 11-15) land on bits 5-9.
u32 g = (value >> 6) & (0x1F << 5);
// B's top 5 bits (source bits 19-23) land on bits 10-14.
u32 b = (value >> 9) & (0x1F << 10);
// Source bit 31 (alpha MSB) lands on bit 15.
u32 a = (value >> 16) & 0x8000;
return (u16)(r | g | b | a);
}

inline u16 RGBA8888ToRGBA4444(u32 value) {
Expand Down
14 changes: 11 additions & 3 deletions GPU/Common/VertexDecoderX86.cpp
Expand Up @@ -167,7 +167,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
BeginWrite();
const u8 *start = this->AlignCode16();

#ifdef _M_IX86
#if PPSSPP_ARCH(X86)
// Store register values
PUSH(ESI);
PUSH(EDI);
Expand All @@ -179,12 +179,20 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
MOV(32, R(srcReg), MDisp(ESP, 16 + offset + 0));
MOV(32, R(dstReg), MDisp(ESP, 16 + offset + 4));
MOV(32, R(counterReg), MDisp(ESP, 16 + offset + 8));

const uint8_t STACK_FIXED_ALLOC = 64;
#else
// Parameters automatically fall into place.

// This will align the stack properly to 16 bytes (the call of this function pushed RIP, which is 8 bytes).
const uint8_t STACK_FIXED_ALLOC = 64 + 8;
#endif

// Allocate temporary storage on the stack.
SUB(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));
// Save XMM4/XMM5 which apparently can be problematic?
// Actually, if they are, it must be a compiler bug because they SHOULD be ok.
// So I won't bother.
SUB(PTRBITS, R(ESP), Imm8(64));
MOVUPS(MDisp(ESP, 0), XMM4);
MOVUPS(MDisp(ESP, 16), XMM5);
MOVUPS(MDisp(ESP, 32), XMM6);
Expand Down Expand Up @@ -265,7 +273,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
MOVUPS(XMM5, MDisp(ESP, 16));
MOVUPS(XMM6, MDisp(ESP, 32));
MOVUPS(XMM7, MDisp(ESP, 48));
ADD(PTRBITS, R(ESP), Imm8(64));
ADD(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));

#ifdef _M_IX86
// Restore register values
Expand Down
15 changes: 7 additions & 8 deletions GPU/Software/RasterizerRectangle.cpp
Expand Up @@ -31,18 +31,17 @@ extern bool currentDialogActive;
namespace Rasterizer {

// Through mode, with the specific Darkstalker settings.
// Writes one source color into a 16-bit RGBA5551 framebuffer pixel. A source alpha of 255
// takes the source RGB as-is; any other alpha blends against the existing pixel.
// The existing pixel's alpha bit (0x8000) is carried over to the result in both cases.
// NOTE(review): this listing is a diff rendered without +/- markers, so paired lines
// (two signatures, two alpha tests, two blend calls) are old/new variants of the same
// statement. The Vec4<int> variant appears to be the removed one and the u32 variant
// its replacement; confirm against the commit.
inline void DrawSinglePixel5551(u16 *pixel, const Vec4<int> &color_in) {
inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in) {
u32 new_color;
// Vec4 variant of the opaque test: read alpha through the accessor, then pack to RGBA.
if (color_in.a() == 255) {
new_color = color_in.ToRGBA() & 0xFFFFFF;
// u32 variant of the opaque test: alpha is the top byte; keep the low 24 bits (RGB).
if ((color_in >> 24) == 255) {
new_color = color_in & 0xFFFFFF;
} else {
// Expand the destination pixel to 8888 so it can be blended per channel.
const u32 old_color = RGBA5551ToRGBA8888(*pixel);
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
// Vec4 variant passes the color through; u32 variant unpacks it only on this blend path.
// NOTE(review): the blend equation is whatever AlphaBlendingResult applies; it is not visible here.
Vec3<int> blended = AlphaBlendingResult(color_in, dst);
Vec3<int> blended = AlphaBlendingResult(Vec4<int>::FromRGBA(color_in), dst);
// ToRGB() always automatically clamps.
new_color = blended.ToRGB();
}

// new_color has no alpha at this point; reuse the destination's alpha bit so it is unchanged.
new_color |= (*pixel & 0x8000) ? 0xff000000 : 0x00000000;
*pixel = RGBA8888ToRGBA5551(new_color);
}
Expand Down Expand Up @@ -148,7 +147,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) {
for (int x = pos0.x; x < pos1.x; x++) {
u32 tex_color = nearestFunc(s, t, texptr, texbufw, 0);
if (tex_color & 0xFF000000) {
DrawSinglePixel5551(pixel, Vec4<int>::FromRGBA(tex_color));
DrawSinglePixel5551(pixel, tex_color);
}
s += ds;
pixel++;
Expand All @@ -159,7 +158,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) {
Vec4<int> tex_color = Vec4<int>::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0));
prim_color = ModulateRGBA(prim_color, tex_color);
if (prim_color.a() > 0) {
DrawSinglePixel5551(pixel, prim_color);
DrawSinglePixel5551(pixel, prim_color.ToRGBA());
}
s += ds;
pixel++;
Expand Down Expand Up @@ -208,7 +207,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) {
u16 *pixel = fb.Get16Ptr(pos0.x, y, gstate.FrameBufStride());
for (int x = pos0.x; x < pos1.x; x++) {
Vec4<int> prim_color = v0.color0;
DrawSinglePixel5551(pixel, prim_color);
DrawSinglePixel5551(pixel, prim_color.ToRGBA());
pixel++;
}
}
Expand Down

0 comments on commit a3c17ee

Please sign in to comment.