Skip to content

Commit

Permalink
Merge pull request #18580 from hrydgard/sleepy-describe-vertexdec
Browse files Browse the repository at this point in the history
DescribePtr: Individually describe vertex decoders
  • Loading branch information
hrydgard committed Dec 19, 2023
2 parents f2ee437 + bca83c0 commit bf22517
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 26 deletions.
43 changes: 40 additions & 3 deletions Core/HW/MediaEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@

#include <algorithm>

#ifdef _M_SSE
#include <emmintrin.h>
#endif

#if PPSSPP_ARCH(ARM_NEON)
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif

#ifdef USE_FFMPEG

extern "C" {
Expand Down Expand Up @@ -744,9 +756,34 @@ inline void writeVideoLineRGBA(void *destp, const void *srcp, int width) {
u32_le *dest = (u32_le *)destp;
const u32_le *src = (u32_le *)srcp;

const u32 mask = 0x00FFFFFF;
for (int i = 0; i < width; ++i) {
dest[i] = src[i] & mask;
int count = width;

#if PPSSPP_ARCH(SSE2)
__m128i mask = _mm_set1_epi32(0x00FFFFFF);
while (count >= 8) {
__m128i pixels1 = _mm_and_si128(_mm_loadu_si128((const __m128i *)src), mask);
__m128i pixels2 = _mm_and_si128(_mm_loadu_si128((const __m128i *)src + 1), mask);
_mm_storeu_si128((__m128i *)dest, pixels1);
_mm_storeu_si128((__m128i *)dest + 1, pixels2);
src += 8;
dest += 8;
count -= 8;
}
#elif PPSSPP_ARCH(ARM_NEON)
int32x4_t mask = vdupq_n_u32(0x00FFFFFF);
while (count >= 8) {
int32x4_t pixels1 = vandq_u32(vld1q_u32(src), mask);
int32x4_t pixels2 = vandq_u32(vld1q_u32(src + 4), mask);
vst1q_u32(dest, pixels1);
vst1q_u32(dest + 4, pixels2);
src += 8;
dest += 8;
count -= 8;
}
#endif
const u32 mask32 = 0x00FFFFFF;
while (count--) {
*dest++ = *src++ & mask32;
}
}

Expand Down
30 changes: 30 additions & 0 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1036,3 +1036,33 @@ void TessellationDataTransfer::CopyControlPoints(float *pos, float *tex, float *
}
}
}

bool DrawEngineCommon::DescribeCodePtr(const u8 *ptr, std::string &name) const {
if (!decJitCache_ || !decJitCache_->IsInSpace(ptr)) {
return false;
}

// Loop through all the decoders and see if we have a match.
VertexDecoder *found = nullptr;
u32 foundKey;

decoderMap_.Iterate([&](u32 key, VertexDecoder *value) {
if (!found) {
if (value->IsInSpace(ptr)) {
foundKey = key;
found = value;
}
}
});

if (found) {
char temp[256];
found->ToString(temp, false);
name = temp;
snprintf(temp, sizeof(temp), "_%08X", foundKey);
name += temp;
return true;
} else {
return false;
}
}
6 changes: 1 addition & 5 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,7 @@ class DrawEngineCommon {
everUsedExactEqualDepth_ = v;
}

// Reports whether decJitCache_->IsInSpace(ptr) holds.
// Returns false when decJitCache_ is not set.
bool IsCodePtrVertexDecoder(const u8 *ptr) const {
	return decJitCache_ && decJitCache_->IsInSpace(ptr);
}
bool DescribeCodePtr(const u8 *ptr, std::string &name) const;
// Returns the current value of numDrawVerts_.
// NOTE(review): the accessor is named after draw calls but reads numDrawVerts_ —
// confirm this is the intended counter.
int GetNumDrawCalls() const {
return numDrawVerts_;
}
Expand Down
4 changes: 2 additions & 2 deletions GPU/Common/VertexDecoderArm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
// Reset the code ptr and return zero to indicate that we failed.
ResetCodePtr(GetOffset(start));
char temp[1024] = {0};
dec.ToString(temp);
dec.ToString(temp, true);
INFO_LOG(G3D, "Could not compile vertex decoder: %s", temp);
return 0;
}
Expand Down Expand Up @@ -285,7 +285,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
/*
DisassembleArm(start, GetCodePtr() - start);
char temp[1024] = {0};
dec.ToString(temp);
dec.ToString(temp, true);
INFO_LOG(G3D, "%s", temp);
*/

Expand Down
4 changes: 2 additions & 2 deletions GPU/Common/VertexDecoderArm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
// Reset the code ptr (effectively undoing what we generated) and return zero to indicate that we failed.
ResetCodePtr(GetOffset(start));
char temp[1024] = {0};
dec.ToString(temp);
dec.ToString(temp, true);
ERROR_LOG(G3D, "Could not compile vertex decoder, failed at step %d: %s", i, temp);
return nullptr;
}
Expand Down Expand Up @@ -288,7 +288,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int

if (log) {
char temp[1024] = { 0 };
dec.ToString(temp);
dec.ToString(temp, true);
INFO_LOG(JIT, "=== %s (%d bytes) ===", temp, (int)(GetCodePtr() - start));
std::vector<std::string> lines = DisassembleArm64(start, (int)(GetCodePtr() - start));
for (auto line : lines) {
Expand Down
22 changes: 16 additions & 6 deletions GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,7 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,

if (reportNoPos) {
char temp[256]{};
ToString(temp);
ToString(temp, true);
ERROR_LOG_REPORT(G3D, "Vertices without position found: (%08x) %s", fmt_, temp);
}

Expand Down Expand Up @@ -1402,7 +1402,7 @@ void VertexDecoder::CompareToJit(const u8 *startPtr, u8 *decodedptr, int count,
jittedReader.Goto(i);
if (!DecodedVertsAreSimilar(controlReader, jittedReader)) {
char name[512]{};
ToString(name);
ToString(name, true);
ERROR_LOG(G3D, "Encountered vertexjit mismatch at %d/%d for %s", i, count, name);
if (morphcount > 1) {
printf("Morph:\n");
Expand Down Expand Up @@ -1456,8 +1456,9 @@ static const char *idxnames[4] = { "-", "u8", "u16", "?" };
static const char *weightnames[4] = { "-", "u8", "u16", "f" };
static const char *colnames[8] = { "", "?", "?", "?", "565", "5551", "4444", "8888" };

int VertexDecoder::ToString(char *output) const {
char * start = output;
int VertexDecoder::ToString(char *output, bool spaces) const {
char *start = output;

output += sprintf(output, "P: %s ", posnames[pos]);
if (nrm)
output += sprintf(output, "N: %s ", nrmnames[nrm]);
Expand All @@ -1474,15 +1475,24 @@ int VertexDecoder::ToString(char *output) const {
if (throughmode)
output += sprintf(output, " (through)");

output += sprintf(output, " (size: %i)", VertexSize());
output += sprintf(output, " (%ib)", VertexSize());

if (!spaces) {
size_t len = strlen(start);
for (int i = 0; i < len; i++) {
if (start[i] == ' ')
start[i] = '_';
}
}

return output - start;
}

std::string VertexDecoder::GetString(DebugShaderStringType stringType) {
char buffer[256];
switch (stringType) {
case SHADER_STRING_SHORT_DESC:
ToString(buffer);
ToString(buffer, true);
return std::string(buffer);
case SHADER_STRING_SOURCE_CODE:
{
Expand Down
7 changes: 6 additions & 1 deletion GPU/Common/VertexDecoderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,11 @@ class VertexDecoder {
// output must be big for safety.
// Returns number of chars written.
// Ugly for speed.
int ToString(char *output) const;
int ToString(char *output, bool spaces) const;

// Returns true when ptr lies in the half-open byte range
// [jitted_, jitted_ + jittedSize_).
bool IsInSpace(const uint8_t *ptr) const {
	const uint8_t *rangeBegin = (const uint8_t *)jitted_;
	const uint8_t *rangeEnd = rangeBegin + jittedSize_;
	return ptr >= rangeBegin && ptr < rangeEnd;
}

// Mutable decoder state
mutable u8 *decoded_ = nullptr;
Expand Down Expand Up @@ -507,6 +511,7 @@ class VertexDecoderJitCache : public VERTEXDECODER_JIT_BACKEND {

// Returns a pointer to the code to run.
JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize);
bool DescribeCodePtr(const u8 *ptr, std::string &name) const;
void Clear();

void Jit_WeightsU8();
Expand Down
4 changes: 2 additions & 2 deletions GPU/Common/VertexDecoderRiscV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
// Reset the code ptr (effectively undoing what we generated) and return zero to indicate that we failed.
ResetCodePtr(GetOffset(start));
char temp[1024]{};
dec.ToString(temp);
dec.ToString(temp, true);
ERROR_LOG(G3D, "Could not compile vertex decoder, failed at step %d: %s", i, temp);
return nullptr;
}
Expand Down Expand Up @@ -351,7 +351,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int

if (log) {
char temp[1024]{};
dec.ToString(temp);
dec.ToString(temp, true);
INFO_LOG(JIT, "=== %s (%d bytes) ===", temp, (int)(GetCodePtr() - start));
std::vector<std::string> lines = DisassembleRV64(start, (int)(GetCodePtr() - start));
for (auto line : lines) {
Expand Down
8 changes: 3 additions & 5 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1938,11 +1938,9 @@ bool GPUCommon::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex>
}

// Attempts to produce a name for the code at ptr.
//
// ptr:  code address to look up.
// name: receives the description when the lookup succeeds.
//
// Returns whatever the draw engine's DescribeCodePtr returns for the same
// arguments.
bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) {
	// The only part of GPU emulation (other than software) that jits is the vertex decoder, currently,
	// which is owned by the drawengine.
	return drawEngineCommon_->DescribeCodePtr(ptr, name);
}

void GPUCommon::UpdateUVScaleOffset() {
Expand Down

0 comments on commit bf22517

Please sign in to comment.