8 changes: 7 additions & 1 deletion Source/Core/VideoCommon/VideoCommon.vcxproj.filters
Expand Up @@ -101,6 +101,9 @@
<ClCompile Include="Src\MainBase.cpp">
<Filter>Base</Filter>
</ClCompile>
<ClCompile Include="Src\PerfQueryBase.cpp">
<Filter>Base</Filter>
</ClCompile>
<ClCompile Include="Src\RenderBase.cpp">
<Filter>Base</Filter>
</ClCompile>
Expand Down Expand Up @@ -237,6 +240,9 @@
<ClInclude Include="Src\MainBase.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="Src\PerfQueryBase.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="Src\RenderBase.h">
<Filter>Base</Filter>
</ClInclude>
Expand Down Expand Up @@ -285,4 +291,4 @@
<UniqueIdentifier>{e2a527a2-ccc8-4ab8-a93e-dd2628c0f3b6}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
</Project>
2 changes: 2 additions & 0 deletions Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj
Expand Up @@ -199,6 +199,7 @@
<ClCompile Include="Src\LineGeometryShader.cpp" />
<ClCompile Include="Src\main.cpp" />
<ClCompile Include="Src\NativeVertexFormat.cpp" />
<ClCompile Include="Src\PerfQuery.cpp" />
<ClCompile Include="Src\PixelShaderCache.cpp" />
<ClCompile Include="Src\PointGeometryShader.cpp" />
<ClCompile Include="Src\PSTextureEncoder.cpp" />
Expand Down Expand Up @@ -228,6 +229,7 @@
<ClInclude Include="Src\Globals.h" />
<ClInclude Include="Src\LineGeometryShader.h" />
<ClInclude Include="Src\main.h" />
<ClInclude Include="Src\PerfQuery.h" />
<ClInclude Include="Src\PixelShaderCache.h" />
<ClInclude Include="Src\PointGeometryShader.h" />
<ClInclude Include="Src\PSTextureEncoder.h" />
Expand Down
Expand Up @@ -57,6 +57,9 @@
<ClCompile Include="Src\PointGeometryShader.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="Src\PerfQuery.cpp">
<Filter>Render</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Src\Globals.h" />
Expand Down Expand Up @@ -117,6 +120,9 @@
<ClInclude Include="Src\PointGeometryShader.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Src\PerfQuery.h">
<Filter>Render</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="D3D">
Expand Down
150 changes: 150 additions & 0 deletions Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp
@@ -0,0 +1,150 @@
#include "RenderBase.h"

#include "D3DBase.h"
#include "PerfQuery.h"

namespace DX11 {

// Creates one D3D11 occlusion query object for every slot of the query ring
// buffer, then clears the pending-query count and the accumulated results.
PerfQuery::PerfQuery()
    : m_query_read_pos()
    , m_query_count()
{
    // The description is the same for every slot, so build it once.
    const D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0);

    for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
    {
        // Start from a known value: if creation fails the slot must not hold
        // a garbage pointer that the destructor would later Release().
        m_query_buffer[i].query = NULL;

        const HRESULT hr = D3D::device->CreateQuery(&qdesc, &m_query_buffer[i].query);
        if (FAILED(hr))
            ERROR_LOG(VIDEO, "Failed to create occlusion query %d (hr=0x%08x)", i, (unsigned int)hr);
    }
    ResetQuery();
}

// Releases every query object that was successfully created by the constructor.
PerfQuery::~PerfQuery()
{
    for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
    {
        // TODO: EndQuery?
        // Slots whose creation failed are NULL and have nothing to release.
        if (m_query_buffer[i].query != NULL)
            m_query_buffer[i].query->Release();
    }
}

// Starts an occlusion query for `type` in the next free ring-buffer slot.
// Before doing so it makes room: finished queries are collected once the
// buffer is more than half full, and the oldest query is retired (waiting for
// its result) when no slot is free at all.
// Only PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP actually start a query.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
    const size_t capacity = ARRAYSIZE(m_query_buffer);

    // Is this sane?
    if (m_query_count > capacity / 2)
        WeakFlush();

    if (capacity == m_query_count)
    {
        // TODO
        FlushOne();
        ERROR_LOG(VIDEO, "flushed query buffer early!");
    }

    // Every other group has no query attached to it.
    if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
        return;

    // start query
    // The write position is the read position advanced by the number of
    // queries that are still pending, wrapped around the buffer.
    const size_t write_pos = (m_query_read_pos + m_query_count) % capacity;
    ActiveQuery& slot = m_query_buffer[write_pos];

    D3D::context->Begin(slot.query);
    slot.query_type = type;

    ++m_query_count;
}

// Ends the query that the matching EnableQuery() call started.
// Groups other than PQG_ZCOMP_ZCOMPLOC / PQG_ZCOMP are ignored.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
    if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
        return;

    // stop query
    // The most recently started slot sits one position before the write
    // position; adding the capacity before subtracting one keeps the index
    // arithmetic from going below zero.
    const size_t capacity = ARRAYSIZE(m_query_buffer);
    const size_t last_pos = (m_query_read_pos + m_query_count + capacity - 1) % capacity;

    D3D::context->End(m_query_buffer[last_pos].query);
}

// Drops all pending queries and zeroes every accumulated result counter.
// The read position is left where it is; only the pending count is cleared.
void PerfQuery::ResetQuery()
{
    m_query_count = 0;

    for (int group = 0; group != ARRAYSIZE(m_results); ++group)
        m_results[group] = 0;
}

// Maps a performance counter type onto the accumulated per-group results and
// returns that value divided by four. Unknown types yield 0.
// NOTE(review): the division by four presumably converts pixel samples into
// quads - confirm against the hardware documentation.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
    u32 samples = 0;

    switch (type)
    {
    case PQ_ZCOMP_INPUT_ZCOMPLOC:
    case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
        samples = m_results[PQG_ZCOMP_ZCOMPLOC];
        break;

    case PQ_ZCOMP_INPUT:
    case PQ_ZCOMP_OUTPUT:
        samples = m_results[PQG_ZCOMP];
        break;

    case PQ_BLEND_INPUT:
        // Blend input is the sum of both depth-compare groups.
        samples = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
        break;

    case PQ_EFB_COPY_CLOCKS:
        samples = m_results[PQG_EFB_COPY_CLOCKS];
        break;

    default:
        break;
    }

    return samples / 4;
}

void PerfQuery::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];

UINT64 result = 0;
HRESULT hr = S_FALSE;
while (hr != S_OK)
{
// TODO: Might cause us to be stuck in an infinite loop!
hr = D3D::context->GetData(entry.query, &result, sizeof(result), 0);
}

// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();

m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
--m_query_count;
}

// Retires every pending query, oldest first, until none is left.
// TODO: could selectively flush things, but I don't think that will do much
void PerfQuery::FlushResults()
{
    for (;;)
    {
        if (IsFlushed())
            return;

        FlushOne();
    }
}

void PerfQuery::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];

UINT64 result = 0;
HRESULT hr = D3D::context->GetData(entry.query, &result, sizeof(result), D3D11_ASYNC_GETDATA_DONOTFLUSH);

if (hr == S_OK)
{
// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();

m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
--m_query_count;
}
else
{
break;
}
}
}

// Returns true when no query is waiting for its result to be collected.
bool PerfQuery::IsFlushed() const
{
    const bool nothing_pending = (m_query_count == 0);
    return nothing_pending;
}


} // namespace
46 changes: 46 additions & 0 deletions Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h
@@ -0,0 +1,46 @@
#ifndef _PERFQUERY_H_
#define _PERFQUERY_H_

#include "PerfQueryBase.h"

namespace DX11 {

// Direct3D 11 performance-query backend.
// Keeps a fixed-size ring buffer of occlusion queries; results are collected
// in submission order and summed per PerfQueryGroup in m_results.
class PerfQuery : public PerfQueryBase
{
public:
    PerfQuery();
    ~PerfQuery();

    // Starts / stops a query around a draw for the given group.
    void EnableQuery(PerfQueryGroup type);
    void DisableQuery(PerfQueryGroup type);
    // Discards pending queries and zeroes the accumulated results.
    void ResetQuery();
    // Returns the accumulated value for one performance counter.
    u32 GetQueryResult(PerfQueryType type);
    // Collects the results of all pending queries.
    void FlushResults();
    // True when no query is pending.
    bool IsFlushed() const;

private:
    // Not copyable: every slot owns an ID3D11Query that the destructor
    // releases, so a copy would release each query twice. These are declared
    // but intentionally never defined.
    PerfQuery(const PerfQuery&);
    PerfQuery& operator=(const PerfQuery&);

    // One ring-buffer slot: the query object and the group it was started for.
    struct ActiveQuery
    {
        ID3D11Query* query;
        PerfQueryGroup query_type;
    };

    // Collects only the results that are available without waiting.
    void WeakFlush();

    // Only use when non-empty
    void FlushOne();

    // when testing in SMS: 64 was too small, 128 was ok
    static const int PERF_QUERY_BUFFER_SIZE = 512;

    ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
    // Index of the oldest pending query in m_query_buffer.
    int m_query_read_pos;

    // TODO: sloppy
    // Number of queries started but not yet collected.
    volatile int m_query_count;
    // Accumulated results, indexed by PerfQueryGroup.
    volatile u32 m_results[PQG_NUM_MEMBERS];
};

} // namespace

#endif // _PERFQUERY_H_
1 change: 1 addition & 0 deletions Source/Plugins/Plugin_VideoDX11/Src/Render.cpp
Expand Up @@ -65,6 +65,7 @@ ID3D11RasterizerState* resetraststate = NULL;

static ID3D11Texture2D* s_screenshot_texture = NULL;


// GX pipeline state
struct
{
Expand Down
5 changes: 3 additions & 2 deletions Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp
Expand Up @@ -208,7 +208,6 @@ void VertexManager::Draw(UINT stride)
if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0)
((DX11::Renderer*)g_renderer)->RestoreCull();
}

void VertexManager::vFlush()
{
if (LocalVBuffer == s_pCurBufferPointer) return;
Expand Down Expand Up @@ -274,8 +273,10 @@ void VertexManager::vFlush()
unsigned int stride = g_nativeVertexFmt->GetVertexStride();
g_nativeVertexFmt->SetupVertexPointers();
g_renderer->ApplyState(useDstAlpha);


g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
Draw(stride);
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);

GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);

Expand Down
3 changes: 3 additions & 0 deletions Source/Plugins/Plugin_VideoDX11/Src/main.cpp
Expand Up @@ -42,6 +42,7 @@

#include "D3DUtil.h"
#include "D3DBase.h"
#include "PerfQuery.h"
#include "PixelShaderCache.h"
#include "TextureCache.h"
#include "VertexManager.h"
Expand Down Expand Up @@ -185,6 +186,7 @@ void VideoBackend::Video_Prepare()
g_renderer = new Renderer;
g_texture_cache = new TextureCache;
g_vertex_manager = new VertexManager;
g_perf_query = new PerfQuery;
VertexShaderCache::Init();
PixelShaderCache::Init();
D3D::InitUtils();
Expand Down Expand Up @@ -227,6 +229,7 @@ void VideoBackend::Shutdown()
D3D::ShutdownUtils();
PixelShaderCache::Shutdown();
VertexShaderCache::Shutdown();
delete g_perf_query;
delete g_vertex_manager;
delete g_texture_cache;
delete g_renderer;
Expand Down
5 changes: 3 additions & 2 deletions Source/Plugins/Plugin_VideoDX9/Src/main.cpp
Expand Up @@ -57,6 +57,7 @@

#include "ConfigManager.h"
#include "VideoBackend.h"
#include "PerfQueryBase.h"

namespace DX9
{
Expand Down Expand Up @@ -97,8 +98,6 @@ void InitBackendInfo()
g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsDualSourceBlend = false;
g_Config.backend_info.bSupportsFormatReinterpretation = true;


g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants;

// adapters
Expand Down Expand Up @@ -172,6 +171,7 @@ void VideoBackend::Video_Prepare()
g_vertex_manager = new VertexManager;
g_renderer = new Renderer;
g_texture_cache = new TextureCache;
g_perf_query = new PerfQueryBase;
// VideoCommon
BPInit();
Fifo_Init();
Expand Down Expand Up @@ -209,6 +209,7 @@ void VideoBackend::Shutdown()
// internal interfaces
PixelShaderCache::Shutdown();
VertexShaderCache::Shutdown();
delete g_perf_query;
delete g_texture_cache;
delete g_renderer;
delete g_vertex_manager;
Expand Down
1 change: 1 addition & 0 deletions Source/Plugins/Plugin_VideoOGL/CMakeLists.txt
Expand Up @@ -2,6 +2,7 @@ set(SRCS Src/FramebufferManager.cpp
Src/GLUtil.cpp
Src/main.cpp
Src/NativeVertexFormat.cpp
Src/PerfQuery.cpp
Src/PixelShaderCache.cpp
Src/PostProcessing.cpp
Src/RasterFont.cpp
Expand Down
2 changes: 2 additions & 0 deletions Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj
Expand Up @@ -200,6 +200,7 @@
<ClCompile Include="Src\GLUtil.cpp" />
<ClCompile Include="Src\main.cpp" />
<ClCompile Include="Src\NativeVertexFormat.cpp" />
<ClCompile Include="Src\PerfQuery.cpp" />
<ClCompile Include="Src\PixelShaderCache.cpp" />
<ClCompile Include="Src\PostProcessing.cpp" />
<ClCompile Include="Src\RasterFont.cpp" />
Expand All @@ -222,6 +223,7 @@
<ClInclude Include="Src\Globals.h" />
<ClInclude Include="Src\GLUtil.h" />
<ClInclude Include="Src\main.h" />
<ClInclude Include="Src\PerfQuery.h" />
<ClInclude Include="Src\PixelShaderCache.h" />
<ClInclude Include="Src\PostProcessing.h" />
<ClInclude Include="Src\RasterFont.h" />
Expand Down
Expand Up @@ -36,6 +36,9 @@
<ClCompile Include="Src\VertexShaderCache.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="Src\PerfQuery.cpp">
<Filter>Render</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Src\Globals.h" />
Expand Down Expand Up @@ -72,6 +75,9 @@
<ClInclude Include="Src\VertexShaderCache.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Src\PerfQuery.h">
<Filter>Render</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
Expand Down
133 changes: 133 additions & 0 deletions Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp
@@ -0,0 +1,133 @@
#include "RenderBase.h"
#include "GLUtil.h"
#include "PerfQuery.h"

namespace OGL
{

// Generates one GL query object name per ring-buffer slot and clears the
// pending-query count and the accumulated results.
PerfQuery::PerfQuery()
    : m_query_read_pos()
    , m_query_count()
{
    ActiveQuery* const buffer_end = m_query_buffer + ARRAYSIZE(m_query_buffer);
    for (ActiveQuery* slot = m_query_buffer; slot != buffer_end; ++slot)
    {
        glGenQueries(1, &slot->query_id);
    }

    ResetQuery();
}

// Deletes the GL query object of every ring-buffer slot.
PerfQuery::~PerfQuery()
{
    ActiveQuery* const buffer_end = m_query_buffer + ARRAYSIZE(m_query_buffer);
    for (ActiveQuery* slot = m_query_buffer; slot != buffer_end; ++slot)
    {
        glDeleteQueries(1, &slot->query_id);
    }
}

// Starts a GL_SAMPLES_PASSED query for `type` in the next free ring-buffer
// slot. Room is made first: available results are collected once the buffer
// is more than half full, and the oldest query is retired when it is full.
// Only PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP actually start a query.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
    const size_t capacity = ARRAYSIZE(m_query_buffer);

    // Is this sane?
    if (m_query_count > capacity / 2)
        WeakFlush();

    if (capacity == m_query_count)
    {
        FlushOne();
        //ERROR_LOG(VIDEO, "flushed query buffer early!");
    }

    const bool is_depth_group = (type == PQG_ZCOMP_ZCOMPLOC) || (type == PQG_ZCOMP);
    if (!is_depth_group)
        return;

    // start query
    // Write position = read position plus pending queries, wrapped around.
    const size_t write_pos = (m_query_read_pos + m_query_count) % capacity;
    ActiveQuery& slot = m_query_buffer[write_pos];

    glBeginQuery(GL_SAMPLES_PASSED, slot.query_id);
    slot.query_type = type;

    ++m_query_count;
}

// Ends the currently active GL_SAMPLES_PASSED query.
// Groups other than PQG_ZCOMP_ZCOMPLOC / PQG_ZCOMP are ignored.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
    const bool is_depth_group = (type == PQG_ZCOMP_ZCOMPLOC) || (type == PQG_ZCOMP);
    if (!is_depth_group)
        return;

    // stop query
    glEndQuery(GL_SAMPLES_PASSED);
}

// Returns true when no query is waiting for its result to be collected.
bool PerfQuery::IsFlushed() const
{
    const bool nothing_pending = (m_query_count == 0);
    return nothing_pending;
}

void PerfQuery::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];

GLuint result = 0;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result);

// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();

m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
--m_query_count;
}

// Retires every pending query, oldest first, until none is left.
// TODO: could selectively flush things, but I don't think that will do much
void PerfQuery::FlushResults()
{
    for (;;)
    {
        if (IsFlushed())
            return;

        FlushOne();
    }
}

void PerfQuery::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];

GLuint result = GL_FALSE;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result);

if (GL_TRUE == result)
{
FlushOne();
}
else
{
break;
}
}
}

// Drops all pending queries and zeroes every accumulated result counter.
// The read position is left where it is; only the pending count is cleared.
void PerfQuery::ResetQuery()
{
    m_query_count = 0;

    for (int group = 0; group != ARRAYSIZE(m_results); ++group)
        m_results[group] = 0;
}

// Maps a performance counter type onto the accumulated per-group results and
// returns that value divided by four. Unknown types yield 0.
// NOTE(review): the division by four presumably converts pixel samples into
// quads - confirm against the hardware documentation.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
    if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
        return m_results[PQG_ZCOMP_ZCOMPLOC] / 4;

    if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
        return m_results[PQG_ZCOMP] / 4;

    if (type == PQ_BLEND_INPUT)
    {
        // Blend input is the sum of both depth-compare groups.
        const u32 combined = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
        return combined / 4;
    }

    if (type == PQ_EFB_COPY_CLOCKS)
        return m_results[PQG_EFB_COPY_CLOCKS] / 4;

    return 0;
}

} // namespace
46 changes: 46 additions & 0 deletions Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h
@@ -0,0 +1,46 @@
#ifndef _PERFQUERY_H_
#define _PERFQUERY_H_

#include "PerfQueryBase.h"

namespace OGL {

// OpenGL performance-query backend.
// Keeps a fixed-size ring buffer of GL query objects; results are collected
// in submission order and summed per PerfQueryGroup in m_results.
class PerfQuery : public PerfQueryBase
{
public:
    PerfQuery();
    ~PerfQuery();

    // Starts / stops a query around a draw for the given group.
    void EnableQuery(PerfQueryGroup type);
    void DisableQuery(PerfQueryGroup type);
    // Discards pending queries and zeroes the accumulated results.
    void ResetQuery();
    // Returns the accumulated value for one performance counter.
    u32 GetQueryResult(PerfQueryType type);
    // Collects the results of all pending queries.
    void FlushResults();
    // True when no query is pending.
    bool IsFlushed() const;

private:
    // Not copyable: every slot owns a GL query name that the destructor
    // deletes, so a copy would delete each query twice. These are declared
    // but intentionally never defined.
    PerfQuery(const PerfQuery&);
    PerfQuery& operator=(const PerfQuery&);

    // One ring-buffer slot: the GL query name and the group it was started for.
    struct ActiveQuery
    {
        GLuint query_id;
        PerfQueryGroup query_type;
    };

    // when testing in SMS: 64 was too small, 128 was ok
    static const int PERF_QUERY_BUFFER_SIZE = 512;

    // Collects only the results that are already available.
    void WeakFlush();
    // Only use when non-empty
    void FlushOne();

    // This contains gl query objects with unretrieved results.
    ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
    // Index of the oldest pending query in m_query_buffer.
    int m_query_read_pos;

    // TODO: sloppy
    // Number of queries started but not yet collected.
    volatile int m_query_count;
    // Accumulated results, indexed by PerfQueryGroup.
    volatile u32 m_results[PQG_NUM_MEMBERS];
};

} // namespace

#endif // _PERFQUERY_H_
1 change: 1 addition & 0 deletions Source/Plugins/Plugin_VideoOGL/Src/Render.cpp
Expand Up @@ -1419,6 +1419,7 @@ void Renderer::SetDepthMode()
else
{
// if the test is disabled write is disabled too
// TODO: When PE performance metrics are being emulated via occlusion queries, we should (probably?) enable depth test with depth function ALWAYS here
glDisable(GL_DEPTH_TEST);
glDepthMask(GL_FALSE);
}
Expand Down
4 changes: 4 additions & 0 deletions Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp
Expand Up @@ -40,6 +40,7 @@
#include "OpcodeDecoding.h"
#include "FileUtil.h"
#include "Debugger.h"
#include "PerfQueryBase.h"

#include "main.h"

Expand Down Expand Up @@ -217,7 +218,10 @@ void VertexManager::vFlush()
if (ps) PixelShaderCache::SetCurrentShader(ps->glprogid); // Lego Star Wars crashes here.
if (vs) VertexShaderCache::SetCurrentShader(vs->glprogid);

g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
Draw();
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
//ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.early_ztest ? PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT));

// run through vertex groups again to set alpha
if (useDstAlpha && !dualSourcePossible)
Expand Down
2 changes: 2 additions & 0 deletions Source/Plugins/Plugin_VideoOGL/Src/main.cpp
Expand Up @@ -92,6 +92,7 @@ Make AA apply instantly during gameplay if possible
#include "FramebufferManager.h"
#include "Core.h"
#include "Host.h"
#include "PerfQuery.h"

#include "VideoState.h"
#include "VideoBackend.h"
Expand Down Expand Up @@ -194,6 +195,7 @@ void VideoBackend::Video_Prepare()

BPInit();
g_vertex_manager = new VertexManager;
g_perf_query = new PerfQuery;
Fifo_Init(); // must be done before OpcodeDecoder_Init()
OpcodeDecoder_Init();
VertexShaderCache::Init();
Expand Down
15 changes: 15 additions & 0 deletions Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp
Expand Up @@ -90,6 +90,21 @@ void SWBPWritten(int address, int newvalue)
SWPixelEngine::pereg.boxBottom = newvalue >> 10;
SWPixelEngine::pereg.boxTop = newvalue & 0x3ff;
break;
case BPMEM_CLEAR_PIXEL_PERF:
// TODO: I didn't test if the value written to this register affects the amount of cleared registers
SWPixelEngine::pereg.perfZcompInputZcomplocLo = 0;
SWPixelEngine::pereg.perfZcompInputZcomplocHi = 0;
SWPixelEngine::pereg.perfZcompOutputZcomplocLo = 0;
SWPixelEngine::pereg.perfZcompOutputZcomplocHi = 0;
SWPixelEngine::pereg.perfZcompInputLo = 0;
SWPixelEngine::pereg.perfZcompInputHi = 0;
SWPixelEngine::pereg.perfZcompOutputLo = 0;
SWPixelEngine::pereg.perfZcompOutputHi = 0;
SWPixelEngine::pereg.perfBlendInputLo = 0;
SWPixelEngine::pereg.perfBlendInputHi = 0;
SWPixelEngine::pereg.perfEfbCopyClocksLo = 0;
SWPixelEngine::pereg.perfEfbCopyClocksHi = 0;
break;
case BPMEM_LOADTLUT0: // This one updates bpmem.tlutXferSrc, no need to do anything here.
break;
case BPMEM_LOADTLUT1: // Load a Texture Look Up Table
Expand Down
13 changes: 10 additions & 3 deletions Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp
Expand Up @@ -23,6 +23,7 @@
#include "BPMemLoader.h"
#include "XFMemLoader.h"
#include "Tev.h"
#include "SWPixelEngine.h"
#include "SWStatistics.h"
#include "SWVideoConfig.h"

Expand Down Expand Up @@ -149,9 +150,15 @@ inline void Draw(s32 x, s32 y, s32 xi, s32 yi)

if (bpmem.zcontrol.early_ztest && bpmem.zmode.testenable && g_SWVideoConfig.bZComploc)
{
// early z
if (!EfbInterface::ZCompare(x, y, z))
return;
// TODO: Test if perf regs are incremented even if test is disabled
SWPixelEngine::pereg.IncZInputQuadCount(true);
if (bpmem.zmode.testenable)
{
// early z
if (!EfbInterface::ZCompare(x, y, z))
return;
}
SWPixelEngine::pereg.IncZOutputQuadCount(true);
}

RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
Expand Down
4 changes: 2 additions & 2 deletions Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp
Expand Up @@ -79,7 +79,7 @@ void Read16(u16& _uReturnValue, const u32 _iAddress)

u16 address = _iAddress & 0xFFF;

if (address <= 0x16)
if (address <= 0x2e)
_uReturnValue = ((u16*)&pereg)[address >> 1];
}

Expand Down Expand Up @@ -111,7 +111,7 @@ void Write16(const u16 _iValue, const u32 _iAddress)
}
break;
default:
if (address <= 0x16)
if (address <= 0x2e)
((u16*)&pereg)[address >> 1] = _iValue;
break;
}
Expand Down
77 changes: 77 additions & 0 deletions Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h
Expand Up @@ -38,6 +38,21 @@ namespace SWPixelEngine
PE_BBOX_RIGHT = 0x012, // Flip Right
PE_BBOX_TOP = 0x014, // Flip Top
PE_BBOX_BOTTOM = 0x016, // Flip Bottom

// NOTE: Order not verified
// These indicate the number of quads that are being used as input/output for each particular stage
PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L = 0x18,
PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H = 0x1a,
PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L = 0x1c,
PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H = 0x1e,
PE_PERF_ZCOMP_INPUT_L = 0x20,
PE_PERF_ZCOMP_INPUT_H = 0x22,
PE_PERF_ZCOMP_OUTPUT_L = 0x24,
PE_PERF_ZCOMP_OUTPUT_H = 0x26,
PE_PERF_BLEND_INPUT_L = 0x28,
PE_PERF_BLEND_INPUT_H = 0x2a,
PE_PERF_EFB_COPY_CLOCKS_L = 0x2c,
PE_PERF_EFB_COPY_CLOCKS_H = 0x2e,
};

union UPEZConfReg
Expand Down Expand Up @@ -125,10 +140,72 @@ namespace SWPixelEngine
UPECtrlReg ctrl;
u16 unk0;
u16 token;

u16 boxLeft;
u16 boxRight;
u16 boxTop;
u16 boxBottom;

u16 perfZcompInputZcomplocLo;
u16 perfZcompInputZcomplocHi;
u16 perfZcompOutputZcomplocLo;
u16 perfZcompOutputZcomplocHi;
u16 perfZcompInputLo;
u16 perfZcompInputHi;
u16 perfZcompOutputLo;
u16 perfZcompOutputHi;
u16 perfBlendInputLo;
u16 perfBlendInputHi;
u16 perfEfbCopyClocksLo;
u16 perfEfbCopyClocksHi;

// NOTE: hardware doesn't process individual pixels but quads instead. The
// current software renderer architecture works on pixels though, so we have
// this "quad" hack here to only increment the registers on every fourth
// rendered pixel.

// Counts one pixel entering the depth test and bumps the matching 32-bit
// "Z input" counter (split over a Lo/Hi register pair) once per four pixels.
// early_ztest selects the ZCOMPLOC pair, otherwise the regular pair is used.
// The pixel counter is shared by both paths.
void IncZInputQuadCount(bool early_ztest)
{
    static int quad = 0;
    // Wait for the fourth pixel; this previously fired on every third one,
    // which contradicts the note above.
    if (++quad != 4)
        return;
    quad = 0;

    if (early_ztest)
    {
        // Carry into the high half when the low half wraps around to zero.
        if (++perfZcompInputZcomplocLo == 0)
            perfZcompInputZcomplocHi++;
    }
    else
    {
        if (++perfZcompInputLo == 0)
            perfZcompInputHi++;
    }
}
// Counts one pixel that passed the depth test and bumps the matching 32-bit
// "Z output" counter (split over a Lo/Hi register pair) once per four pixels,
// since the registers count quads rather than pixels.
// early_ztest selects the ZCOMPLOC pair, otherwise the regular pair is used.
// The pixel counter is shared by both paths.
void IncZOutputQuadCount(bool early_ztest)
{
    static int quad = 0;
    // Wait for the fourth pixel; this previously fired on every third one.
    if (++quad != 4)
        return;
    quad = 0;

    if (early_ztest)
    {
        // Carry into the high half when the low half wraps around to zero.
        if (++perfZcompOutputZcomplocLo == 0)
            perfZcompOutputZcomplocHi++;
    }
    else
    {
        if (++perfZcompOutputLo == 0)
            perfZcompOutputHi++;
    }
}
// Counts one pixel handed to the blender and bumps the 32-bit "blend input"
// counter (split over a Lo/Hi register pair) once per four pixels, since the
// registers count quads rather than pixels.
void IncBlendInputQuadCount()
{
    static int quad = 0;
    // Wait for the fourth pixel; this previously fired on every third one.
    if (++quad != 4)
        return;
    quad = 0;

    // Carry into the high half when the low half wraps around to zero.
    if (++perfBlendInputLo == 0)
        perfBlendInputHi++;
}
};

extern PEReg pereg;
Expand Down
6 changes: 6 additions & 0 deletions Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp
Expand Up @@ -225,6 +225,12 @@ u32 VideoSoftware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputDa
return value;
}

// Performance query results are not implemented for the software backend
// yet: every query type reports zero.
u32 VideoSoftware::Video_GetQueryResult(PerfQueryType type)
{
    // TODO:
    const u32 unimplemented_result = 0;
    return unimplemented_result;
}

bool VideoSoftware::Video_Screenshot(const char *_szFilename)
{
return false;
Expand Down
11 changes: 9 additions & 2 deletions Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp
Expand Up @@ -20,6 +20,7 @@
#include "Tev.h"
#include "EfbInterface.h"
#include "TextureSampler.h"
#include "SWPixelEngine.h"
#include "SWStatistics.h"
#include "SWVideoConfig.h"
#include "DebugUtil.h"
Expand Down Expand Up @@ -787,8 +788,13 @@ void Tev::Draw()
bool late_ztest = !bpmem.zcontrol.early_ztest || !g_SWVideoConfig.bZComploc;
if (late_ztest && bpmem.zmode.testenable)
{
if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2]))
return;
// TODO: Check against hw if these values get incremented even if depth testing is disabled
SWPixelEngine::pereg.IncZInputQuadCount(false);

if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2]))
return;

SWPixelEngine::pereg.IncZOutputQuadCount(false);
}

#if ALLOW_TEV_DUMPS
Expand All @@ -812,6 +818,7 @@ void Tev::Draw()
#endif

INCSTAT(swstats.thisFrame.tevPixelsOut);
SWPixelEngine::pereg.IncBlendInputQuadCount();

EfbInterface::BlendTev(Position[0], Position[1], output);
}
Expand Down
2 changes: 2 additions & 0 deletions Source/Plugins/Plugin_VideoSoftware/Src/VideoBackend.h
Expand Up @@ -26,7 +26,9 @@ class VideoSoftware : public VideoBackend
void Video_ExitLoop();
void Video_BeginField(u32, FieldType, u32, u32);
void Video_EndField();

u32 Video_AccessEFB(EFBAccessType, u32, u32, u32);
u32 Video_GetQueryResult(PerfQueryType type);

void Video_AddMessage(const char* pstr, unsigned int milliseconds);
void Video_ClearMessages();
Expand Down