Large diffs are not rendered by default.

@@ -17,7 +17,7 @@ enum class SrcBlendFactor : u32;
enum class ZTexOp : u32;
enum class LogicOp : u32;

struct PixelShaderConstants
struct alignas(16) PixelShaderConstants
{
std::array<int4, 4> colors;
std::array<int4, 4> kcolors;
@@ -60,7 +60,7 @@ struct PixelShaderConstants
LogicOp logic_op_mode;
};

struct VertexShaderConstants
struct alignas(16) VertexShaderConstants
{
u32 components; // .x
u32 xfmem_dualTexInfo; // .y
@@ -109,7 +109,7 @@ enum class VSExpand : u32
Line,
};

struct GeometryShaderConstants
struct alignas(16) GeometryShaderConstants
{
float4 stereoparams;
float4 lineptparams;
@@ -30,6 +30,7 @@
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"

namespace VertexLoaderManager
@@ -366,17 +367,33 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
loader->m_native_vertex_format->GetVertexDeclaration());

// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
// Therefore it's only useful to check if culling could remove a flush
const bool can_cpu_cull = g_ActiveConfig.bCPUCull &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES &&
!g_vertex_manager->HasSendableVertices();

// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
// slope.
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
// They still need to go through vertex loading, because we need to calculate a zfreeze
// reference slope.
const bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);

DataReader dst = g_vertex_manager->PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall);
const int stride = loader->m_native_vtx_decl.stride;
DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, count, stride,
cullall || can_cpu_cull);

count = loader->RunVertices(src, dst.GetPointer(), count);

if (can_cpu_cull && !cullall)
{
if (!g_vertex_manager->AreAllVerticesCulled(loader, primitive, dst.GetPointer(), count))
{
DataReader new_dst = g_vertex_manager->DisableCullAll(stride);
memmove(new_dst.GetPointer(), dst.GetPointer(), count * stride);
}
}

g_vertex_manager->AddIndices(primitive, count);
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);

@@ -104,6 +104,7 @@ VertexManagerBase::~VertexManagerBase() = default;
// Initializes the helper members used by the vertex manager: the index generator
// and the CPU culling helper. Always reports success.
bool VertexManagerBase::Initialize()
{
m_index_generator.Init();
// The CPU culler gets its own one-time setup alongside the index generator.
m_cpu_cull.Init();
return true;
}

@@ -117,6 +118,13 @@ void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_v
m_index_generator.AddIndices(primitive, num_vertices);
}

// Asks the CPU culling helper whether every one of the `count` vertices at `src`
// is culled for the given primitive type. This is a straight forward to
// m_cpu_cull; the arguments are passed through unchanged and its result is returned.
// NOTE(review): the visible caller passes the output of loader->RunVertices() as `src`,
// so `src` presumably holds vertices in the loader's native format - confirm in CPUCull.
bool VertexManagerBase::AreAllVerticesCulled(VertexLoaderBase* loader,
OpcodeDecoder::Primitive primitive, const u8* src,
u32 count)
{
return m_cpu_cull.AreAllVerticesCulled(loader, primitive, src, count);
}

DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
u32 count, u32 stride, bool cullall)
{
@@ -187,6 +195,16 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
}

// Leaves cull-all mode (if it is active) and returns a DataReader spanning the
// current write position to the end of the vertex buffer.
//
// When cull-all was active the buffer is reset for `stride` before the reader is
// created, so the returned location may differ from the one handed out earlier.
DataReader VertexManagerBase::DisableCullAll(u32 stride)
{
  // Already off: nothing to reset, just expose the current buffer range.
  if (!m_cull_all)
    return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);

  // Clear the flag before resetting so the reset happens with cull-all disabled.
  m_cull_all = false;
  ResetBuffer(stride);
  return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
}

void VertexManagerBase::FlushData(u32 count, u32 stride)
{
m_cur_buffer_pointer += count * stride;
@@ -548,6 +566,8 @@ void VertexManagerBase::Flush()
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
// must be careful to not upload any utility vertices, as the binding will be lost otherwise.
const u32 num_indices = m_index_generator.GetIndexLen();
if (num_indices == 0)
return;
u32 base_vertex, base_index;
CommitBuffer(m_index_generator.GetNumVerts(),
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
@@ -9,6 +9,7 @@
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "VideoCommon/CPUCull.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/ShaderCache.h"
@@ -100,11 +101,18 @@ class VertexManagerBase

/// Returns the primitive type currently stored in m_current_primitive_type.
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
bool AreAllVerticesCulled(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive,
const u8* src, u32 count);
virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count,
u32 stride, bool cullall);
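/// Switch cullall off after a call to PrepareForAdditionalData with cullall true.
/// Expects that you will add a nonzero number of primitives before the next flush.
/// Returns a DataReader for the current vertex buffer position. If cullall was on, the
/// buffer is reset first, so vertex data written earlier must be copied to the returned
/// location by the caller.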
DataReader DisableCullAll(u32 stride);
void FlushData(u32 count, u32 stride);

void Flush();
/// True only when the manager is neither flushed nor in cull-all mode.
bool HasSendableVertices() const { return !(m_is_flushed || m_cull_all); }

void DoState(PointerWrap& p);

@@ -201,6 +209,7 @@ class VertexManagerBase
bool m_cull_all = false;

IndexGenerator m_index_generator;
CPUCull m_cpu_cull;

private:
// Minimum number of draws per command buffer when attempting to preempt a readback operation.
@@ -65,6 +65,97 @@ void VertexShaderManager::Dirty()
dirty = true;
}

// Rebuilds m_projection_matrix from the six raw projection parameters in
// xfmem.projection.rawProjection, according to xfmem.projection.type, and returns the
// matrix with the viewport correction (and, where applicable, the free-look view) applied.
//
// Side effects: overwrites m_projection_matrix, updates the projection entries in g_stats,
// and marks the free-look camera controller clean.
Common::Matrix44 VertexShaderManager::LoadProjectionMatrix()
{
const auto& rawProjection = xfmem.projection.rawProjection;

switch (xfmem.projection.type)
{
case ProjectionType::Perspective:
{
// The free-look FOV multiplier only applies while free-look is active; otherwise it is 1.
const Common::Vec2 fov_multiplier = g_freelook_camera.IsActive() ?
g_freelook_camera.GetFieldOfViewMultiplier() :
Common::Vec2{1, 1};
// Elements 0 and 2 are scaled by the horizontal aspect-ratio hack and the X multiplier.
m_projection_matrix[0] = rawProjection[0] * g_ActiveConfig.fAspectRatioHackW * fov_multiplier.x;
m_projection_matrix[1] = 0.0f;
m_projection_matrix[2] = rawProjection[1] * g_ActiveConfig.fAspectRatioHackW * fov_multiplier.x;
m_projection_matrix[3] = 0.0f;

// Elements 5 and 6 are scaled by the vertical aspect-ratio hack and the Y multiplier.
m_projection_matrix[4] = 0.0f;
m_projection_matrix[5] = rawProjection[2] * g_ActiveConfig.fAspectRatioHackH * fov_multiplier.y;
m_projection_matrix[6] = rawProjection[3] * g_ActiveConfig.fAspectRatioHackH * fov_multiplier.y;
m_projection_matrix[7] = 0.0f;

m_projection_matrix[8] = 0.0f;
m_projection_matrix[9] = 0.0f;
m_projection_matrix[10] = rawProjection[4];
m_projection_matrix[11] = rawProjection[5];

m_projection_matrix[12] = 0.0f;
m_projection_matrix[13] = 0.0f;

// Perspective ends with (-1, 0) in elements 14/15; orthographic below uses (0, 1).
m_projection_matrix[14] = -1.0f;
m_projection_matrix[15] = 0.0f;

g_stats.gproj = m_projection_matrix;
}
break;

case ProjectionType::Orthographic:
{
// Orthographic uses the raw parameters unscaled: no aspect-ratio hack or FOV multiplier.
m_projection_matrix[0] = rawProjection[0];
m_projection_matrix[1] = 0.0f;
m_projection_matrix[2] = 0.0f;
m_projection_matrix[3] = rawProjection[1];

m_projection_matrix[4] = 0.0f;
m_projection_matrix[5] = rawProjection[2];
m_projection_matrix[6] = 0.0f;
m_projection_matrix[7] = rawProjection[3];

m_projection_matrix[8] = 0.0f;
m_projection_matrix[9] = 0.0f;
m_projection_matrix[10] = rawProjection[4];
m_projection_matrix[11] = rawProjection[5];

m_projection_matrix[12] = 0.0f;
m_projection_matrix[13] = 0.0f;

m_projection_matrix[14] = 0.0f;
m_projection_matrix[15] = 1.0f;

g_stats.g2proj = m_projection_matrix;
g_stats.proj = rawProjection;
}
break;

default:
// Unknown type: only an error is logged, so m_projection_matrix keeps its previous
// contents and those are what get corrected and returned below.
ERROR_LOG_FMT(VIDEO, "Unknown projection type: {}", xfmem.projection.type);
}

PRIM_LOG("Projection: {} {} {} {} {} {}", rawProjection[0], rawProjection[1], rawProjection[2],
rawProjection[3], rawProjection[4], rawProjection[5]);

// The viewport correction is applied on the left of the freshly built projection.
auto corrected_matrix = m_viewport_correction * Common::Matrix44::FromArray(m_projection_matrix);

// The free-look view matrix is only applied to perspective projections.
if (g_freelook_camera.IsActive() && xfmem.projection.type == ProjectionType::Perspective)
corrected_matrix *= g_freelook_camera.GetView();

// The free-look state has now been consumed, so clear the controller's dirty flag.
g_freelook_camera.GetController()->SetClean();

return corrected_matrix;
}

void VertexShaderManager::SetProjectionMatrix()
{
if (m_projection_changed || g_freelook_camera.GetController()->IsDirty())
{
m_projection_changed = false;
auto corrected_matrix = LoadProjectionMatrix();
memcpy(constants.projection.data(), corrected_matrix.data.data(), 4 * sizeof(float4));
}
}

// Syncs the shader constant buffers with xfmem
// TODO: A cleaner way to control the matrices without making a mess in the parameters field
void VertexShaderManager::SetConstants(const std::vector<std::string>& textures)
@@ -317,84 +408,7 @@ void VertexShaderManager::SetConstants(const std::vector<std::string>& textures)
m_projection_changed = false;
m_projection_graphics_mod_change = !projection_actions.empty();

const auto& rawProjection = xfmem.projection.rawProjection;

switch (xfmem.projection.type)
{
case ProjectionType::Perspective:
{
const Common::Vec2 fov_multiplier = g_freelook_camera.IsActive() ?
g_freelook_camera.GetFieldOfViewMultiplier() :
Common::Vec2{1, 1};
m_projection_matrix[0] =
rawProjection[0] * g_ActiveConfig.fAspectRatioHackW * fov_multiplier.x;
m_projection_matrix[1] = 0.0f;
m_projection_matrix[2] =
rawProjection[1] * g_ActiveConfig.fAspectRatioHackW * fov_multiplier.x;
m_projection_matrix[3] = 0.0f;

m_projection_matrix[4] = 0.0f;
m_projection_matrix[5] =
rawProjection[2] * g_ActiveConfig.fAspectRatioHackH * fov_multiplier.y;
m_projection_matrix[6] =
rawProjection[3] * g_ActiveConfig.fAspectRatioHackH * fov_multiplier.y;
m_projection_matrix[7] = 0.0f;

m_projection_matrix[8] = 0.0f;
m_projection_matrix[9] = 0.0f;
m_projection_matrix[10] = rawProjection[4];
m_projection_matrix[11] = rawProjection[5];

m_projection_matrix[12] = 0.0f;
m_projection_matrix[13] = 0.0f;

m_projection_matrix[14] = -1.0f;
m_projection_matrix[15] = 0.0f;

g_stats.gproj = m_projection_matrix;
}
break;

case ProjectionType::Orthographic:
{
m_projection_matrix[0] = rawProjection[0];
m_projection_matrix[1] = 0.0f;
m_projection_matrix[2] = 0.0f;
m_projection_matrix[3] = rawProjection[1];

m_projection_matrix[4] = 0.0f;
m_projection_matrix[5] = rawProjection[2];
m_projection_matrix[6] = 0.0f;
m_projection_matrix[7] = rawProjection[3];

m_projection_matrix[8] = 0.0f;
m_projection_matrix[9] = 0.0f;
m_projection_matrix[10] = rawProjection[4];
m_projection_matrix[11] = rawProjection[5];

m_projection_matrix[12] = 0.0f;
m_projection_matrix[13] = 0.0f;

m_projection_matrix[14] = 0.0f;
m_projection_matrix[15] = 1.0f;

g_stats.g2proj = m_projection_matrix;
g_stats.proj = rawProjection;
}
break;

default:
ERROR_LOG_FMT(VIDEO, "Unknown projection type: {}", xfmem.projection.type);
}

PRIM_LOG("Projection: {} {} {} {} {} {}", rawProjection[0], rawProjection[1], rawProjection[2],
rawProjection[3], rawProjection[4], rawProjection[5]);

auto corrected_matrix =
m_viewport_correction * Common::Matrix44::FromArray(m_projection_matrix);

if (g_freelook_camera.IsActive() && xfmem.projection.type == ProjectionType::Perspective)
corrected_matrix *= g_freelook_camera.GetView();
auto corrected_matrix = LoadProjectionMatrix();

GraphicsModActionData::Projection projection{&corrected_matrix};
for (auto action : projection_actions)
@@ -404,8 +418,6 @@ void VertexShaderManager::SetConstants(const std::vector<std::string>& textures)

memcpy(constants.projection.data(), corrected_matrix.data.data(), 4 * sizeof(float4));

g_freelook_camera.GetController()->SetClean();

dirty = true;
}

@@ -24,6 +24,7 @@ class alignas(16) VertexShaderManager
void DoState(PointerWrap& p);

// constant management
void SetProjectionMatrix();
void SetConstants(const std::vector<std::string>& textures);

void InvalidateXFRange(int start, int end);
@@ -64,4 +65,6 @@ class alignas(16) VertexShaderManager
std::array<int, 2> m_minmax_lights_changed{};

Common::Matrix44 m_viewport_correction{};

Common::Matrix44 LoadProjectionMatrix();
};
@@ -113,6 +113,7 @@ void VideoConfig::Refresh()
iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE);
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
bCPUCull = Config::Get(Config::GFX_CPU_CULL);

texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING);
iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
@@ -138,6 +138,7 @@ struct VideoConfig final
bool bPerfQueriesEnable = false;
bool bBBoxEnable = false;
bool bForceProgressive = false;
bool bCPUCull = false;

bool bEFBEmulateFormatChanges = false;
bool bSkipEFBCopyToRam = false;
@@ -423,7 +423,7 @@ struct Projection
ProjectionType type;
};

struct XFMemory
struct alignas(16) XFMemory
{
float posMatrices[256]; // 0x0000 - 0x00ff
u32 unk0[768]; // 0x0100 - 0x03ff