111 changes: 0 additions & 111 deletions Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.cpp

This file was deleted.

25 changes: 0 additions & 25 deletions Source/Core/Core/FifoPlayer/FifoPlaybackAnalyzer.h

This file was deleted.

215 changes: 164 additions & 51 deletions Source/Core/Core/FifoPlayer/FifoPlayer.cpp
Expand Up @@ -4,6 +4,7 @@
#include "Core/FifoPlayer/FifoPlayer.h"

#include <algorithm>
#include <cstring>
#include <mutex>

#include "Common/Assert.h"
Expand All @@ -12,7 +13,6 @@
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/HW/CPU.h"
#include "Core/HW/GPFifo.h"
Expand All @@ -31,6 +31,136 @@
// TODO: Move texMem somewhere else so this isn't an issue.
#include "VideoCommon/TextureDecoder.h"

namespace
{
// Opcode-decoder callback used during playback analysis. It tracks CP state and classifies each
// decoded FIFO command (primitive data, EFB copy trigger, NOP, other) so that AnalyzeFrames can
// split a frame's FIFO data into FramePart ranges.
class FifoPlaybackAnalyzer : public OpcodeDecoder::Callback
{
public:
// Clears frame_info and refills it with one AnalyzedFrameInfo per frame of the given file.
static void AnalyzeFrames(FifoDataFile* file, std::vector<AnalyzedFrameInfo>& frame_info);

// cpmem is forwarded to the CPState constructor to build the tracked state (m_cpmem).
explicit FifoPlaybackAnalyzer(const u32* cpmem) : m_cpmem(cpmem) {}

// Callbacks with an empty body are opcodes this analyzer does not need to react to.
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {}
// CP register writes are applied to the tracked CP state.
OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); }
OPCODE_CALLBACK(void OnBP(u8 command, u32 value));
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) {}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data));
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) {}
OPCODE_CALLBACK(void OnNop(u32 count));
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {}

// Turns the per-command internal flags below into the public result flags, then resets them.
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size));

OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }

// Results for the most recently processed command; read by AnalyzeFrames after each RunCommand.
// OnCommand sets at most one of these three per command.
bool m_start_of_primitives = false;
bool m_end_of_primitives = false;
bool m_efb_copy = false;
// Internal state, copied to above in OnCommand
// m_was_primitive remembers whether the previous non-NOP command was a primitive command.
bool m_was_primitive = false;
bool m_is_primitive = false;
bool m_is_copy = false;
bool m_is_nop = false;
// CP state as updated by the CP register writes seen so far.
CPState m_cpmem;
};

// Splits every frame of `file` into consecutive parts (Commands, PrimitiveData, EFBCopy) and
// stores them in `frame_info`, which is cleared and resized to the file's frame count.
//
// A single analyzer instance is reused for all frames, so CP state and the "previous command was
// a primitive" flag carry over from one frame to the next.
void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file,
std::vector<AnalyzedFrameInfo>& frame_info)
{
FifoPlaybackAnalyzer analyzer(file->GetCPMem());
frame_info.clear();
frame_info.resize(file->GetFrameCount());

for (u32 frame_no = 0; frame_no < file->GetFrameCount(); frame_no++)
{
const FifoFrameInfo& frame = file->GetFrame(frame_no);
AnalyzedFrameInfo& analyzed = frame_info[frame_no];

// Byte offset of the command currently being decoded within frame.fifoData.
u32 offset = 0;

// Byte offset at which the part currently being accumulated begins.
u32 part_start = 0;
// Snapshot of the CP state taken when the current run of primitive commands started.
CPState cpmem;

while (offset < frame.fifoData.size())
{
// NOTE(review): if RunCommand can return 0 (e.g. for truncated data), offset never advances
// and this loop would not terminate - confirm against OpcodeDecoder::RunCommand.
const u32 cmd_size = OpcodeDecoder::RunCommand(&frame.fifoData[offset],
u32(frame.fifoData.size()) - offset, analyzer);

// The two checks below run before offset is advanced, so the part being closed ends right
// before the command that was just decoded.
if (analyzer.m_start_of_primitives)
{
// Start of primitive data for an object
analyzed.AddPart(FramePartType::Commands, part_start, offset, analyzer.m_cpmem);
part_start = offset;
// Copy cpmem now, because end_of_primitives isn't triggered until the first opcode after
// primitive data, and the first opcode might update cpmem
std::memcpy(&cpmem, &analyzer.m_cpmem, sizeof(CPState));
}
if (analyzer.m_end_of_primitives)
{
// End of primitive data for an object, and thus end of the object
analyzed.AddPart(FramePartType::PrimitiveData, part_start, offset, cpmem);
part_start = offset;
}

offset += cmd_size;

if (analyzer.m_efb_copy)
{
// We increase the offset beforehand, so that the trigger EFB copy command is included.
analyzed.AddPart(FramePartType::EFBCopy, part_start, offset, analyzer.m_cpmem);
part_start = offset;
}
}

// The frame should end with an EFB copy, so part_start should have been updated to the end.
ASSERT(part_start == frame.fifoData.size());
ASSERT(offset == frame.fifoData.size());
}
}

// BP register write. The only write the analyzer cares about is the one that triggers an EFB
// copy; it is remembered in m_is_copy until OnCommand consumes it.
void FifoPlaybackAnalyzer::OnBP(u8 command, u32 value)
{
  const bool triggers_efb_copy = (command == BPMEM_TRIGGER_EFB_COPY);
  if (!triggers_efb_copy)
    return;

  m_is_copy = true;
}

// Draw command. The vertex payload itself is irrelevant here; only the fact that the current
// command carries primitive data is recorded, for OnCommand to consume.
void FifoPlaybackAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive /*primitive*/, u8 /*vat*/,
                                              u32 /*vertex_size*/, u16 /*num_vertices*/,
                                              const u8* /*vertex_data*/)
{
  m_is_primitive = true;
}

// NOP command. Flags the current command as a NOP so that OnCommand leaves the
// primitive/non-primitive tracking untouched for it.
void FifoPlaybackAnalyzer::OnNop(u32 /*count*/)
{
  m_is_nop = true;
}

// Publishes the classification of the command that was just decoded and resets the per-command
// scratch flags. (Presumably invoked by the decoder once per command, after the opcode-specific
// callback - confirm against OpcodeDecoder.)
//
// At most one of m_start_of_primitives / m_end_of_primitives / m_efb_copy is set per command:
//  - start: this command is a primitive and the previous non-NOP command was not
//  - end:   the previous non-NOP command was a primitive and this one is not
//  - copy:  neither transition happened and this command triggered an EFB copy
// NOP commands never set any of them and do not update m_was_primitive.
void FifoPlaybackAnalyzer::OnCommand(const u8* data, u32 size)
{
  const bool tracked = !m_is_nop;
  const bool entering_primitives = m_is_primitive && !m_was_primitive;
  const bool leaving_primitives = m_was_primitive && !m_is_primitive;

  m_start_of_primitives = tracked && entering_primitives;
  m_end_of_primitives = tracked && !entering_primitives && leaving_primitives;
  m_efb_copy = tracked && !entering_primitives && !leaving_primitives && m_is_copy;

  if (tracked)
    m_was_primitive = m_is_primitive;

  // Scratch flags only describe a single command.
  m_is_nop = false;
  m_is_copy = false;
  m_is_primitive = false;
}
} // namespace

bool IsPlayingBackFifologWithBrokenEFBCopies = false;

FifoPlayer::FifoPlayer() : m_Loop{SConfig::GetInstance().bLoopFifoReplay}
Expand Down Expand Up @@ -191,7 +321,7 @@ u32 FifoPlayer::GetMaxObjectCount() const
u32 result = 0;
for (auto& frame : m_FrameInfo)
{
const u32 count = static_cast<u32>(frame.objectStarts.size());
const u32 count = frame.part_type_counts[FramePartType::PrimitiveData];
if (count > result)
result = count;
}
Expand All @@ -202,7 +332,7 @@ u32 FifoPlayer::GetFrameObjectCount(u32 frame) const
{
if (frame < m_FrameInfo.size())
{
return static_cast<u32>(m_FrameInfo[frame].objectStarts.size());
return m_FrameInfo[frame].part_type_counts[FramePartType::PrimitiveData];
}

return 0;
Expand Down Expand Up @@ -262,55 +392,35 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo&
m_ElapsedCycles = 0;
m_FrameFifoSize = static_cast<u32>(frame.fifoData.size());

// Determine start and end objects
u32 numObjects = (u32)(info.objectStarts.size());
u32 drawStart = std::min(numObjects, m_ObjectRangeStart);
u32 drawEnd = std::min(numObjects - 1, m_ObjectRangeEnd);
u32 memory_update = 0;
u32 object_num = 0;

u32 position = 0;
u32 memoryUpdate = 0;

// Skip memory updates during frame if true
// Skip all memory updates if early memory updates are enabled, as we already wrote them
if (m_EarlyMemoryUpdates)
{
memoryUpdate = (u32)(frame.memoryUpdates.size());
memory_update = (u32)(frame.memoryUpdates.size());
}

if (numObjects > 0)
for (const FramePart& part : info.parts)
{
u32 objectNum = 0;
bool show_part;

// Write fifo data skipping objects before the draw range
while (objectNum < drawStart)
if (part.m_type == FramePartType::PrimitiveData)
{
WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info);

position = info.objectEnds[objectNum];
++objectNum;
show_part = m_ObjectRangeStart <= object_num && object_num <= m_ObjectRangeEnd;
object_num++;
}

// Write objects in draw range
if (objectNum < numObjects && drawStart <= drawEnd)
else
{
objectNum = drawEnd;
WriteFramePart(position, info.objectEnds[objectNum], memoryUpdate, frame, info);
position = info.objectEnds[objectNum];
++objectNum;
// We always include commands and EFB copies, as commands from earlier objects still apply to
// later ones (games generally do not reconfigure everything for each object)
show_part = true;
}

// Write fifo data skipping objects after the draw range
while (objectNum < numObjects)
{
WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info);

position = info.objectEnds[objectNum];
++objectNum;
}
if (show_part)
WriteFramePart(part, &memory_update, frame);
}

// Write data after the last object
WriteFramePart(position, static_cast<u32>(frame.fifoData.size()), memoryUpdate, frame, info);

FlushWGP();

// Sleep while the GPU is active
Expand All @@ -321,36 +431,39 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo&
}
}

void FifoPlayer::WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate,
const FifoFrameInfo& frame, const AnalyzedFrameInfo& info)
void FifoPlayer::WriteFramePart(const FramePart& part, u32* next_mem_update,
const FifoFrameInfo& frame)
{
const u8* const data = frame.fifoData.data();

while (nextMemUpdate < frame.memoryUpdates.size() && dataStart < dataEnd)
u32 data_start = part.m_start;
const u32 data_end = part.m_end;

while (*next_mem_update < frame.memoryUpdates.size() && data_start < data_end)
{
const MemoryUpdate& memUpdate = info.memoryUpdates[nextMemUpdate];
const MemoryUpdate& memUpdate = frame.memoryUpdates[*next_mem_update];

if (memUpdate.fifoPosition < dataEnd)
if (memUpdate.fifoPosition < data_end)
{
if (dataStart < memUpdate.fifoPosition)
if (data_start < memUpdate.fifoPosition)
{
WriteFifo(data, dataStart, memUpdate.fifoPosition);
dataStart = memUpdate.fifoPosition;
WriteFifo(data, data_start, memUpdate.fifoPosition);
data_start = memUpdate.fifoPosition;
}

WriteMemory(memUpdate);

++nextMemUpdate;
++*next_mem_update;
}
else
{
WriteFifo(data, dataStart, dataEnd);
dataStart = dataEnd;
WriteFifo(data, data_start, data_end);
data_start = data_end;
}
}

if (dataStart < dataEnd)
WriteFifo(data, dataStart, dataEnd);
if (data_start < data_end)
WriteFifo(data, data_start, data_end);
}

void FifoPlayer::WriteAllMemoryUpdates()
Expand Down
50 changes: 40 additions & 10 deletions Source/Core/Core/FifoPlayer/FifoPlayer.h
Expand Up @@ -5,16 +5,18 @@

#include <functional>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "Common/Assert.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include "Core/PowerPC/CPUCoreBase.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/OpcodeDecoding.h"

class FifoDataFile;
struct MemoryUpdate;
struct AnalyzedFrameInfo;

namespace CPU
{
Expand Down Expand Up @@ -43,16 +45,46 @@ enum class State;
// 8. The output of fifoplayer would be wrong.

// To keep compatibility with old fifologs, we have this flag which signals texture cache to not
// bother
// hashing the memory and just assume the hash matched.
// bother hashing the memory and just assume the hash matched.
// At a later point proper efb copy support should be added to fiforecorder and this flag will
// change
// based on the version of the .dff file, but until then it will always be true when a fifolog is
// playing.
// change based on the version of the .dff file, but until then it will always be true when a
// fifolog is playing.

// Shitty global to fix a shitty problem
extern bool IsPlayingBackFifologWithBrokenEFBCopies;

// Kind of a contiguous range of a frame's FIFO data, as produced by the playback analyzer.
// The enumerators are used as keys of AnalyzedFrameInfo::part_type_counts, whose EnumMap is
// declared with EFBCopy as its last value - keep EFBCopy last when adding enumerators.
enum class FramePartType
{
// Commands preceding a run of primitive data.
Commands,
// A run of primitive (draw) commands; FifoPlayer counts these as objects.
PrimitiveData,
// Commands up to and including one that triggers an EFB copy.
EFBCopy,
};

// One contiguous range of a frame's FIFO data together with a copy of the CP state associated
// with it. All members are const, so a FramePart is immutable once constructed.
struct FramePart
{
// Stores the arguments as-is; cpmem is copied into m_cpmem.
constexpr FramePart(FramePartType type, u32 start, u32 end, const CPState& cpmem)
: m_type(type), m_start(start), m_end(end), m_cpmem(cpmem)
{
}

const FramePartType m_type;
// Offsets into the frame's FIFO data. The analyzer makes consecutive parts share a boundary
// (one part's m_end is the next part's m_start), i.e. the range is [m_start, m_end).
const u32 m_start;
const u32 m_end;
const CPState m_cpmem;
};

// Analysis result for a single frame: the ordered list of parts its FIFO data was split into,
// plus a running count of how many parts of each type were added.
struct AnalyzedFrameInfo
{
  // Appends a part covering [start, end) of the frame's FIFO data and bumps the counter for its
  // type. cpmem is copied into the stored FramePart.
  void AddPart(FramePartType type, u32 start, u32 end, const CPState& cpmem)
  {
    parts.emplace_back(type, start, end, cpmem);
    ++part_type_counts[type];
  }

  // Parts in the order they were added.
  std::vector<FramePart> parts;
  // Number of parts added so far, keyed by part type.
  Common::EnumMap<u32, FramePartType::EFBCopy> part_type_counts;
};

class FifoPlayer
{
public:
Expand Down Expand Up @@ -102,14 +134,12 @@ class FifoPlayer

private:
class CPUCore;

FifoPlayer();

CPU::State AdvanceFrame();

void WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& info);
void WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, const FifoFrameInfo& frame,
const AnalyzedFrameInfo& info);
void WriteFramePart(const FramePart& part, u32* next_mem_update, const FifoFrameInfo& frame);

void WriteAllMemoryUpdates();
void WriteMemory(const MemoryUpdate& memUpdate);
Expand Down
103 changes: 0 additions & 103 deletions Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp

This file was deleted.

15 changes: 0 additions & 15 deletions Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h

This file was deleted.

163 changes: 159 additions & 4 deletions Source/Core/Core/FifoPlayer/FifoRecorder.cpp
Expand Up @@ -6,13 +6,168 @@
#include <algorithm>
#include <cstring>

#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/Thread.h"

#include "Core/ConfigManager.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoRecordAnalyzer.h"
#include "Core/HW/Memmap.h"

#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/XFStructs.h"

// Opcode-decoder callback used while recording. It tracks CP state and reports to the owning
// FifoRecorder (via UseMemory) which memory ranges the recorded commands refer to: indexed
// loads and the arrays referenced by indexed vertex components.
class FifoRecorder::FifoRecordAnalyzer : public OpcodeDecoder::Callback
{
public:
// Leaves the tracked CP state default-constructed.
explicit FifoRecordAnalyzer(FifoRecorder* owner) : m_owner(owner) {}
// Builds the tracked CP state from cpmem (forwarded to the CPState constructor).
explicit FifoRecordAnalyzer(FifoRecorder* owner, const u32* cpmem)
: m_owner(owner), m_cpmem(cpmem)
{
}

// Callbacks with an empty body are opcodes this analyzer does not need to react to.
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {}
// CP register writes are applied to the tracked CP state.
OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); }
OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) {}
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size));
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data));
// Display list calls are not followed here; one reaching this analyzer is only logged.
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size))
{
WARN_LOG_FMT(VIDEO,
"Unhandled display list call {:08x} {:08x}; should have been inlined earlier",
address, size);
}
OPCODE_CALLBACK(void OnNop(u32 count)) {}
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {}

OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {}

OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }

private:
// Reports the array memory referenced by one vertex component, if that component is indexed.
void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type,
u32 component_offset, u32 vertex_size, u16 num_vertices,
const u8* vertex_data);

// Recorder that receives the UseMemory notifications; the pointer itself never changes.
FifoRecorder* const m_owner;
// CP state as updated by the CP register writes seen so far.
CPState m_cpmem;
};

// Indexed load: the source data lives at array base + stride * index for the given CP array.
// That range (size 32-bit words) is reported to the recorder as XF data.
void FifoRecorder::FifoRecordAnalyzer::OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)
{
  const u32 element_offset = m_cpmem.array_strides[array] * index;
  const u32 load_address = m_cpmem.array_bases[array] + element_offset;
  const auto byte_count = size * sizeof(u32);

  m_owner->UseMemory(load_address, byte_count, MemoryUpdate::XF_DATA);
}

// TODO: The following code is copied with modifications from VertexLoaderBase.
// Surely there's a better solution?
#include "VideoCommon/VertexLoader_Color.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoader_TextCoord.h"

// Draw command. Walks the per-vertex layout described by the tracked CP state (vtx_desc and
// vtx_attr[vat]) in order - matrix indices, position, normal, colors, texture coordinates - and
// hands each array-backed component to ProcessVertexComponent so that the memory referenced by
// indexed components gets recorded.
//
// Each component is passed its OWN component format from vtx_desc. (Previously the position's
// format was passed for normals, colors and texture coordinates as well, so whether those arrays
// were treated as indexed, and whether their indices were read as 8 or 16 bits, wrongly followed
// the position attribute.)
//
// primitive is unused. vertex_data points at the first vertex; vertices are vertex_size bytes
// apart and there are num_vertices of them.
void FifoRecorder::FifoRecordAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive,
                                                          u8 vat, u32 vertex_size, u16 num_vertices,
                                                          const u8* vertex_data)
{
  const auto& vtx_desc = m_cpmem.vtx_desc;
  const auto& vtx_attr = m_cpmem.vtx_attr[vat];

  // Byte offset of the component currently being looked at, relative to the start of a vertex.
  u32 offset = 0;

  // Every enabled matrix index takes one byte at the start of the vertex.
  if (vtx_desc.low.PosMatIdx)
    offset++;
  for (auto texmtxidx : vtx_desc.low.TexMatIdx)
  {
    if (texmtxidx)
      offset++;
  }

  const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
                                                      vtx_attr.g0.PosElements);
  ProcessVertexComponent(CPArray::Position, vtx_desc.low.Position, offset, vertex_size,
                         num_vertices, vertex_data);
  offset += pos_size;

  const u32 norm_size =
      VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
                                   vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3);
  ProcessVertexComponent(CPArray::Normal, vtx_desc.low.Normal, offset, vertex_size, num_vertices,
                         vertex_data);
  offset += norm_size;

  for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++)
  {
    const u32 color_size =
        VertexLoader_Color::GetSize(vtx_desc.low.Color[i], vtx_attr.GetColorFormat(i));
    ProcessVertexComponent(CPArray::Color0 + i, vtx_desc.low.Color[i], offset, vertex_size,
                           num_vertices, vertex_data);
    offset += color_size;
  }
  for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++)
  {
    const u32 tc_size = VertexLoader_TextCoord::GetSize(
        vtx_desc.high.TexCoord[i], vtx_attr.GetTexFormat(i), vtx_attr.GetTexElements(i));
    ProcessVertexComponent(CPArray::TexCoord0 + i, vtx_desc.high.TexCoord[i], offset, vertex_size,
                           num_vertices, vertex_data);
    offset += tc_size;
  }

  // The component sizes must add up to the vertex size the decoder computed.
  ASSERT(offset == vertex_size);
}

// If a component is indexed, the array it indexes into for data must be saved.
//
// Scans the index stored at component_offset in each of the num_vertices vertices (which are
// vertex_size bytes apart, starting at vertex_data), finds the largest index that is not the
// "skip" marker, and reports array elements 0..max of array_index to the recorder as vertex
// stream memory. Non-indexed components are ignored. If no usable index is found, the maximum
// stays 0, so one element is still reported.
void FifoRecorder::FifoRecordAnalyzer::ProcessVertexComponent(CPArray array_index,
                                                              VertexComponentFormat array_type,
                                                              u32 component_offset, u32 vertex_size,
                                                              u16 num_vertices,
                                                              const u8* vertex_data)
{
  if (!IsIndexed(array_type))
    return;

  // Index8 stores one byte per index; every other indexed format is read as a 16-bit index via
  // Common::swap16.
  const bool byte_indices = (array_type == VertexComponentFormat::Index8);
  // An all-ones index (0xff / 0xffff) skips the vertex and must not count towards the maximum.
  const u16 skip_marker = byte_indices ? 0xff : 0xffff;

  u16 highest_index = 0;
  for (u16 vertex_num = 0; vertex_num < num_vertices; ++vertex_num)
  {
    const u8* const index_ptr =
        vertex_data + static_cast<size_t>(vertex_num) * vertex_size + component_offset;
    const u16 index = byte_indices ? u16{*index_ptr} : Common::swap16(index_ptr);

    if (index == skip_marker)
      continue;
    if (index > highest_index)
      highest_index = index;
  }

  const u32 array_start = m_cpmem.array_bases[array_index];
  const u32 array_size = m_cpmem.array_strides[array_index] * (highest_index + 1);

  m_owner->UseMemory(array_start, array_size, MemoryUpdate::VERTEX_STREAM);
}

static FifoRecorder instance;

FifoRecorder::FifoRecorder() = default;
Expand Down Expand Up @@ -76,7 +231,7 @@ void FifoRecorder::WriteGPCommand(const u8* data, u32 size)
{
// Assumes data contains all information for the command
// Calls FifoRecorder::UseMemory
const u32 analyzed_size = FifoAnalyzer::AnalyzeCommand(data, FifoAnalyzer::DecodeMode::Record);
const u32 analyzed_size = OpcodeDecoder::RunCommand(data, size, *m_record_analyzer);

// Make sure FifoPlayer's command analyzer agrees about the size of the command.
if (analyzed_size != size)
Expand Down Expand Up @@ -211,7 +366,7 @@ void FifoRecorder::SetVideoMemory(const u32* bpMem, const u32* cpMem, const u32*
memcpy(m_File->GetTexMem(), texMem, FifoDataFile::TEX_MEM_SIZE);
}

FifoRecordAnalyzer::Initialize(cpMem);
m_record_analyzer = std::make_unique<FifoRecordAnalyzer>(this, cpMem);
}

bool FifoRecorder::IsRecording() const
Expand Down
4 changes: 4 additions & 0 deletions Source/Core/Core/FifoPlayer/FifoRecorder.h
Expand Up @@ -8,6 +8,7 @@
#include <mutex>
#include <vector>

#include "Common/Assert.h"
#include "Core/FifoPlayer/FifoDataFile.h"

class FifoRecorder
Expand Down Expand Up @@ -47,6 +48,8 @@ class FifoRecorder
static FifoRecorder& GetInstance();

private:
class FifoRecordAnalyzer;

// Accessed from both GUI and video threads

std::recursive_mutex m_mutex;
Expand All @@ -65,6 +68,7 @@ class FifoRecorder
bool m_SkipFutureData = true;
bool m_FrameEnded = false;
FifoFrameInfo m_CurrentFrame;
std::unique_ptr<FifoRecordAnalyzer> m_record_analyzer;
std::vector<u8> m_FifoData;
std::vector<u8> m_Ram;
std::vector<u8> m_ExRam;
Expand Down
6 changes: 0 additions & 6 deletions Source/Core/DolphinLib.props
Expand Up @@ -217,11 +217,8 @@
<ClInclude Include="Core\DSP\Jit\x64\DSPJitTables.h" />
<ClInclude Include="Core\DSP\LabelMap.h" />
<ClInclude Include="Core\DSPEmulator.h" />
<ClInclude Include="Core\FifoPlayer\FifoAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoDataFile.h" />
<ClInclude Include="Core\FifoPlayer\FifoPlaybackAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoPlayer.h" />
<ClInclude Include="Core\FifoPlayer\FifoRecordAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoRecorder.h" />
<ClInclude Include="Core\FreeLookConfig.h" />
<ClInclude Include="Core\FreeLookManager.h" />
Expand Down Expand Up @@ -815,11 +812,8 @@
<ClCompile Include="Core\DSP\Jit\x64\DSPJitUtil.cpp" />
<ClCompile Include="Core\DSP\LabelMap.cpp" />
<ClCompile Include="Core\DSPEmulator.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoDataFile.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoPlaybackAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoPlayer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoRecordAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoRecorder.cpp" />
<ClCompile Include="Core\FreeLookConfig.cpp" />
<ClCompile Include="Core\FreeLookManager.cpp" />
Expand Down
685 changes: 379 additions & 306 deletions Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions Source/Core/DolphinQt/FIFO/FIFOAnalyzer.h
Expand Up @@ -58,15 +58,19 @@ class FIFOAnalyzer final : public QWidget

struct SearchResult
{
constexpr SearchResult(u32 frame, u32 object, u32 cmd)
: m_frame(frame), m_object(object), m_cmd(cmd)
constexpr SearchResult(u32 frame, u32 object_idx, u32 cmd)
: m_frame(frame), m_object_idx(object_idx), m_cmd(cmd)
{
}
const u32 m_frame;
const u32 m_object;
// Index in tree view. Does not correspond with object numbers or part numbers.
const u32 m_object_idx;
const u32 m_cmd;
};

// Offsets from the start of the first part in an object for each command within the currently
// selected object.
std::vector<int> m_object_data_offsets;

std::vector<SearchResult> m_search_results;
};
15 changes: 8 additions & 7 deletions Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.cpp
Expand Up @@ -21,7 +21,6 @@

#include "Core/Core.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include "Core/FifoPlayer/FifoPlayer.h"
#include "Core/FifoPlayer/FifoRecorder.h"

Expand Down Expand Up @@ -151,18 +150,18 @@ void FIFOPlayerWindow::CreateWidgets()
layout->addWidget(recording_group);
layout->addWidget(m_button_box);

QWidget* main_widget = new QWidget(this);
main_widget->setLayout(layout);
m_main_widget = new QWidget(this);
m_main_widget->setLayout(layout);

auto* tab_widget = new QTabWidget(this);
m_tab_widget = new QTabWidget(this);

m_analyzer = new FIFOAnalyzer;

tab_widget->addTab(main_widget, tr("Play / Record"));
tab_widget->addTab(m_analyzer, tr("Analyze"));
m_tab_widget->addTab(m_main_widget, tr("Play / Record"));
m_tab_widget->addTab(m_analyzer, tr("Analyze"));

auto* tab_layout = new QVBoxLayout;
tab_layout->addWidget(tab_widget);
tab_layout->addWidget(m_tab_widget);

setLayout(tab_layout);
}
Expand Down Expand Up @@ -251,6 +250,8 @@ void FIFOPlayerWindow::OnEmulationStopped()
StopRecording();

UpdateControls();
// When emulation stops, switch away from the analyzer tab, as it no longer shows anything useful
m_tab_widget->setCurrentWidget(m_main_widget);
m_analyzer->Update();
}

Expand Down
4 changes: 4 additions & 0 deletions Source/Core/DolphinQt/FIFO/FIFOPlayerWindow.h
Expand Up @@ -12,6 +12,7 @@ class QDialogButtonBox;
class QLabel;
class QPushButton;
class QSpinBox;
class QTabWidget;
class FIFOAnalyzer;

class FIFOPlayerWindow : public QWidget
Expand Down Expand Up @@ -64,6 +65,9 @@ class FIFOPlayerWindow : public QWidget
QCheckBox* m_early_memory_updates;
QDialogButtonBox* m_button_box;

QWidget* m_main_widget;
QTabWidget* m_tab_widget;

FIFOAnalyzer* m_analyzer;
Core::State m_emu_state = Core::State::Uninitialized;
};
1 change: 0 additions & 1 deletion Source/Core/DolphinTool/CMakeLists.txt
Expand Up @@ -12,7 +12,6 @@ set_target_properties(dolphin-tool PROPERTIES OUTPUT_NAME dolphin-tool)

target_link_libraries(dolphin-tool
PRIVATE
core
discio
videocommon
cpp-optparse
Expand Down
118 changes: 70 additions & 48 deletions Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp
Expand Up @@ -3,6 +3,8 @@

#include <array>

#include "Common/EnumMap.h"

#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DRender.h"
#include "VideoBackends/D3D/D3DState.h"
Expand All @@ -20,55 +22,75 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<D3DVertexFormat>(vtx_decl);
}

static const DXGI_FORMAT d3d_format_lookup[5 * 4 * 2] = {
// float formats
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_SNORM,
DXGI_FORMAT_R16_UNORM,
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R8G8_SNORM,
DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,

// integer formats
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R8G8_UINT,
DXGI_FORMAT_R8G8_SINT,
DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
};

DXGI_FORMAT VarToD3D(VarType t, int size, bool integer)
DXGI_FORMAT VarToD3D(ComponentFormat t, int size, bool integer)
{
DXGI_FORMAT retval = d3d_format_lookup[(int)t + 5 * (size - 1) + 5 * 4 * (int)integer];
using FormatMap = Common::EnumMap<DXGI_FORMAT, ComponentFormat::Float>;
static constexpr auto f = [](FormatMap a) { return a; }; // Deduction helper

static constexpr std::array<FormatMap, 4> d3d_float_format_lookup = {
f({
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_SNORM,
DXGI_FORMAT_R16_UNORM,
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
}),
f({
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R8G8_SNORM,
DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
}),
f({
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R32G32B32_FLOAT,
}),
f({
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,
}),
};

static constexpr std::array<FormatMap, 4> d3d_integer_format_lookup = {
f({
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_R8G8_UINT,
DXGI_FORMAT_R8G8_SINT,
DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
};

DXGI_FORMAT retval =
integer ? d3d_integer_format_lookup[size - 1][t] : d3d_float_format_lookup[size - 1][t];
if (retval == DXGI_FORMAT_UNKNOWN)
{
PanicAlertFmt("VarToD3D: Invalid type/size combo {}, {}, {}", t, size, integer);
Expand Down
50 changes: 27 additions & 23 deletions Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp
Expand Up @@ -4,39 +4,43 @@
#include "VideoBackends/D3D12/DX12VertexFormat.h"

#include "Common/Assert.h"
#include "Common/EnumMap.h"

#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderGen.h"

namespace DX12
{
static DXGI_FORMAT VarToDXGIFormat(VarType t, u32 components, bool integer)
static DXGI_FORMAT VarToDXGIFormat(ComponentFormat t, u32 components, bool integer)
{
using ComponentArray = std::array<DXGI_FORMAT, 4>;
static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper

// NOTE: 3-component formats are not valid.
static const DXGI_FORMAT float_type_lookup[][4] = {
{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE
{DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE
{DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT
{DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> float_type_lookup = {
f({DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_UNORM}), // UByte
f({DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R8G8B8A8_SNORM}), // Byte
f({DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_UNORM}), // UShort
f({DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R16G16B16A16_SNORM}), // Short
f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float
};

static const DXGI_FORMAT integer_type_lookup[][4] = {
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE
{DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT
{DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> integer_type_lookup = {
f({DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_UINT}), // UByte
f({DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R8G8B8A8_SINT}), // Byte
f({DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_UINT}), // UShort
f({DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R16G16B16A16_SINT}), // Short
f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float
};

ASSERT(components > 0 && components <= 4);
Expand Down
8 changes: 5 additions & 3 deletions Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/GL/GLUtil.h"
#include "Common/MsgHandler.h"

Expand All @@ -23,10 +24,11 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<GLVertexFormat>(vtx_decl);
}

static inline GLuint VarToGL(VarType t)
static inline GLuint VarToGL(ComponentFormat t)
{
static const GLuint lookup[5] = {GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT,
GL_FLOAT};
static constexpr Common::EnumMap<GLuint, ComponentFormat::Float> lookup = {
GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, GL_FLOAT,
};
return lookup[t];
}

Expand Down
31 changes: 16 additions & 15 deletions Source/Core/VideoBackends/Software/SWVertexLoader.cpp
Expand Up @@ -36,28 +36,29 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
{
DebugUtil::OnObjectBegin();

u8 primitiveType = 0;
using OpcodeDecoder::Primitive;
Primitive primitive_type = Primitive::GX_DRAW_QUADS;
switch (m_current_primitive_type)
{
case PrimitiveType::Points:
primitiveType = OpcodeDecoder::GX_DRAW_POINTS;
primitive_type = Primitive::GX_DRAW_POINTS;
break;
case PrimitiveType::Lines:
primitiveType = OpcodeDecoder::GX_DRAW_LINES;
primitive_type = Primitive::GX_DRAW_LINES;
break;
case PrimitiveType::Triangles:
primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLES;
primitive_type = Primitive::GX_DRAW_TRIANGLES;
break;
case PrimitiveType::TriangleStrip:
primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP;
primitive_type = Primitive::GX_DRAW_TRIANGLE_STRIP;
break;
}

// Flush bounding box here because software overrides the base function
if (g_renderer->IsBBoxEnabled())
g_renderer->BBoxFlush();

m_setup_unit.Init(primitiveType);
m_setup_unit.Init(primitive_type);

// set all states with are stored within video sw
for (int i = 0; i < 4; i++)
Expand All @@ -74,7 +75,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
memset(static_cast<void*>(&m_vertex), 0, sizeof(m_vertex));

// parse the videocommon format to our own struct format (m_vertex)
SetFormat(g_main_cp_state.last_id, primitiveType);
SetFormat();
ParseVertex(VertexLoaderManager::GetCurrentVertexFormat()->GetVertexDeclaration(), index);

// transform this vertex so that it can be used for rasterization (outVertex)
Expand All @@ -98,7 +99,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
DebugUtil::OnObjectEnd();
}

void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
void SWVertexLoader::SetFormat()
{
// matrix index from xf regs or cp memory?
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx ||
Expand Down Expand Up @@ -144,32 +145,32 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f
if (format.enable)
{
src.Skip(format.offset);
src.Skip(base_component * (1 << (format.type >> 1)));
src.Skip(base_component * GetElementSize(format.type));

int i;
for (i = 0; i < std::min(format.components - base_component, components); i++)
{
int i_dst = reverse ? components - i - 1 : i;
switch (format.type)
{
case VAR_UNSIGNED_BYTE:
case ComponentFormat::UByte:
dst[i_dst] = ReadNormalized<T, u8>(src.Read<u8, swap>());
break;
case VAR_BYTE:
case ComponentFormat::Byte:
dst[i_dst] = ReadNormalized<T, s8>(src.Read<s8, swap>());
break;
case VAR_UNSIGNED_SHORT:
case ComponentFormat::UShort:
dst[i_dst] = ReadNormalized<T, u16>(src.Read<u16, swap>());
break;
case VAR_SHORT:
case ComponentFormat::Short:
dst[i_dst] = ReadNormalized<T, s16>(src.Read<s16, swap>());
break;
case VAR_FLOAT:
case ComponentFormat::Float:
dst[i_dst] = ReadNormalized<T, float>(src.Read<float, swap>());
break;
}

ASSERT_MSG(VIDEO, !format.integer || format.type != VAR_FLOAT,
ASSERT_MSG(VIDEO, !format.integer || format.type != ComponentFormat::Float,
"only non-float values are allowed to be streamed as integer");
}
for (; i < components; i++)
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoBackends/Software/SWVertexLoader.h
Expand Up @@ -22,7 +22,7 @@ class SWVertexLoader final : public VertexManagerBase
protected:
void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override;

void SetFormat(u8 attributeIndex, u8 primitiveType);
void SetFormat();
void ParseVertex(const PortableVertexDeclaration& vdec, int index);

InputVertexData m_vertex{};
Expand Down
21 changes: 11 additions & 10 deletions Source/Core/VideoBackends/Software/SetupUnit.cpp
Expand Up @@ -9,9 +9,9 @@
#include "VideoBackends/Software/Clipper.h"
#include "VideoCommon/OpcodeDecoding.h"

void SetupUnit::Init(u8 primitiveType)
void SetupUnit::Init(OpcodeDecoder::Primitive primitive_type)
{
m_PrimType = primitiveType;
m_PrimType = primitive_type;

m_VertexCounter = 0;
m_VertPointer[0] = &m_Vertices[0];
Expand All @@ -28,31 +28,32 @@ OutputVertexData* SetupUnit::GetVertex()

void SetupUnit::SetupVertex()
{
using OpcodeDecoder::Primitive;
switch (m_PrimType)
{
case OpcodeDecoder::GX_DRAW_QUADS:
case Primitive::GX_DRAW_QUADS:
SetupQuad();
break;
case OpcodeDecoder::GX_DRAW_QUADS_2:
case Primitive::GX_DRAW_QUADS_2:
WARN_LOG_FMT(VIDEO, "Non-standard primitive drawing command GL_DRAW_QUADS_2");
SetupQuad();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLES:
case Primitive::GX_DRAW_TRIANGLES:
SetupTriangle();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP:
case Primitive::GX_DRAW_TRIANGLE_STRIP:
SetupTriStrip();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN:
case Primitive::GX_DRAW_TRIANGLE_FAN:
SetupTriFan();
break;
case OpcodeDecoder::GX_DRAW_LINES:
case Primitive::GX_DRAW_LINES:
SetupLine();
break;
case OpcodeDecoder::GX_DRAW_LINE_STRIP:
case Primitive::GX_DRAW_LINE_STRIP:
SetupLineStrip();
break;
case OpcodeDecoder::GX_DRAW_POINTS:
case Primitive::GX_DRAW_POINTS:
SetupPoint();
break;
}
Expand Down
9 changes: 7 additions & 2 deletions Source/Core/VideoBackends/Software/SetupUnit.h
Expand Up @@ -6,9 +6,14 @@
#include "Common/CommonTypes.h"
#include "VideoBackends/Software/NativeVertexFormat.h"

// Forward declaration of the opaque, u8-backed primitive enum so that this header can name the
// type (as a member and as a parameter of SetupUnit::Init) without defining its enumerators here.
// NOTE(review): the full definition presumably comes from VideoCommon/OpcodeDecoding.h, which
// SetupUnit.cpp includes - confirm.
namespace OpcodeDecoder
{
enum class Primitive : u8;
}

class SetupUnit
{
u8 m_PrimType = 0;
OpcodeDecoder::Primitive m_PrimType{};
int m_VertexCounter = 0;

OutputVertexData m_Vertices[3];
Expand All @@ -24,7 +29,7 @@ class SetupUnit
void SetupPoint();

public:
void Init(u8 primitiveType);
void Init(OpcodeDecoder::Primitive primitive_type);

OutputVertexData* GetVertex();

Expand Down
50 changes: 27 additions & 23 deletions Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp
Expand Up @@ -4,6 +4,7 @@
#include "VideoBackends/Vulkan/VKVertexFormat.h"

#include "Common/Assert.h"
#include "Common/EnumMap.h"

#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/ObjectCache.h"
Expand All @@ -13,32 +14,35 @@

namespace Vulkan
{
static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer)
static VkFormat VarToVkFormat(ComponentFormat t, uint32_t components, bool integer)
{
static const VkFormat float_type_lookup[][4] = {
{VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE
{VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM,
VK_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE
{VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM,
VK_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT
{VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT
{VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT
using ComponentArray = std::array<VkFormat, 4>;
static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper

static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> float_type_lookup = {
f({VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8A8_UNORM}), // UByte
f({VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM,
VK_FORMAT_R8G8B8A8_SNORM}), // Byte
f({VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM,
VK_FORMAT_R16G16B16A16_UNORM}), // UShort
f({VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16A16_SNORM}), // Short
f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT}), // Float
};

static const VkFormat integer_type_lookup[][4] = {
{VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE
{VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE
{VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT
{VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT
{VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> integer_type_lookup = {
f({VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8A8_UINT}), // UByte
f({VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8A8_SINT}), // Byte
f({VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16A16_UINT}), // UShort
f({VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16A16_SINT}), // Short
f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT}), // Float
};

ASSERT(components > 0 && components <= 4);
Expand Down
175 changes: 168 additions & 7 deletions Source/Core/VideoCommon/BPMemory.h
Expand Up @@ -258,7 +258,7 @@ enum class TevBias : u32
{
Zero = 0,
AddHalf = 1,
Subhalf = 2,
SubHalf = 2,
Compare = 3
};
template <>
Expand Down Expand Up @@ -491,6 +491,94 @@ struct fmt::formatter<TevStageCombiner::ColorCombiner>
template <typename FormatContext>
auto format(const TevStageCombiner::ColorCombiner& cc, FormatContext& ctx)
{
auto out = ctx.out();
if (cc.bias != TevBias::Compare)
{
// Generate an equation view, simplifying out addition of zero and multiplication by 1
// dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale
// or equivalently and more readably when the terms are not constants:
// dest = (d (OP) lerp(a, b, c) + bias) * scale
// Note that lerping is more complex than the first form shows; see PixelShaderGen's
// WriteTevRegular for more details.

static constexpr Common::EnumMap<const char*, TevColorArg::Zero> alt_names = {
"prev.rgb", "prev.aaa", "c0.rgb", "c0.aaa", "c1.rgb", "c1.aaa", "c2.rgb", "c2.aaa",
"tex.rgb", "tex.aaa", "ras.rgb", "ras.aaa", "1", ".5", "konst.rgb", "0",
};

const bool has_d = cc.d != TevColorArg::Zero;
// If c is one, (1 - c) is zero, so (1-c)*a is zero
const bool has_ac = cc.a != TevColorArg::Zero && cc.c != TevColorArg::One;
// If either b or c is zero, b*c is zero
const bool has_bc = cc.b != TevColorArg::Zero && cc.c != TevColorArg::Zero;
const bool has_bias = cc.bias != TevBias::Zero; // != Compare is already known
const bool has_scale = cc.scale != TevScale::Scale1;

const char op = (cc.op == TevOp::Sub ? '-' : '+');

if (cc.dest == TevOutput::Prev)
out = format_to(out, "dest.rgb = ");
else
out = format_to(out, "{:n}.rgb = ", cc.dest);

if (has_scale)
out = format_to(out, "(");
if (has_d)
out = format_to(out, "{}", alt_names[cc.d]);
if (has_ac || has_bc)
{
if (has_d)
out = format_to(out, " {} ", op);
else if (cc.op == TevOp::Sub)
out = format_to(out, "{}", op);
if (has_ac && has_bc)
{
if (cc.c == TevColorArg::Half)
{
// has_ac and has_bc imply that c is not Zero or One, and Half is the only remaining
// numeric constant. This results in an average.
out = format_to(out, "({} + {})/2", alt_names[cc.a], alt_names[cc.b]);
}
else
{
out = format_to(out, "lerp({}, {}, {})", alt_names[cc.a], alt_names[cc.b],
alt_names[cc.c]);
}
}
else if (has_ac)
{
if (cc.c == TevColorArg::Zero)
out = format_to(out, "{}", alt_names[cc.a]);
else if (cc.c == TevColorArg::Half) // 1 - .5 is .5
out = format_to(out, ".5*{}", alt_names[cc.a]);
else
out = format_to(out, "(1 - {})*{}", alt_names[cc.c], alt_names[cc.a]);
}
else // has_bc
{
if (cc.c == TevColorArg::One)
out = format_to(out, "{}", alt_names[cc.b]);
else
out = format_to(out, "{}*{}", alt_names[cc.c], alt_names[cc.b]);
}
}
if (has_bias)
{
if (has_ac || has_bc || has_d)
out = format_to(out, cc.bias == TevBias::AddHalf ? " + .5" : " - .5");
else
out = format_to(out, cc.bias == TevBias::AddHalf ? ".5" : "-.5");
}
else
{
// If nothing has been written so far, add a zero
if (!(has_ac || has_bc || has_d))
out = format_to(out, "0");
}
if (has_scale)
out = format_to(out, ") * {:n}", cc.scale);
out = format_to(out, "\n\n");
}
return format_to(ctx.out(),
"a: {}\n"
"b: {}\n"
Expand All @@ -512,7 +600,80 @@ struct fmt::formatter<TevStageCombiner::AlphaCombiner>
template <typename FormatContext>
auto format(const TevStageCombiner::AlphaCombiner& ac, FormatContext& ctx)
{
return format_to(ctx.out(),
auto out = ctx.out();
if (ac.bias != TevBias::Compare)
{
// Generate an equation view, simplifying out addition of zero and multiplication by 1
// dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale
// or equivalently and more readably when the terms are not constants:
// dest = (d (OP) lerp(a, b, c) + bias) * scale
// Note that lerping is more complex than the first form shows; see PixelShaderGen's
// WriteTevRegular for more details.

// We don't need an alt_names map here, unlike the color combiner, as the only special term is
// Zero, and we filter that out below. However, we do need to append ".a" to all
// parameters, to make it explicit that these are operations on the alpha term instead of the
// 4-element vector. We also need to use the :n specifier so that the numeric ID isn't shown.

const bool has_d = ac.d != TevAlphaArg::Zero;
// There is no c value for alpha that results in (1 - c) always being zero
const bool has_ac = ac.a != TevAlphaArg::Zero;
// If either b or c is zero, b*c is zero
const bool has_bc = ac.b != TevAlphaArg::Zero && ac.c != TevAlphaArg::Zero;
const bool has_bias = ac.bias != TevBias::Zero; // != Compare is already known
const bool has_scale = ac.scale != TevScale::Scale1;

const char op = (ac.op == TevOp::Sub ? '-' : '+');

if (ac.dest == TevOutput::Prev)
out = format_to(out, "dest.a = ");
else
out = format_to(out, "{:n}.a = ", ac.dest);

if (has_scale)
out = format_to(out, "(");
if (has_d)
out = format_to(out, "{:n}.a", ac.d);
if (has_ac || has_bc)
{
if (has_d)
out = format_to(out, " {} ", op);
else if (ac.op == TevOp::Sub)
out = format_to(out, "{}", op);
if (has_ac && has_bc)
{
out = format_to(out, "lerp({:n}.a, {:n}.a, {:n}.a)", ac.a, ac.b, ac.c);
}
else if (has_ac)
{
if (ac.c == TevAlphaArg::Zero)
out = format_to(out, "{:n}.a", ac.a);
else
out = format_to(out, "(1 - {:n}.a)*{:n}.a", ac.c, ac.a);
}
else // has_bc
{
out = format_to(out, "{:n}.a*{:n}.a", ac.c, ac.b);
}
}
if (has_bias)
{
if (has_ac || has_bc || has_d)
out = format_to(out, ac.bias == TevBias::AddHalf ? " + .5" : " - .5");
else
out = format_to(out, ac.bias == TevBias::AddHalf ? ".5" : "-.5");
}
else
{
// If nothing has been written so far, add a zero
if (!(has_ac || has_bc || has_d))
out = format_to(out, "0");
}
if (has_scale)
out = format_to(out, ") * {:n}", ac.scale);
out = format_to(out, "\n\n");
}
return format_to(out,
"a: {}\n"
"b: {}\n"
"c: {}\n"
Expand Down Expand Up @@ -756,14 +917,14 @@ struct fmt::formatter<LODType> : EnumFormatter<LODType::Diagonal>
formatter() : EnumFormatter({"Edge LOD", "Diagonal LOD"}) {}
};

enum class MaxAnsio
enum class MaxAniso
{
One = 0,
Two = 1,
Four = 2,
};
template <>
struct fmt::formatter<MaxAnsio> : EnumFormatter<MaxAnsio::Four>
struct fmt::formatter<MaxAniso> : EnumFormatter<MaxAniso::Four>
{
formatter() : EnumFormatter({"1", "2", "4"}) {}
};
Expand All @@ -777,7 +938,7 @@ union TexMode0
BitField<7, 1, FilterMode> min_filter;
BitField<8, 1, LODType> diag_lod;
BitField<9, 8, s32> lod_bias;
BitField<19, 2, MaxAnsio> max_aniso;
BitField<19, 2, MaxAniso> max_aniso;
BitField<21, 1, bool, u32> lod_clamp;
u32 hex;
};
Expand Down Expand Up @@ -2205,7 +2366,7 @@ struct BPMemory

extern BPMemory bpmem;

void LoadBPReg(u32 value0, int cycles_into_future);
void LoadBPRegPreprocess(u32 value0, int cycles_into_future);
void LoadBPReg(u8 reg, u32 value, int cycles_into_future);
void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future);

std::pair<std::string, std::string> GetBPRegInfo(u8 cmd, u32 cmddata);
22 changes: 10 additions & 12 deletions Source/Core/VideoCommon/BPStructs.cpp
Expand Up @@ -716,29 +716,27 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
bp.newvalue);
}

// Call browser: OpcodeDecoding.cpp ExecuteDisplayList > Decode() > LoadBPReg()
void LoadBPReg(u32 value0, int cycles_into_future)
// Call browser: OpcodeDecoding.cpp RunCallback::OnBP()
void LoadBPReg(u8 reg, u32 value, int cycles_into_future)
{
int regNum = value0 >> 24;
int oldval = ((u32*)&bpmem)[regNum];
int newval = (oldval & ~bpmem.bpMask) | (value0 & bpmem.bpMask);
int oldval = ((u32*)&bpmem)[reg];
int newval = (oldval & ~bpmem.bpMask) | (value & bpmem.bpMask);
int changes = (oldval ^ newval) & 0xFFFFFF;

BPCmd bp = {regNum, changes, newval};
BPCmd bp = {reg, changes, newval};

// Reset the mask register if we're not trying to set it ourselves.
if (regNum != BPMEM_BP_MASK)
if (reg != BPMEM_BP_MASK)
bpmem.bpMask = 0xFFFFFF;

BPWritten(bp, cycles_into_future);
}

void LoadBPRegPreprocess(u32 value0, int cycles_into_future)
void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future)
{
int regNum = value0 >> 24;
// masking could hypothetically be a problem
u32 newval = value0 & 0xffffff;
switch (regNum)
// masking via BPMEM_BP_MASK could hypothetically be a problem
u32 newval = value & 0xffffff;
switch (reg)
{
case BPMEM_SETDRAWDONE:
if ((newval & 0xff) == 0x02)
Expand Down
174 changes: 168 additions & 6 deletions Source/Core/VideoCommon/CPMemory.cpp
Expand Up @@ -2,7 +2,14 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoCommon/CPMemory.h"

#include <cstring>

#include "Common/ChunkFile.h"
#include "Common/Logging/Log.h"
#include "Core/DolphinAnalytics.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/VertexLoaderManager.h"

// CP state
CPState g_main_cp_state;
Expand All @@ -22,13 +29,13 @@ void DoCPState(PointerWrap& p)
if (p.mode == PointerWrap::MODE_READ)
{
CopyPreprocessCPStateFromMain();
g_main_cp_state.bases_dirty = true;
VertexLoaderManager::g_bases_dirty = true;
}
}

void CopyPreprocessCPStateFromMain()
{
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
std::memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
}

std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value)
Expand Down Expand Up @@ -62,12 +69,167 @@ std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value)
return std::make_pair(fmt::format("CP_VAT_REG_C - Format {}", cmd & CP_VAT_MASK),
fmt::to_string(UVAT_group2{.Hex = value}));
case ARRAY_BASE:
return std::make_pair(fmt::format("ARRAY_BASE Array {}", cmd & CP_ARRAY_MASK),
fmt::format("Base address {:08x}", value));
return std::make_pair(
fmt::format("ARRAY_BASE Array {}", static_cast<CPArray>(cmd & CP_ARRAY_MASK)),
fmt::format("Base address {:08x}", value));
case ARRAY_STRIDE:
return std::make_pair(fmt::format("ARRAY_STRIDE Array {}", cmd - ARRAY_STRIDE),
fmt::format("Stride {:02x}", value & 0xff));
return std::make_pair(
fmt::format("ARRAY_STRIDE Array {}", static_cast<CPArray>(cmd & CP_ARRAY_MASK)),
fmt::format("Stride {:02x}", value & 0xff));
default:
return std::make_pair(fmt::format("Invalid CP register {:02x} = {:08x}", cmd, value), "");
}
}

// Builds a CPState from a flat array of CP register values.
//
// `memory` is indexed by CP register number: the matrix index and vertex descriptor registers are
// read from their own slots, followed by one A/B/C triple per vertex attribute table and one
// base/stride pair per vertex array. Values are copied verbatim (no masking is applied here).
// NOTE(review): `memory` is assumed to cover every register index read below - confirm at callers.
CPState::CPState(const u32* memory) : CPState()
{
  // Single-instance registers.
  matrix_index_a.Hex = memory[MATINDEX_A];
  matrix_index_b.Hex = memory[MATINDEX_B];
  vtx_desc.low.Hex = memory[VCD_LO];
  vtx_desc.high.Hex = memory[VCD_HI];

  // Vertex attribute tables: three register groups (A, B, C) per table.
  for (u32 vat_index = 0; vat_index < CP_NUM_VAT_REG; vat_index++)
  {
    auto& vat = vtx_attr[vat_index];
    vat.g0.Hex = memory[CP_VAT_REG_A + vat_index];
    vat.g1.Hex = memory[CP_VAT_REG_B + vat_index];
    vat.g2.Hex = memory[CP_VAT_REG_C + vat_index];
  }

  // Vertex arrays: one base address and one stride per array.
  for (u32 array_index = 0; array_index < CP_NUM_ARRAYS; array_index++)
  {
    const auto array = static_cast<CPArray>(array_index);
    array_bases[array] = memory[ARRAY_BASE + array_index];
    array_strides[array] = memory[ARRAY_STRIDE + array_index];
  }
}

// Applies a single CP register write to this state object.
//
// `sub_cmd` is the register byte: `sub_cmd & CP_COMMAND_MASK` selects the register group, and for
// the VAT and array groups the low bits select the entry inside the group. `value` is the data
// written to that register. Writes whose register byte is unexpected are still applied, but are
// reported as a game quirk and logged.
void CPState::LoadCPReg(u8 sub_cmd, u32 value)
{
  // Shared reporting step for every "right group, unexpected exact register byte" path below.
  const auto report_maybe_invalid = [] {
    DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
  };

  switch (sub_cmd & CP_COMMAND_MASK)
  {
  case UNKNOWN_00:
  case UNKNOWN_10:
  case UNKNOWN_20:
  {
    // All titles using libogc or the official SDK issue 0x20 with value=0 on startup, so that one
    // specific write is not worth reporting.
    const bool is_startup_write = sub_cmd == UNKNOWN_20 && value == 0;
    if (!is_startup_write)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND);
      DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}",
                    sub_cmd);
    }
    break;
  }

  case MATINDEX_A:
  {
    const bool is_exact = sub_cmd == MATINDEX_A;
    if (!is_exact)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO,
                   "CP MATINDEX_A: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   MATINDEX_A, sub_cmd);
    }

    matrix_index_a.Hex = value;
    break;
  }

  case MATINDEX_B:
  {
    const bool is_exact = sub_cmd == MATINDEX_B;
    if (!is_exact)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO,
                   "CP MATINDEX_B: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   MATINDEX_B, sub_cmd);
    }

    matrix_index_b.Hex = value;
    break;
  }

  case VCD_LO:
  {
    const bool is_exact = sub_cmd == VCD_LO;  // Stricter than YAGCD
    if (!is_exact)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO,
                   "CP VCD_LO: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   VCD_LO, sub_cmd);
    }

    vtx_desc.low.Hex = value;
    break;
  }

  case VCD_HI:
  {
    const bool is_exact = sub_cmd == VCD_HI;  // Stricter than YAGCD
    if (!is_exact)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO,
                   "CP VCD_HI: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   VCD_HI, sub_cmd);
    }

    vtx_desc.high.Hex = value;
    break;
  }

  case CP_VAT_REG_A:
  {
    // The offset within the group may exceed the number of VATs; that is reported, and the write
    // then lands on the entry selected by the masked low bits.
    const auto vat_offset = sub_cmd - CP_VAT_REG_A;
    if (vat_offset >= CP_NUM_VAT_REG)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", vat_offset);
    }
    vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value;
    break;
  }

  case CP_VAT_REG_B:
  {
    const auto vat_offset = sub_cmd - CP_VAT_REG_B;
    if (vat_offset >= CP_NUM_VAT_REG)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", vat_offset);
    }
    vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value;
    break;
  }

  case CP_VAT_REG_C:
  {
    const auto vat_offset = sub_cmd - CP_VAT_REG_C;
    if (vat_offset >= CP_NUM_VAT_REG)
    {
      report_maybe_invalid();
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", vat_offset);
    }
    vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value;
    break;
  }

  // Pointers to vertex arrays in GC RAM
  case ARRAY_BASE:
  {
    const auto array = static_cast<CPArray>(sub_cmd & CP_ARRAY_MASK);
    // Only the bits permitted by the physical address mask are stored.
    array_bases[array] = value & CommandProcessor::GetPhysicalAddressMask();
    break;
  }

  case ARRAY_STRIDE:
  {
    const auto array = static_cast<CPArray>(sub_cmd & CP_ARRAY_MASK);
    // Only the low byte of the written value is kept as the stride.
    array_strides[array] = value & 0xFF;
    break;
  }

  default:
    DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND);
    WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value);
    break;
  }
}

// Writes this state back out as a flat array of CP register values, indexed by register number.
//
// Only the slots for the matrix index, vertex descriptor, VAT and array base/stride registers are
// written; every other entry of `memory` is left untouched. The declaration in CPMemory.h states
// that `memory` should have room for 0x100 values.
void CPState::FillCPMemoryArray(u32* memory) const
{
  // Single-instance registers.
  memory[MATINDEX_A] = matrix_index_a.Hex;
  memory[MATINDEX_B] = matrix_index_b.Hex;
  memory[VCD_LO] = vtx_desc.low.Hex;
  memory[VCD_HI] = vtx_desc.high.Hex;

  // Vertex attribute tables: three register groups (A, B, C) per table.
  for (int vat_index = 0; vat_index < CP_NUM_VAT_REG; ++vat_index)
  {
    const auto& vat = vtx_attr[vat_index];
    memory[CP_VAT_REG_A + vat_index] = vat.g0.Hex;
    memory[CP_VAT_REG_B + vat_index] = vat.g1.Hex;
    memory[CP_VAT_REG_C + vat_index] = vat.g2.Hex;
  }

  // Vertex arrays: one base address and one stride per array.
  for (int array_index = 0; array_index < CP_NUM_ARRAYS; ++array_index)
  {
    const auto array = static_cast<CPArray>(array_index);
    memory[ARRAY_BASE + array_index] = array_bases[array];
    memory[ARRAY_STRIDE + array_index] = array_strides[array];
  }
}
79 changes: 50 additions & 29 deletions Source/Core/VideoCommon/CPMemory.h
Expand Up @@ -5,12 +5,14 @@

#include <array>
#include <string>
#include <type_traits>
#include <utility>

#include "Common/BitField.h"
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h"
#include "Common/EnumMap.h"
#include "Common/MsgHandler.h"

enum
Expand Down Expand Up @@ -53,24 +55,46 @@ enum
};

// Vertex array numbers
enum
enum class CPArray : u8
{
ARRAY_POSITION = 0,
ARRAY_NORMAL = 1,
ARRAY_COLOR0 = 2,
NUM_COLOR_ARRAYS = 2,
ARRAY_TEXCOORD0 = 4,
NUM_TEXCOORD_ARRAYS = 8,
Position = 0,
Normal = 1,

Color0 = 2,
Color1 = 3,

ARRAY_XF_A = 12, // Usually used for position matrices
ARRAY_XF_B = 13, // Usually used for normal matrices
ARRAY_XF_C = 14, // Usually used for tex coord matrices
ARRAY_XF_D = 15, // Usually used for light objects
TexCoord0 = 4,
TexCoord1 = 5,
TexCoord2 = 6,
TexCoord3 = 7,
TexCoord4 = 8,
TexCoord5 = 9,
TexCoord6 = 10,
TexCoord7 = 11,

// Number of arrays related to vertex components (position, normal, color, tex coord)
// Excludes the 4 arrays used for indexed XF loads
NUM_VERTEX_COMPONENT_ARRAYS = 12,
XF_A = 12, // Usually used for position matrices
XF_B = 13, // Usually used for normal matrices
XF_C = 14, // Usually used for tex coord matrices
XF_D = 15, // Usually used for light objects
};
// fmt support for CPArray. The table below supplies one display name per enumerator, listed in
// enum-value order from Position (0) through XF_D (15).
// NOTE(review): the exact output layout is decided by EnumFormatter, which is defined elsewhere.
template <>
struct fmt::formatter<CPArray> : EnumFormatter<CPArray::XF_D>
{
  static constexpr array_type names = {
      // Position and normal
      "Position", "Normal",
      // Colors
      "Color 0", "Color 1",
      // Texture coordinates
      "Tex Coord 0", "Tex Coord 1", "Tex Coord 2", "Tex Coord 3",
      "Tex Coord 4", "Tex Coord 5", "Tex Coord 6", "Tex Coord 7",
      // Arrays used for indexed XF loads
      "XF A", "XF B", "XF C", "XF D",
  };

  formatter() : EnumFormatter(names) {}
};
// Returns the array that lies `offset` entries after `array` in enum-value order.
// Intended for offsetting from Color0/TexCoord0 (e.g. Color0 + 1 is Color1). The result is not
// range-checked, so callers must keep the sum within the enumerators they mean to reach.
constexpr CPArray operator+(CPArray array, u8 offset)
{
  const auto base_value = static_cast<u8>(array);
  return static_cast<CPArray>(base_value + offset);
}

// Number of arrays related to vertex components (position, normal, color, tex coord)
// Excludes the 4 arrays used for indexed XF loads
constexpr u8 NUM_VERTEX_COMPONENT_ARRAYS = 12;

// Vertex components
enum class VertexComponentFormat
Expand Down Expand Up @@ -607,32 +631,29 @@ class VertexLoaderBase;
// STATE_TO_SAVE
struct CPState final
{
u32 array_bases[CP_NUM_ARRAYS]{};
u32 array_strides[CP_NUM_ARRAYS]{};
CPState() = default;
explicit CPState(const u32* memory);

// Mutates the CP state based on the given command and value.
void LoadCPReg(u8 sub_cmd, u32 value);
// Fills memory with data from CP regs. There should be space for 0x100 values in memory.
void FillCPMemoryArray(u32* memory) const;

Common::EnumMap<u32, CPArray::XF_D> array_bases;
Common::EnumMap<u32, CPArray::XF_D> array_strides;
TMatrixIndexA matrix_index_a{};
TMatrixIndexB matrix_index_b{};
TVtxDesc vtx_desc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT vtx_attr[CP_NUM_VAT_REG]{};

// Attributes that actually belong to VertexLoaderManager:
BitSet32 attr_dirty{};
bool bases_dirty = false;
VertexLoaderBase* vertex_loaders[CP_NUM_VAT_REG]{};
int last_id = 0;
std::array<VAT, CP_NUM_VAT_REG> vtx_attr{};
};
static_assert(std::is_trivially_copyable_v<CPState>);

class PointerWrap;

extern CPState g_main_cp_state;
extern CPState g_preprocess_cp_state;

// Might move this into its own file later.
void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);

// Fills memory with data from CP regs
void FillCPMemoryArray(u32* memory);

void DoCPState(PointerWrap& p);

void CopyPreprocessCPStateFromMain();
Expand Down
7 changes: 4 additions & 3 deletions Source/Core/VideoCommon/CommandProcessor.cpp
Expand Up @@ -5,6 +5,7 @@

#include <atomic>
#include <cstring>
#include <fmt/format.h>

#include "Common/Assert.h"
#include "Common/ChunkFile.h"
Expand Down Expand Up @@ -607,18 +608,18 @@ void SetCpClearRegister()
{
}

void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess)
{
// TODO(Omega): Maybe dump FIFO to file on this error
PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, {2}).\n"
PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, preprocess={2}).\n"
"This means one of the following:\n"
"* The emulated GPU got desynced, disabling dual core can help\n"
"* Command stream corrupted by some spurious memory bug\n"
"* This really is an unknown opcode (unlikely)\n"
"* Some other sort of bug\n\n"
"Further errors will be sent to the Video Backend log and\n"
"Dolphin will now likely crash or hang. Enjoy.",
cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");
cmd_byte, fmt::ptr(buffer), preprocess);

{
PanicAlertFmt("Illegal command {:02x}\n"
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/CommandProcessor.h
Expand Up @@ -169,7 +169,7 @@ void SetCpClearRegister();
void SetCpControlRegister();
void SetCpStatusRegister();

void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess);
void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess);

u32 GetPhysicalAddressMask();

Expand Down
14 changes: 7 additions & 7 deletions Source/Core/VideoCommon/Fifo.cpp
Expand Up @@ -273,8 +273,8 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
}
}
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
s_video_buffer_pp_read_ptr = OpcodeDecoder::Run<true>(
DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
s_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo<true>(
DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr);
// This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + len;
}
Expand Down Expand Up @@ -316,7 +316,7 @@ void RunGpuLoop()
if (write_ptr > seen_ptr)
{
s_video_buffer_read_ptr =
OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
OpcodeDecoder::RunFifo(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr);
s_video_buffer_seen_ptr = write_ptr;
}
}
Expand Down Expand Up @@ -349,8 +349,8 @@ void RunGpuLoop()
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32);

u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted);

fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed);
fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst);
Expand Down Expand Up @@ -466,8 +466,8 @@ static int RunGpuOnCpu(int ticks)
}
ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed));
u32 cycles = 0;
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles);
available_ticks -= cycles;
}

Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoCommon/FramebufferManager.cpp
Expand Up @@ -837,12 +837,12 @@ bool FramebufferManager::CompilePokePipelines()
{
PortableVertexDeclaration vtx_decl = {};
vtx_decl.position.enable = true;
vtx_decl.position.type = VAR_FLOAT;
vtx_decl.position.type = ComponentFormat::Float;
vtx_decl.position.components = 4;
vtx_decl.position.integer = false;
vtx_decl.position.offset = offsetof(EFBPokeVertex, position);
vtx_decl.colors[0].enable = true;
vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE;
vtx_decl.colors[0].type = ComponentFormat::UByte;
vtx_decl.colors[0].components = 4;
vtx_decl.colors[0].integer = false;
vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color);
Expand Down
21 changes: 12 additions & 9 deletions Source/Core/VideoCommon/GeometryShaderGen.cpp
Expand Up @@ -6,25 +6,29 @@
#include <cmath>

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"

constexpr std::array<const char*, 4> primitives_ogl{
constexpr Common::EnumMap<const char*, PrimitiveType::TriangleStrip> primitives_ogl{
"points",
"lines",
"triangles",
"triangles",
};
constexpr std::array<const char*, 4> primitives_d3d{
constexpr Common::EnumMap<const char*, PrimitiveType::TriangleStrip> primitives_d3d{
"point",
"line",
"triangle",
"triangle",
};

constexpr Common::EnumMap<u32, PrimitiveType::TriangleStrip> vertex_in_map{1u, 2u, 3u, 3u};
constexpr Common::EnumMap<u32, PrimitiveType::TriangleStrip> vertex_out_map{4u, 4u, 4u, 3u};

bool geometry_shader_uid_data::IsPassthrough() const
{
const bool stereo = g_ActiveConfig.stereo_mode != StereoMode::Off;
Expand Down Expand Up @@ -61,9 +65,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
const bool ssaa = host_config.ssaa;
const bool stereo = host_config.stereo;
const auto primitive_type = static_cast<PrimitiveType>(uid_data->primitive_type);
const auto primitive_type_index = static_cast<unsigned>(uid_data->primitive_type);
const auto vertex_in = std::min(static_cast<unsigned>(primitive_type_index) + 1, 3u);
u32 vertex_out = primitive_type == PrimitiveType::TriangleStrip ? 3 : 4;
const u32 vertex_in = vertex_in_map[primitive_type];
u32 vertex_out = vertex_out_map[primitive_type];

if (wireframe)
vertex_out++;
Expand All @@ -73,14 +76,14 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
// Insert layout parameters
if (host_config.backend_gs_instancing)
{
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index],
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type],
stereo ? 2 : 1);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
vertex_out);
}
else
{
out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]);
out.Write("layout({}) in;\n", primitives_ogl[primitive_type]);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
stereo ? vertex_out * 2 : vertex_out);
}
Expand Down Expand Up @@ -139,13 +142,13 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint "
"InstanceID : SV_GSInstanceID)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle");
}
else
{
out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle");
}

out.Write("\tVertexData ps;\n");
Expand Down
30 changes: 16 additions & 14 deletions Source/Core/VideoCommon/IndexGenerator.cpp
Expand Up @@ -202,25 +202,27 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index)

// Fills m_primitive_table with the index-generation routine for every primitive type.
// Quads, triangles, strips and fans use the <true> template instantiations when the backend
// reports primitive-restart support and the <false> ones otherwise; lines, line strips and
// points use the same routine in both cases.
// NOTE(review): this chunk is a rendered diff without +/- markers, so each assignment appears
// twice: once keyed by the unscoped OpcodeDecoder::GX_* constant and once by the scoped
// Primitive::GX_* enumerator. Presumably only the Primitive:: form is live - confirm against the
// real source before editing.
void IndexGenerator::Init()
{
using OpcodeDecoder::Primitive;

if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads<true>;
m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
}
else
{
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads<false>;
m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
}
// Line and point primitives do not depend on primitive-restart support.
m_primitive_table[OpcodeDecoder::GX_DRAW_LINES] = AddLineList;
m_primitive_table[OpcodeDecoder::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[OpcodeDecoder::GX_DRAW_POINTS] = AddPoints;
m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList;
m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints;
}

void IndexGenerator::Start(u16* index_ptr)
Expand All @@ -230,7 +232,7 @@ void IndexGenerator::Start(u16* index_ptr)
m_base_index = 0;
}

void IndexGenerator::AddIndices(int primitive, u32 num_vertices)
void IndexGenerator::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
{
m_index_buffer_current =
m_primitive_table[primitive](m_index_buffer_current, num_vertices, m_base_index);
Expand Down
7 changes: 4 additions & 3 deletions Source/Core/VideoCommon/IndexGenerator.h
Expand Up @@ -6,16 +6,17 @@

#pragma once

#include <array>
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/OpcodeDecoding.h"

class IndexGenerator
{
public:
void Init();
void Start(u16* index_ptr);

void AddIndices(int primitive, u32 num_vertices);
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);

void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices);

Expand All @@ -30,5 +31,5 @@ class IndexGenerator
u32 m_base_index = 0;

using PrimitiveFunction = u16* (*)(u16*, u32, u32);
std::array<PrimitiveFunction, 8> m_primitive_table{};
Common::EnumMap<PrimitiveFunction, OpcodeDecoder::Primitive::GX_DRAW_POINTS> m_primitive_table{};
};
12 changes: 2 additions & 10 deletions Source/Core/VideoCommon/NativeVertexFormat.h
Expand Up @@ -8,6 +8,7 @@

#include "Common/CommonTypes.h"
#include "Common/Hash.h"
#include "VideoCommon/CPMemory.h"

// m_components
enum
Expand Down Expand Up @@ -45,18 +46,9 @@ enum
VB_HAS_UVTEXMTXSHIFT = 13,
};

// Storage type of a single vertex attribute component. The enumerator values line up with the
// GX_* encodings noted beside each entry.
// NOTE(review): AttributeFormat::type just below also appears with the ComponentFormat type in
// this diff rendering, so this enum is presumably being removed - confirm before relying on it.
enum VarType
{
VAR_UNSIGNED_BYTE, // GX_U8 = 0
VAR_BYTE, // GX_S8 = 1
VAR_UNSIGNED_SHORT, // GX_U16 = 2
VAR_SHORT, // GX_S16 = 3
VAR_FLOAT, // GX_F32 = 4
};

struct AttributeFormat
{
VarType type;
ComponentFormat type;
int components;
int offset;
bool enable;
Expand Down
384 changes: 185 additions & 199 deletions Source/Core/VideoCommon/OpcodeDecoding.cpp

Large diffs are not rendered by default.

254 changes: 244 additions & 10 deletions Source/Core/VideoCommon/OpcodeDecoding.h
Expand Up @@ -3,16 +3,25 @@

#pragma once

#include <type_traits>

#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h"
#include "Common/Inline.h"
#include "Common/Swap.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/VertexLoaderBase.h"

struct CPState;
class DataReader;

namespace OpcodeDecoder
{
// Global flag to signal if FifoRecorder is active.
extern bool g_record_fifo_data;

enum
enum class Opcode
{
GX_NOP = 0x00,
GX_UNKNOWN_RESET = 0x01,
Expand All @@ -27,20 +36,20 @@ enum

GX_CMD_CALL_DL = 0x40,
GX_CMD_UNKNOWN_METRICS = 0x44,
GX_CMD_INVL_VC = 0x48
};
GX_CMD_INVL_VC = 0x48,

enum
{
GX_PRIMITIVE_MASK = 0x78,
GX_PRIMITIVE_SHIFT = 3,
GX_VAT_MASK = 0x07
GX_PRIMITIVE_START = 0x80,
GX_PRIMITIVE_END = 0xbf,
};

// Bit layout of a primitive (draw) opcode byte, as decoded in detail::RunCommand:
// bits 3-6 (mask 0x78, shifted right by 3) select the Primitive, and bits 0-2 (mask 0x07) give
// the `vat` value used to index CPState::vtx_attr.
constexpr u8 GX_PRIMITIVE_MASK = 0x78;
constexpr u32 GX_PRIMITIVE_SHIFT = 3;
constexpr u8 GX_VAT_MASK = 0x07;

// These values are the values extracted using GX_PRIMITIVE_MASK
// and GX_PRIMITIVE_SHIFT.
// GX_DRAW_QUADS_2 behaves the same way as GX_DRAW_QUADS.
enum
enum class Primitive : u8
{
GX_DRAW_QUADS = 0x0, // 0x80
GX_DRAW_QUADS_2 = 0x1, // 0x88
Expand All @@ -54,7 +63,232 @@ enum

void Init();

// Interface for the Run and RunCommand functions below.
// The functions themselves are templates so that the compiler generates separate versions for each
// callback (with the callback functions inlined), so the callback doesn't actually need to be
// publicly inherited.
// Compilers don't generate warnings for failed inlining with virtual functions, so this define
// allows disabling the use of virtual functions to generate those warnings. However, this means
// that missing functions will generate errors on their use in RunCommand, instead of in the
// subclass, which can be confusing.
#define OPCODE_CALLBACK_USE_INHERITANCE

#ifdef OPCODE_CALLBACK_USE_INHERITANCE
#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig override
#define OPCODE_CALLBACK_NOINLINE(sig) sig override
#else
#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig
#define OPCODE_CALLBACK_NOINLINE(sig) sig
#endif
// Receiver for decoded FIFO commands.
// For each decoded command, detail::RunCommand invokes exactly one of the On* hooks below
// (OnXF, OnCP, OnBP, OnIndexedLoad, OnPrimitiveCommand, OnDisplayList, OnNop or OnUnknown), and
// the public RunCommand then invokes OnCommand with the command's raw bytes.
// When OPCODE_CALLBACK_USE_INHERITANCE is not defined this class has no members at all; it then
// only serves as the base type checked by the std::is_base_of_v constraint on Run/RunCommand.
class Callback
{
#ifdef OPCODE_CALLBACK_USE_INHERITANCE
public:
virtual ~Callback() = default;

// Called on any XF command.
// count is the number of 32-bit values that follow the 5-byte command header; data points at
// the first of them (still in stream byte order).
virtual void OnXF(u16 address, u8 count, const u8* data) = 0;
// Called on any CP command.
// Subclasses should update the CP state with GetCPState().LoadCPReg(command, value) so that
// primitive commands decode properly.
virtual void OnCP(u8 command, u32 value) = 0;
// Called on any BP command.
// value is the 24-bit payload that follows the register byte.
virtual void OnBP(u8 command, u32 value) = 0;
// Called on any indexed XF load command.
// index is the upper 16 bits of the command word, size is bits 12-15 plus one, and address is
// the low 12 bits.
virtual void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size) = 0;
// Called on any primitive command.
// vertex_data points just past the 3-byte header and covers num_vertices * vertex_size bytes.
virtual void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, u32 vertex_size,
u16 num_vertices, const u8* vertex_data) = 0;
// Called on a display list.
virtual void OnDisplayList(u32 address, u32 size) = 0;
// Called on any NOP commands (which are all merged into a single call).
// count is the number of consecutive NOP bytes that were merged.
virtual void OnNop(u32 count) = 0;
// Called on an unknown opcode, or an opcode that is known but not implemented.
// data[0] is opcode.
virtual void OnUnknown(u8 opcode, const u8* data) = 0;

// Called on ANY command. The first byte of data is the opcode. Size will be at least 1.
// This function is called after one of the above functions is called.
virtual void OnCommand(const u8* data, u32 size) = 0;

// Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands.
virtual CPState& GetCPState() = 0;
#endif
};

namespace detail
{
// Main logic; split so that the main RunCommand can call OnCommand with the returned size.
// Decodes the single command that starts at data[0] and forwards it to the matching On* hook of
// callback. Returns the number of bytes the command occupies, or 0 when fewer bytes than that
// are available (no On* hook is invoked in that case). OnCommand is deliberately left to the
// caller, which needs the size returned here.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback)
{
  if (available == 0)
    return 0;

  const u8 opcode_byte = data[0];
  const Opcode opcode = static_cast<Opcode>(opcode_byte);

  switch (opcode)
  {
  case Opcode::GX_NOP:
  {
    // A run of consecutive NOP bytes is reported through one OnNop call.
    u32 nop_count = 1;
    for (; nop_count < available; nop_count++)
    {
      if (static_cast<Opcode>(data[nop_count]) != Opcode::GX_NOP)
        break;
    }
    callback.OnNop(nop_count);
    return nop_count;
  }

  case Opcode::GX_LOAD_CP_REG:
  {
    // opcode (1 byte) + register (1 byte) + value (4 bytes)
    constexpr u32 command_size = 6;
    if (available < command_size)
      return 0;

    callback.OnCP(data[1], Common::swap32(&data[2]));
    return command_size;
  }

  case Opcode::GX_LOAD_XF_REG:
  {
    // opcode (1 byte) + packed length/address word (4 bytes), followed by the payload words
    constexpr u32 header_size = 5;
    if (available < header_size)
      return 0;

    const u32 header = Common::swap32(&data[1]);
    const u16 base_address = header & 0xffff;

    // The upper half holds (number of 32-bit payload words - 1).
    const u16 stream_size_temp = header >> 16;
    ASSERT(stream_size_temp < 16);
    const u8 stream_size = (stream_size_temp & 0xf) + 1;

    const u32 command_size = header_size + stream_size * 4;
    if (available < command_size)
      return 0;

    callback.OnXF(base_address, stream_size, &data[header_size]);
    return command_size;
  }

  case Opcode::GX_LOAD_INDX_A:  // Used for position matrices
  case Opcode::GX_LOAD_INDX_B:  // Used for normal matrices
  case Opcode::GX_LOAD_INDX_C:  // Used for postmatrices
  case Opcode::GX_LOAD_INDX_D:  // Used for lights
  {
    constexpr u32 command_size = 5;
    if (available < command_size)
      return 0;

    const u32 packed = Common::swap32(&data[1]);

    const u32 index = packed >> 16;
    const u8 size = ((packed >> 12) & 0xF) + 1;
    const u16 address = packed & 0xFFF;  // TODO: check mask

    // Translate the opcode into the array it refers to:
    //   GX_LOAD_INDX_A (32 = 8*4) -> CPArray::XF_A (4+8 = 12)
    //   GX_LOAD_INDX_B (40 = 8*5) -> CPArray::XF_B (5+8 = 13)
    //   GX_LOAD_INDX_C (48 = 8*6) -> CPArray::XF_C (6+8 = 14)
    //   GX_LOAD_INDX_D (56 = 8*7) -> CPArray::XF_D (7+8 = 15)
    const auto ref_array = static_cast<CPArray>((opcode_byte / 8) + 8);

    callback.OnIndexedLoad(ref_array, index, address, size);
    return command_size;
  }

  case Opcode::GX_CMD_CALL_DL:
  {
    // opcode (1 byte) + address (4 bytes) + size (4 bytes)
    constexpr u32 command_size = 9;
    if (available < command_size)
      return 0;

    const u32 list_address = Common::swap32(&data[1]);
    const u32 list_size = Common::swap32(&data[5]);

    callback.OnDisplayList(list_address, list_size);
    return command_size;
  }

  case Opcode::GX_LOAD_BP_REG:
  {
    // opcode (1 byte) + register (1 byte) + value (3 bytes)
    constexpr u32 command_size = 5;
    if (available < command_size)
      return 0;

    callback.OnBP(data[1], Common::swap24(&data[2]));
    return command_size;
  }

  default:
  {
    // Anything outside the primitive opcode range is reported through OnUnknown below.
    if (opcode < Opcode::GX_PRIMITIVE_START || opcode > Opcode::GX_PRIMITIVE_END)
      break;

    // opcode (1 byte) + vertex count (2 bytes), followed by the vertex data
    constexpr u32 header_size = 3;
    if (available < header_size)
      return 0;

    const auto primitive = static_cast<OpcodeDecoder::Primitive>(
        (opcode_byte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT);
    const u8 vat = opcode_byte & OpcodeDecoder::GX_VAT_MASK;

    // The size of one vertex depends on the callback's current CP state.
    const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc,
                                                            callback.GetCPState().vtx_attr[vat]);
    const u16 num_vertices = Common::swap16(&data[1]);

    const u32 command_size = header_size + num_vertices * vertex_size;
    if (available < command_size)
      return 0;

    callback.OnPrimitiveCommand(primitive, vat, vertex_size, num_vertices, &data[header_size]);
    return command_size;
  }
  }

  // Unknown opcodes, and known ones without a dedicated hook, consume just the opcode byte.
  callback.OnUnknown(opcode_byte, data);
  return 1;
}
} // namespace detail

// Decodes one command from data and, if a complete command was available, reports its raw bytes
// through callback.OnCommand after the command-specific hook has run.
// Returns the size of the decoded command in bytes, or 0 if nothing could be decoded.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback)
{
  const u32 consumed = detail::RunCommand(data, available, callback);
  if (consumed == 0)
    return 0;

  callback.OnCommand(data, consumed);
  return consumed;
}

// Decodes commands back to back from data until all `available` bytes are consumed or the
// remaining bytes do not hold a complete command.
// Returns the number of bytes that were consumed by fully decoded commands.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
DOLPHIN_FORCE_INLINE u32 Run(const u8* data, u32 available, T& callback)
{
  u32 offset = 0;
  while (offset < available)
  {
    const u32 step = RunCommand(data + offset, available - offset, callback);
    if (step == 0)
      return offset;  // Incomplete trailing command; stop here.
    offset += step;
  }
  return offset;
}

template <bool is_preprocess = false>
u8* Run(DataReader src, u32* cycles, bool in_display_list);
u8* RunFifo(DataReader src, u32* cycles);

} // namespace OpcodeDecoder

// fmt support for OpcodeDecoder::Primitive: supplies one display name per enumerator to
// EnumFormatter, with GX_DRAW_POINTS given as the last enumerator.
// NOTE(review): the names are presumably looked up by enumerator value, so their order must
// match the Primitive enum (GX_DRAW_QUADS = 0x0 first) - confirm against EnumFormatter.
template <>
struct fmt::formatter<OpcodeDecoder::Primitive>
: EnumFormatter<OpcodeDecoder::Primitive::GX_DRAW_POINTS>
{
static constexpr array_type names = {
"GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)",
"GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP",
"GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES",
"GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS",
};
formatter() : EnumFormatter(names) {}
};