This file was deleted.

This file was deleted.

@@ -4,6 +4,7 @@
#include "Core/FifoPlayer/FifoPlayer.h"

#include <algorithm>
#include <cstring>
#include <mutex>

#include "Common/Assert.h"
@@ -12,7 +13,6 @@
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/HW/CPU.h"
#include "Core/HW/GPFifo.h"
@@ -31,6 +31,136 @@
// TODO: Move texMem somewhere else so this isn't an issue.
#include "VideoCommon/TextureDecoder.h"

namespace
{
class FifoPlaybackAnalyzer : public OpcodeDecoder::Callback
{
public:
static void AnalyzeFrames(FifoDataFile* file, std::vector<AnalyzedFrameInfo>& frame_info);

explicit FifoPlaybackAnalyzer(const u32* cpmem) : m_cpmem(cpmem) {}

OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {}
OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); }
OPCODE_CALLBACK(void OnBP(u8 command, u32 value));
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)) {}
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data));
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size)) {}
OPCODE_CALLBACK(void OnNop(u32 count));
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {}

OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size));

OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }

bool m_start_of_primitives = false;
bool m_end_of_primitives = false;
bool m_efb_copy = false;
// Internal state, copied to above in OnCommand
bool m_was_primitive = false;
bool m_is_primitive = false;
bool m_is_copy = false;
bool m_is_nop = false;
CPState m_cpmem;
};

void FifoPlaybackAnalyzer::AnalyzeFrames(FifoDataFile* file,
std::vector<AnalyzedFrameInfo>& frame_info)
{
FifoPlaybackAnalyzer analyzer(file->GetCPMem());
frame_info.clear();
frame_info.resize(file->GetFrameCount());

for (u32 frame_no = 0; frame_no < file->GetFrameCount(); frame_no++)
{
const FifoFrameInfo& frame = file->GetFrame(frame_no);
AnalyzedFrameInfo& analyzed = frame_info[frame_no];

u32 offset = 0;

u32 part_start = 0;
CPState cpmem;

while (offset < frame.fifoData.size())
{
const u32 cmd_size = OpcodeDecoder::RunCommand(&frame.fifoData[offset],
u32(frame.fifoData.size()) - offset, analyzer);

if (analyzer.m_start_of_primitives)
{
// Start of primitive data for an object
analyzed.AddPart(FramePartType::Commands, part_start, offset, analyzer.m_cpmem);
part_start = offset;
// Copy cpmem now, because end_of_primitives isn't triggered until the first opcode after
// primitive data, and the first opcode might update cpmem
std::memcpy(&cpmem, &analyzer.m_cpmem, sizeof(CPState));
}
if (analyzer.m_end_of_primitives)
{
// End of primitive data for an object, and thus end of the object
analyzed.AddPart(FramePartType::PrimitiveData, part_start, offset, cpmem);
part_start = offset;
}

offset += cmd_size;

if (analyzer.m_efb_copy)
{
// We increase the offset beforehand, so that the trigger EFB copy command is included.
analyzed.AddPart(FramePartType::EFBCopy, part_start, offset, analyzer.m_cpmem);
part_start = offset;
}
}

// The frame should end with an EFB copy, so part_start should have been updated to the end.
ASSERT(part_start == frame.fifoData.size());
ASSERT(offset == frame.fifoData.size());
}
}

void FifoPlaybackAnalyzer::OnBP(u8 command, u32 value)
{
if (command == BPMEM_TRIGGER_EFB_COPY)
m_is_copy = true;
}

void FifoPlaybackAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data)
{
m_is_primitive = true;
}

void FifoPlaybackAnalyzer::OnNop(u32 count)
{
m_is_nop = true;
}

void FifoPlaybackAnalyzer::OnCommand(const u8* data, u32 size)
{
m_start_of_primitives = false;
m_end_of_primitives = false;
m_efb_copy = false;

if (!m_is_nop)
{
if (m_is_primitive && !m_was_primitive)
m_start_of_primitives = true;
else if (m_was_primitive && !m_is_primitive)
m_end_of_primitives = true;
else if (m_is_copy)
m_efb_copy = true;

m_was_primitive = m_is_primitive;
}
m_is_primitive = false;
m_is_copy = false;
m_is_nop = false;
}
} // namespace

bool IsPlayingBackFifologWithBrokenEFBCopies = false;

FifoPlayer::FifoPlayer() : m_Loop{SConfig::GetInstance().bLoopFifoReplay}
@@ -191,7 +321,7 @@ u32 FifoPlayer::GetMaxObjectCount() const
u32 result = 0;
for (auto& frame : m_FrameInfo)
{
const u32 count = static_cast<u32>(frame.objectStarts.size());
const u32 count = frame.part_type_counts[FramePartType::PrimitiveData];
if (count > result)
result = count;
}
@@ -202,7 +332,7 @@ u32 FifoPlayer::GetFrameObjectCount(u32 frame) const
{
if (frame < m_FrameInfo.size())
{
return static_cast<u32>(m_FrameInfo[frame].objectStarts.size());
return m_FrameInfo[frame].part_type_counts[FramePartType::PrimitiveData];
}

return 0;
@@ -262,55 +392,35 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo&
m_ElapsedCycles = 0;
m_FrameFifoSize = static_cast<u32>(frame.fifoData.size());

// Determine start and end objects
u32 numObjects = (u32)(info.objectStarts.size());
u32 drawStart = std::min(numObjects, m_ObjectRangeStart);
u32 drawEnd = std::min(numObjects - 1, m_ObjectRangeEnd);
u32 memory_update = 0;
u32 object_num = 0;

u32 position = 0;
u32 memoryUpdate = 0;

// Skip memory updates during frame if true
// Skip all memory updates if early memory updates are enabled, as we already wrote them
if (m_EarlyMemoryUpdates)
{
memoryUpdate = (u32)(frame.memoryUpdates.size());
memory_update = (u32)(frame.memoryUpdates.size());
}

if (numObjects > 0)
for (const FramePart& part : info.parts)
{
u32 objectNum = 0;
bool show_part;

// Write fifo data skipping objects before the draw range
while (objectNum < drawStart)
if (part.m_type == FramePartType::PrimitiveData)
{
WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info);

position = info.objectEnds[objectNum];
++objectNum;
show_part = m_ObjectRangeStart <= object_num && object_num <= m_ObjectRangeEnd;
object_num++;
}

// Write objects in draw range
if (objectNum < numObjects && drawStart <= drawEnd)
else
{
objectNum = drawEnd;
WriteFramePart(position, info.objectEnds[objectNum], memoryUpdate, frame, info);
position = info.objectEnds[objectNum];
++objectNum;
// We always include commands and EFB copies, as commands from earlier objects still apply to
// later ones (games generally do not reconfigure everything for each object)
show_part = true;
}

// Write fifo data skipping objects after the draw range
while (objectNum < numObjects)
{
WriteFramePart(position, info.objectStarts[objectNum], memoryUpdate, frame, info);

position = info.objectEnds[objectNum];
++objectNum;
}
if (show_part)
WriteFramePart(part, &memory_update, frame);
}

// Write data after the last object
WriteFramePart(position, static_cast<u32>(frame.fifoData.size()), memoryUpdate, frame, info);

FlushWGP();

// Sleep while the GPU is active
@@ -321,36 +431,39 @@ void FifoPlayer::WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo&
}
}

void FifoPlayer::WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate,
const FifoFrameInfo& frame, const AnalyzedFrameInfo& info)
void FifoPlayer::WriteFramePart(const FramePart& part, u32* next_mem_update,
const FifoFrameInfo& frame)
{
const u8* const data = frame.fifoData.data();

while (nextMemUpdate < frame.memoryUpdates.size() && dataStart < dataEnd)
u32 data_start = part.m_start;
const u32 data_end = part.m_end;

while (*next_mem_update < frame.memoryUpdates.size() && data_start < data_end)
{
const MemoryUpdate& memUpdate = info.memoryUpdates[nextMemUpdate];
const MemoryUpdate& memUpdate = frame.memoryUpdates[*next_mem_update];

if (memUpdate.fifoPosition < dataEnd)
if (memUpdate.fifoPosition < data_end)
{
if (dataStart < memUpdate.fifoPosition)
if (data_start < memUpdate.fifoPosition)
{
WriteFifo(data, dataStart, memUpdate.fifoPosition);
dataStart = memUpdate.fifoPosition;
WriteFifo(data, data_start, memUpdate.fifoPosition);
data_start = memUpdate.fifoPosition;
}

WriteMemory(memUpdate);

++nextMemUpdate;
++*next_mem_update;
}
else
{
WriteFifo(data, dataStart, dataEnd);
dataStart = dataEnd;
WriteFifo(data, data_start, data_end);
data_start = data_end;
}
}

if (dataStart < dataEnd)
WriteFifo(data, dataStart, dataEnd);
if (data_start < data_end)
WriteFifo(data, data_start, data_end);
}

void FifoPlayer::WriteAllMemoryUpdates()
@@ -5,16 +5,18 @@

#include <functional>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "Common/Assert.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include "Core/PowerPC/CPUCoreBase.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/OpcodeDecoding.h"

class FifoDataFile;
struct MemoryUpdate;
struct AnalyzedFrameInfo;

namespace CPU
{
@@ -43,16 +45,46 @@ enum class State;
// 8. The output of fifoplayer would be wrong.

// To keep compatibility with old fifologs, we have this flag which signals texture cache to not
// bother
// hashing the memory and just assume the hash matched.
// bother hashing the memory and just assume the hash matched.
// At a later point proper efb copy support should be added to fiforecorder and this flag will
// change
// based on the version of the .dff file, but until then it will always be true when a fifolog is
// playing.
// change based on the version of the .dff file, but until then it will always be true when a
// fifolog is playing.

// Shitty global to fix a shitty problem
extern bool IsPlayingBackFifologWithBrokenEFBCopies;

enum class FramePartType
{
Commands,
PrimitiveData,
EFBCopy,
};

struct FramePart
{
constexpr FramePart(FramePartType type, u32 start, u32 end, const CPState& cpmem)
: m_type(type), m_start(start), m_end(end), m_cpmem(cpmem)
{
}

const FramePartType m_type;
const u32 m_start;
const u32 m_end;
const CPState m_cpmem;
};

struct AnalyzedFrameInfo
{
std::vector<FramePart> parts;
Common::EnumMap<u32, FramePartType::EFBCopy> part_type_counts;

void AddPart(FramePartType type, u32 start, u32 end, const CPState& cpmem)
{
parts.emplace_back(type, start, end, cpmem);
part_type_counts[type]++;
}
};

class FifoPlayer
{
public:
@@ -102,14 +134,12 @@ class FifoPlayer

private:
class CPUCore;

FifoPlayer();

CPU::State AdvanceFrame();

void WriteFrame(const FifoFrameInfo& frame, const AnalyzedFrameInfo& info);
void WriteFramePart(u32 dataStart, u32 dataEnd, u32& nextMemUpdate, const FifoFrameInfo& frame,
const AnalyzedFrameInfo& info);
void WriteFramePart(const FramePart& part, u32* next_mem_update, const FifoFrameInfo& frame);

void WriteAllMemoryUpdates();
void WriteMemory(const MemoryUpdate& memUpdate);

This file was deleted.

This file was deleted.

@@ -6,13 +6,168 @@
#include <algorithm>
#include <cstring>

#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/Thread.h"

#include "Core/ConfigManager.h"
#include "Core/FifoPlayer/FifoAnalyzer.h"
#include "Core/FifoPlayer/FifoRecordAnalyzer.h"
#include "Core/HW/Memmap.h"

#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/XFStructs.h"

class FifoRecorder::FifoRecordAnalyzer : public OpcodeDecoder::Callback
{
public:
explicit FifoRecordAnalyzer(FifoRecorder* owner) : m_owner(owner) {}
explicit FifoRecordAnalyzer(FifoRecorder* owner, const u32* cpmem)
: m_owner(owner), m_cpmem(cpmem)
{
}

OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data)) {}
OPCODE_CALLBACK(void OnCP(u8 command, u32 value)) { GetCPState().LoadCPReg(command, value); }
OPCODE_CALLBACK(void OnBP(u8 command, u32 value)) {}
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size));
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
u32 vertex_size, u16 num_vertices,
const u8* vertex_data));
OPCODE_CALLBACK(void OnDisplayList(u32 address, u32 size))
{
WARN_LOG_FMT(VIDEO,
"Unhandled display list call {:08x} {:08x}; should have been inlined earlier",
address, size);
}
OPCODE_CALLBACK(void OnNop(u32 count)) {}
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data)) {}

OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size)) {}

OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }

private:
void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type,
u32 component_offset, u32 vertex_size, u16 num_vertices,
const u8* vertex_data);

FifoRecorder* const m_owner;
CPState m_cpmem;
};

void FifoRecorder::FifoRecordAnalyzer::OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size)
{
const u32 load_address = m_cpmem.array_bases[array] + m_cpmem.array_strides[array] * index;

m_owner->UseMemory(load_address, size * sizeof(u32), MemoryUpdate::XF_DATA);
}

// TODO: The following code is copied with modifications from VertexLoaderBase.
// Surely there's a better solution?
#include "VideoCommon/VertexLoader_Color.h"
#include "VideoCommon/VertexLoader_Normal.h"
#include "VideoCommon/VertexLoader_Position.h"
#include "VideoCommon/VertexLoader_TextCoord.h"

void FifoRecorder::FifoRecordAnalyzer::OnPrimitiveCommand(OpcodeDecoder::Primitive primitive,
u8 vat, u32 vertex_size, u16 num_vertices,
const u8* vertex_data)
{
const auto& vtx_desc = m_cpmem.vtx_desc;
const auto& vtx_attr = m_cpmem.vtx_attr[vat];

u32 offset = 0;

if (vtx_desc.low.PosMatIdx)
offset++;
for (auto texmtxidx : vtx_desc.low.TexMatIdx)
{
if (texmtxidx)
offset++;
}
const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
vtx_attr.g0.PosElements);
ProcessVertexComponent(CPArray::Position, vtx_desc.low.Position, offset, vertex_size,
num_vertices, vertex_data);
offset += pos_size;

const u32 norm_size =
VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3);
ProcessVertexComponent(CPArray::Normal, vtx_desc.low.Position, offset, vertex_size, num_vertices,
vertex_data);
offset += norm_size;

for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++)
{
const u32 color_size =
VertexLoader_Color::GetSize(vtx_desc.low.Color[i], vtx_attr.GetColorFormat(i));
ProcessVertexComponent(CPArray::Color0 + i, vtx_desc.low.Position, offset, vertex_size,
num_vertices, vertex_data);
offset += color_size;
}
for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++)
{
const u32 tc_size = VertexLoader_TextCoord::GetSize(
vtx_desc.high.TexCoord[i], vtx_attr.GetTexFormat(i), vtx_attr.GetTexElements(i));
ProcessVertexComponent(CPArray::TexCoord0 + i, vtx_desc.low.Position, offset, vertex_size,
num_vertices, vertex_data);
offset += tc_size;
}

ASSERT(offset == vertex_size);
}

// If a component is indexed, the array it indexes into for data must be saved.
void FifoRecorder::FifoRecordAnalyzer::ProcessVertexComponent(CPArray array_index,
VertexComponentFormat array_type,
u32 component_offset, u32 vertex_size,
u16 num_vertices,
const u8* vertex_data)
{
// Skip if not indexed array
if (!IsIndexed(array_type))
return;

u16 max_index = 0;

// Determine min and max indices
if (array_type == VertexComponentFormat::Index8)
{
for (u16 vertex_num = 0; vertex_num < num_vertices; vertex_num++)
{
const u8 index = vertex_data[component_offset];
vertex_data += vertex_size;

// 0xff skips the vertex
if (index != 0xff)
{
if (index > max_index)
max_index = index;
}
}
}
else
{
for (u16 vertex_num = 0; vertex_num < num_vertices; vertex_num++)
{
const u16 index = Common::swap16(&vertex_data[component_offset]);
vertex_data += vertex_size;

// 0xffff skips the vertex
if (index != 0xffff)
{
if (index > max_index)
max_index = index;
}
}
}

const u32 array_start = m_cpmem.array_bases[array_index];
const u32 array_size = m_cpmem.array_strides[array_index] * (max_index + 1);

m_owner->UseMemory(array_start, array_size, MemoryUpdate::VERTEX_STREAM);
}

static FifoRecorder instance;

FifoRecorder::FifoRecorder() = default;
@@ -76,7 +231,7 @@ void FifoRecorder::WriteGPCommand(const u8* data, u32 size)
{
// Assumes data contains all information for the command
// Calls FifoRecorder::UseMemory
const u32 analyzed_size = FifoAnalyzer::AnalyzeCommand(data, FifoAnalyzer::DecodeMode::Record);
const u32 analyzed_size = OpcodeDecoder::RunCommand(data, size, *m_record_analyzer);

// Make sure FifoPlayer's command analyzer agrees about the size of the command.
if (analyzed_size != size)
@@ -211,7 +366,7 @@ void FifoRecorder::SetVideoMemory(const u32* bpMem, const u32* cpMem, const u32*
memcpy(m_File->GetTexMem(), texMem, FifoDataFile::TEX_MEM_SIZE);
}

FifoRecordAnalyzer::Initialize(cpMem);
m_record_analyzer = std::make_unique<FifoRecordAnalyzer>(this, cpMem);
}

bool FifoRecorder::IsRecording() const
@@ -8,6 +8,7 @@
#include <mutex>
#include <vector>

#include "Common/Assert.h"
#include "Core/FifoPlayer/FifoDataFile.h"

class FifoRecorder
@@ -47,6 +48,8 @@ class FifoRecorder
static FifoRecorder& GetInstance();

private:
class FifoRecordAnalyzer;

// Accessed from both GUI and video threads

std::recursive_mutex m_mutex;
@@ -65,6 +68,7 @@ class FifoRecorder
bool m_SkipFutureData = true;
bool m_FrameEnded = false;
FifoFrameInfo m_CurrentFrame;
std::unique_ptr<FifoRecordAnalyzer> m_record_analyzer;
std::vector<u8> m_FifoData;
std::vector<u8> m_Ram;
std::vector<u8> m_ExRam;
@@ -217,11 +217,8 @@
<ClInclude Include="Core\DSP\Jit\x64\DSPJitTables.h" />
<ClInclude Include="Core\DSP\LabelMap.h" />
<ClInclude Include="Core\DSPEmulator.h" />
<ClInclude Include="Core\FifoPlayer\FifoAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoDataFile.h" />
<ClInclude Include="Core\FifoPlayer\FifoPlaybackAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoPlayer.h" />
<ClInclude Include="Core\FifoPlayer\FifoRecordAnalyzer.h" />
<ClInclude Include="Core\FifoPlayer\FifoRecorder.h" />
<ClInclude Include="Core\FreeLookConfig.h" />
<ClInclude Include="Core\FreeLookManager.h" />
@@ -815,11 +812,8 @@
<ClCompile Include="Core\DSP\Jit\x64\DSPJitUtil.cpp" />
<ClCompile Include="Core\DSP\LabelMap.cpp" />
<ClCompile Include="Core\DSPEmulator.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoDataFile.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoPlaybackAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoPlayer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoRecordAnalyzer.cpp" />
<ClCompile Include="Core\FifoPlayer\FifoRecorder.cpp" />
<ClCompile Include="Core\FreeLookConfig.cpp" />
<ClCompile Include="Core\FreeLookManager.cpp" />

Large diffs are not rendered by default.

@@ -58,15 +58,19 @@ class FIFOAnalyzer final : public QWidget

struct SearchResult
{
constexpr SearchResult(u32 frame, u32 object, u32 cmd)
: m_frame(frame), m_object(object), m_cmd(cmd)
constexpr SearchResult(u32 frame, u32 object_idx, u32 cmd)
: m_frame(frame), m_object_idx(object_idx), m_cmd(cmd)
{
}
const u32 m_frame;
const u32 m_object;
// Index in tree view. Does not correspond with object numbers or part numbers.
const u32 m_object_idx;
const u32 m_cmd;
};

// Offsets from the start of the first part in an object for each command within the currently
// selected object.
std::vector<int> m_object_data_offsets;

std::vector<SearchResult> m_search_results;
};
@@ -21,7 +21,6 @@

#include "Core/Core.h"
#include "Core/FifoPlayer/FifoDataFile.h"
#include "Core/FifoPlayer/FifoPlaybackAnalyzer.h"
#include "Core/FifoPlayer/FifoPlayer.h"
#include "Core/FifoPlayer/FifoRecorder.h"

@@ -151,18 +150,18 @@ void FIFOPlayerWindow::CreateWidgets()
layout->addWidget(recording_group);
layout->addWidget(m_button_box);

QWidget* main_widget = new QWidget(this);
main_widget->setLayout(layout);
m_main_widget = new QWidget(this);
m_main_widget->setLayout(layout);

auto* tab_widget = new QTabWidget(this);
m_tab_widget = new QTabWidget(this);

m_analyzer = new FIFOAnalyzer;

tab_widget->addTab(main_widget, tr("Play / Record"));
tab_widget->addTab(m_analyzer, tr("Analyze"));
m_tab_widget->addTab(m_main_widget, tr("Play / Record"));
m_tab_widget->addTab(m_analyzer, tr("Analyze"));

auto* tab_layout = new QVBoxLayout;
tab_layout->addWidget(tab_widget);
tab_layout->addWidget(m_tab_widget);

setLayout(tab_layout);
}
@@ -251,6 +250,8 @@ void FIFOPlayerWindow::OnEmulationStopped()
StopRecording();

UpdateControls();
// When emulation stops, switch away from the analyzer tab, as it no longer shows anything useful
m_tab_widget->setCurrentWidget(m_main_widget);
m_analyzer->Update();
}

@@ -12,6 +12,7 @@ class QDialogButtonBox;
class QLabel;
class QPushButton;
class QSpinBox;
class QTabWidget;
class FIFOAnalyzer;

class FIFOPlayerWindow : public QWidget
@@ -64,6 +65,9 @@ class FIFOPlayerWindow : public QWidget
QCheckBox* m_early_memory_updates;
QDialogButtonBox* m_button_box;

QWidget* m_main_widget;
QTabWidget* m_tab_widget;

FIFOAnalyzer* m_analyzer;
Core::State m_emu_state = Core::State::Uninitialized;
};
@@ -12,7 +12,6 @@ set_target_properties(dolphin-tool PROPERTIES OUTPUT_NAME dolphin-tool)

target_link_libraries(dolphin-tool
PRIVATE
core
discio
videocommon
cpp-optparse
@@ -3,6 +3,8 @@

#include <array>

#include "Common/EnumMap.h"

#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DRender.h"
#include "VideoBackends/D3D/D3DState.h"
@@ -20,55 +22,75 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<D3DVertexFormat>(vtx_decl);
}

static const DXGI_FORMAT d3d_format_lookup[5 * 4 * 2] = {
// float formats
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_SNORM,
DXGI_FORMAT_R16_UNORM,
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R8G8_SNORM,
DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,

// integer formats
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R8G8_UINT,
DXGI_FORMAT_R8G8_SINT,
DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
};

DXGI_FORMAT VarToD3D(VarType t, int size, bool integer)
DXGI_FORMAT VarToD3D(ComponentFormat t, int size, bool integer)
{
DXGI_FORMAT retval = d3d_format_lookup[(int)t + 5 * (size - 1) + 5 * 4 * (int)integer];
using FormatMap = Common::EnumMap<DXGI_FORMAT, ComponentFormat::Float>;
static constexpr auto f = [](FormatMap a) { return a; }; // Deduction helper

static constexpr std::array<FormatMap, 4> d3d_float_format_lookup = {
f({
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_SNORM,
DXGI_FORMAT_R16_UNORM,
DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT,
}),
f({
DXGI_FORMAT_R8G8_UNORM,
DXGI_FORMAT_R8G8_SNORM,
DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM,
DXGI_FORMAT_R32G32_FLOAT,
}),
f({
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_R32G32B32_FLOAT,
}),
f({
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R32G32B32A32_FLOAT,
}),
};

static constexpr std::array<FormatMap, 4> d3d_integer_format_lookup = {
f({
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_R8G8_UINT,
DXGI_FORMAT_R8G8_SINT,
DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN,
}),
f({
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
}),
};

DXGI_FORMAT retval =
integer ? d3d_integer_format_lookup[size - 1][t] : d3d_float_format_lookup[size - 1][t];
if (retval == DXGI_FORMAT_UNKNOWN)
{
PanicAlertFmt("VarToD3D: Invalid type/size combo {}, {}, {}", t, size, integer);
@@ -4,39 +4,43 @@
#include "VideoBackends/D3D12/DX12VertexFormat.h"

#include "Common/Assert.h"
#include "Common/EnumMap.h"

#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderGen.h"

namespace DX12
{
static DXGI_FORMAT VarToDXGIFormat(VarType t, u32 components, bool integer)
static DXGI_FORMAT VarToDXGIFormat(ComponentFormat t, u32 components, bool integer)
{
using ComponentArray = std::array<DXGI_FORMAT, 4>;
static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper

// NOTE: 3-component formats are not valid.
static const DXGI_FORMAT float_type_lookup[][4] = {
{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE
{DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE
{DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT
{DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> float_type_lookup = {
f({DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_UNORM}), // UByte
f({DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM,
DXGI_FORMAT_R8G8B8A8_SNORM}), // Byte
f({DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_UNORM}), // UShort
f({DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM,
DXGI_FORMAT_R16G16B16A16_SNORM}), // Short
f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float
};

static const DXGI_FORMAT integer_type_lookup[][4] = {
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE
{DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT
{DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> integer_type_lookup = {
f({DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_UINT}), // UByte
f({DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R8G8B8A8_SINT}), // Byte
f({DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_UINT}), // UShort
f({DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R16G16B16A16_SINT}), // Short
f({DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R32G32B32A32_FLOAT}), // Float
};

ASSERT(components > 0 && components <= 4);
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/GL/GLUtil.h"
#include "Common/MsgHandler.h"

@@ -23,10 +24,11 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<GLVertexFormat>(vtx_decl);
}

static inline GLuint VarToGL(VarType t)
static inline GLuint VarToGL(ComponentFormat t)
{
static const GLuint lookup[5] = {GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT,
GL_FLOAT};
static constexpr Common::EnumMap<GLuint, ComponentFormat::Float> lookup = {
GL_UNSIGNED_BYTE, GL_BYTE, GL_UNSIGNED_SHORT, GL_SHORT, GL_FLOAT,
};
return lookup[t];
}

@@ -36,28 +36,29 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
{
DebugUtil::OnObjectBegin();

u8 primitiveType = 0;
using OpcodeDecoder::Primitive;
Primitive primitive_type = Primitive::GX_DRAW_QUADS;
switch (m_current_primitive_type)
{
case PrimitiveType::Points:
primitiveType = OpcodeDecoder::GX_DRAW_POINTS;
primitive_type = Primitive::GX_DRAW_POINTS;
break;
case PrimitiveType::Lines:
primitiveType = OpcodeDecoder::GX_DRAW_LINES;
primitive_type = Primitive::GX_DRAW_LINES;
break;
case PrimitiveType::Triangles:
primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLES;
primitive_type = Primitive::GX_DRAW_TRIANGLES;
break;
case PrimitiveType::TriangleStrip:
primitiveType = OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP;
primitive_type = Primitive::GX_DRAW_TRIANGLE_STRIP;
break;
}

// Flush bounding box here because software overrides the base function
if (g_renderer->IsBBoxEnabled())
g_renderer->BBoxFlush();

m_setup_unit.Init(primitiveType);
m_setup_unit.Init(primitive_type);

// set all states with are stored within video sw
for (int i = 0; i < 4; i++)
@@ -74,7 +75,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
memset(static_cast<void*>(&m_vertex), 0, sizeof(m_vertex));

// parse the videocommon format to our own struct format (m_vertex)
SetFormat(g_main_cp_state.last_id, primitiveType);
SetFormat();
ParseVertex(VertexLoaderManager::GetCurrentVertexFormat()->GetVertexDeclaration(), index);

// transform this vertex so that it can be used for rasterization (outVertex)
@@ -98,7 +99,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
DebugUtil::OnObjectEnd();
}

void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
void SWVertexLoader::SetFormat()
{
// matrix index from xf regs or cp memory?
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx ||
@@ -144,32 +145,32 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f
if (format.enable)
{
src.Skip(format.offset);
src.Skip(base_component * (1 << (format.type >> 1)));
src.Skip(base_component * GetElementSize(format.type));

int i;
for (i = 0; i < std::min(format.components - base_component, components); i++)
{
int i_dst = reverse ? components - i - 1 : i;
switch (format.type)
{
case VAR_UNSIGNED_BYTE:
case ComponentFormat::UByte:
dst[i_dst] = ReadNormalized<T, u8>(src.Read<u8, swap>());
break;
case VAR_BYTE:
case ComponentFormat::Byte:
dst[i_dst] = ReadNormalized<T, s8>(src.Read<s8, swap>());
break;
case VAR_UNSIGNED_SHORT:
case ComponentFormat::UShort:
dst[i_dst] = ReadNormalized<T, u16>(src.Read<u16, swap>());
break;
case VAR_SHORT:
case ComponentFormat::Short:
dst[i_dst] = ReadNormalized<T, s16>(src.Read<s16, swap>());
break;
case VAR_FLOAT:
case ComponentFormat::Float:
dst[i_dst] = ReadNormalized<T, float>(src.Read<float, swap>());
break;
}

ASSERT_MSG(VIDEO, !format.integer || format.type != VAR_FLOAT,
ASSERT_MSG(VIDEO, !format.integer || format.type != ComponentFormat::Float,
"only non-float values are allowed to be streamed as integer");
}
for (; i < components; i++)
@@ -22,7 +22,7 @@ class SWVertexLoader final : public VertexManagerBase
protected:
void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override;

void SetFormat(u8 attributeIndex, u8 primitiveType);
void SetFormat();
void ParseVertex(const PortableVertexDeclaration& vdec, int index);

InputVertexData m_vertex{};
@@ -9,9 +9,9 @@
#include "VideoBackends/Software/Clipper.h"
#include "VideoCommon/OpcodeDecoding.h"

void SetupUnit::Init(u8 primitiveType)
void SetupUnit::Init(OpcodeDecoder::Primitive primitive_type)
{
m_PrimType = primitiveType;
m_PrimType = primitive_type;

m_VertexCounter = 0;
m_VertPointer[0] = &m_Vertices[0];
@@ -28,31 +28,32 @@ OutputVertexData* SetupUnit::GetVertex()

void SetupUnit::SetupVertex()
{
using OpcodeDecoder::Primitive;
switch (m_PrimType)
{
case OpcodeDecoder::GX_DRAW_QUADS:
case Primitive::GX_DRAW_QUADS:
SetupQuad();
break;
case OpcodeDecoder::GX_DRAW_QUADS_2:
case Primitive::GX_DRAW_QUADS_2:
WARN_LOG_FMT(VIDEO, "Non-standard primitive drawing command GL_DRAW_QUADS_2");
SetupQuad();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLES:
case Primitive::GX_DRAW_TRIANGLES:
SetupTriangle();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP:
case Primitive::GX_DRAW_TRIANGLE_STRIP:
SetupTriStrip();
break;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN:
case Primitive::GX_DRAW_TRIANGLE_FAN:
SetupTriFan();
break;
case OpcodeDecoder::GX_DRAW_LINES:
case Primitive::GX_DRAW_LINES:
SetupLine();
break;
case OpcodeDecoder::GX_DRAW_LINE_STRIP:
case Primitive::GX_DRAW_LINE_STRIP:
SetupLineStrip();
break;
case OpcodeDecoder::GX_DRAW_POINTS:
case Primitive::GX_DRAW_POINTS:
SetupPoint();
break;
}
@@ -6,9 +6,14 @@
#include "Common/CommonTypes.h"
#include "VideoBackends/Software/NativeVertexFormat.h"

namespace OpcodeDecoder
{
enum class Primitive : u8;
}

class SetupUnit
{
u8 m_PrimType = 0;
OpcodeDecoder::Primitive m_PrimType{};
int m_VertexCounter = 0;

OutputVertexData m_Vertices[3];
@@ -24,7 +29,7 @@ class SetupUnit
void SetupPoint();

public:
void Init(u8 primitiveType);
void Init(OpcodeDecoder::Primitive primitive_type);

OutputVertexData* GetVertex();

@@ -4,6 +4,7 @@
#include "VideoBackends/Vulkan/VKVertexFormat.h"

#include "Common/Assert.h"
#include "Common/EnumMap.h"

#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/ObjectCache.h"
@@ -13,32 +14,35 @@

namespace Vulkan
{
static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer)
static VkFormat VarToVkFormat(ComponentFormat t, uint32_t components, bool integer)
{
static const VkFormat float_type_lookup[][4] = {
{VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE
{VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM,
VK_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE
{VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM,
VK_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT
{VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT
{VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT
using ComponentArray = std::array<VkFormat, 4>;
static constexpr auto f = [](ComponentArray a) { return a; }; // Deduction helper

static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> float_type_lookup = {
f({VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8A8_UNORM}), // UByte
f({VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM,
VK_FORMAT_R8G8B8A8_SNORM}), // Byte
f({VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM,
VK_FORMAT_R16G16B16A16_UNORM}), // UShort
f({VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16A16_SNORM}), // Short
f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT}), // Float
};

static const VkFormat integer_type_lookup[][4] = {
{VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE
{VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE
{VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT
{VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT
{VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT
static constexpr Common::EnumMap<ComponentArray, ComponentFormat::Float> integer_type_lookup = {
f({VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8A8_UINT}), // UByte
f({VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8A8_SINT}), // Byte
f({VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16A16_UINT}), // UShort
f({VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16A16_SINT}), // Short
f({VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT}), // Float
};

ASSERT(components > 0 && components <= 4);
@@ -258,7 +258,7 @@ enum class TevBias : u32
{
Zero = 0,
AddHalf = 1,
Subhalf = 2,
SubHalf = 2,
Compare = 3
};
template <>
@@ -491,6 +491,94 @@ struct fmt::formatter<TevStageCombiner::ColorCombiner>
template <typename FormatContext>
auto format(const TevStageCombiner::ColorCombiner& cc, FormatContext& ctx)
{
auto out = ctx.out();
if (cc.bias != TevBias::Compare)
{
// Generate an equation view, simplifying out addition of zero and multiplication by 1
// dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale
// or equivalently and more readably when the terms are not constants:
// dest = (d (OP) lerp(a, b, c) + bias) * scale
// Note that lerping is more complex than the first form shows; see PixelShaderGen's
// WriteTevRegular for more details.

static constexpr Common::EnumMap<const char*, TevColorArg::Zero> alt_names = {
"prev.rgb", "prev.aaa", "c0.rgb", "c0.aaa", "c1.rgb", "c1.aaa", "c2.rgb", "c2.aaa",
"tex.rgb", "tex.aaa", "ras.rgb", "ras.aaa", "1", ".5", "konst.rgb", "0",
};

const bool has_d = cc.d != TevColorArg::Zero;
// If c is one, (1 - c) is zero, so (1-c)*a is zero
const bool has_ac = cc.a != TevColorArg::Zero && cc.c != TevColorArg::One;
// If either b or c is zero, b*c is zero
const bool has_bc = cc.b != TevColorArg::Zero && cc.c != TevColorArg::Zero;
const bool has_bias = cc.bias != TevBias::Zero; // != Compare is already known
const bool has_scale = cc.scale != TevScale::Scale1;

const char op = (cc.op == TevOp::Sub ? '-' : '+');

if (cc.dest == TevOutput::Prev)
out = format_to(out, "dest.rgb = ");
else
out = format_to(out, "{:n}.rgb = ", cc.dest);

if (has_scale)
out = format_to(out, "(");
if (has_d)
out = format_to(out, "{}", alt_names[cc.d]);
if (has_ac || has_bc)
{
if (has_d)
out = format_to(out, " {} ", op);
else if (cc.op == TevOp::Sub)
out = format_to(out, "{}", op);
if (has_ac && has_bc)
{
if (cc.c == TevColorArg::Half)
{
// has_a and has_b imply that c is not Zero or One, and Half is the only remaining
// numeric constant. This results in an average.
out = format_to(out, "({} + {})/2", alt_names[cc.a], alt_names[cc.b]);
}
else
{
out = format_to(out, "lerp({}, {}, {})", alt_names[cc.a], alt_names[cc.b],
alt_names[cc.c]);
}
}
else if (has_ac)
{
if (cc.c == TevColorArg::Zero)
out = format_to(out, "{}", alt_names[cc.a]);
else if (cc.c == TevColorArg::Half) // 1 - .5 is .5
out = format_to(out, ".5*{}", alt_names[cc.a]);
else
out = format_to(out, "(1 - {})*{}", alt_names[cc.c], alt_names[cc.a]);
}
else // has_bc
{
if (cc.c == TevColorArg::One)
out = format_to(out, "{}", alt_names[cc.b]);
else
out = format_to(out, "{}*{}", alt_names[cc.c], alt_names[cc.b]);
}
}
if (has_bias)
{
if (has_ac || has_bc || has_d)
out = format_to(out, cc.bias == TevBias::AddHalf ? " + .5" : " - .5");
else
out = format_to(out, cc.bias == TevBias::AddHalf ? ".5" : "-.5");
}
else
{
// If nothing has been written so far, add a zero
if (!(has_ac || has_bc || has_d))
out = format_to(out, "0");
}
if (has_scale)
out = format_to(out, ") * {:n}", cc.scale);
out = format_to(out, "\n\n");
}
return format_to(ctx.out(),
"a: {}\n"
"b: {}\n"
@@ -512,7 +600,80 @@ struct fmt::formatter<TevStageCombiner::AlphaCombiner>
template <typename FormatContext>
auto format(const TevStageCombiner::AlphaCombiner& ac, FormatContext& ctx)
{
return format_to(ctx.out(),
auto out = ctx.out();
if (ac.bias != TevBias::Compare)
{
// Generate an equation view, simplifying out addition of zero and multiplication by 1
// dest = (d (OP) ((1 - c)*a + c*b) + bias) * scale
// or equivalently and more readably when the terms are not constants:
// dest = (d (OP) lerp(a, b, c) + bias) * scale
// Note that lerping is more complex than the first form shows; see PixelShaderGen's
// WriteTevRegular for more details.

// We don't need an alt_names map here, unlike the color combiner, as the only special term is
// Zero, and we we filter that out below. However, we do need to append ".a" to all
// parameters, to make it explicit that these are operations on the alpha term instead of the
// 4-element vector. We also need to use the :n specifier so that the numeric ID isn't shown.

const bool has_d = ac.d != TevAlphaArg::Zero;
// There is no c value for alpha that results in (1 - c) always being zero
const bool has_ac = ac.a != TevAlphaArg::Zero;
// If either b or c is zero, b*c is zero
const bool has_bc = ac.b != TevAlphaArg::Zero && ac.c != TevAlphaArg::Zero;
const bool has_bias = ac.bias != TevBias::Zero; // != Compare is already known
const bool has_scale = ac.scale != TevScale::Scale1;

const char op = (ac.op == TevOp::Sub ? '-' : '+');

if (ac.dest == TevOutput::Prev)
out = format_to(out, "dest.a = ");
else
out = format_to(out, "{:n}.a = ", ac.dest);

if (has_scale)
out = format_to(out, "(");
if (has_d)
out = format_to(out, "{:n}.a", ac.d);
if (has_ac || has_bc)
{
if (has_d)
out = format_to(out, " {} ", op);
else if (ac.op == TevOp::Sub)
out = format_to(out, "{}", op);
if (has_ac && has_bc)
{
out = format_to(out, "lerp({:n}.a, {:n}.a, {:n}.a)", ac.a, ac.b, ac.c);
}
else if (has_ac)
{
if (ac.c == TevAlphaArg::Zero)
out = format_to(out, "{:n}.a", ac.a);
else
out = format_to(out, "(1 - {:n}.a)*{:n}.a", ac.c, ac.a);
}
else // has_bc
{
out = format_to(out, "{:n}.a*{:n}.a", ac.c, ac.b);
}
}
if (has_bias)
{
if (has_ac || has_bc || has_d)
out = format_to(out, ac.bias == TevBias::AddHalf ? " + .5" : " - .5");
else
out = format_to(out, ac.bias == TevBias::AddHalf ? ".5" : "-.5");
}
else
{
// If nothing has been written so far, add a zero
if (!(has_ac || has_bc || has_d))
out = format_to(out, "0");
}
if (has_scale)
out = format_to(out, ") * {:n}", ac.scale);
out = format_to(out, "\n\n");
}
return format_to(out,
"a: {}\n"
"b: {}\n"
"c: {}\n"
@@ -756,14 +917,14 @@ struct fmt::formatter<LODType> : EnumFormatter<LODType::Diagonal>
formatter() : EnumFormatter({"Edge LOD", "Diagonal LOD"}) {}
};

enum class MaxAnsio
enum class MaxAniso
{
One = 0,
Two = 1,
Four = 2,
};
template <>
struct fmt::formatter<MaxAnsio> : EnumFormatter<MaxAnsio::Four>
struct fmt::formatter<MaxAniso> : EnumFormatter<MaxAniso::Four>
{
formatter() : EnumFormatter({"1", "2", "4"}) {}
};
@@ -777,7 +938,7 @@ union TexMode0
BitField<7, 1, FilterMode> min_filter;
BitField<8, 1, LODType> diag_lod;
BitField<9, 8, s32> lod_bias;
BitField<19, 2, MaxAnsio> max_aniso;
BitField<19, 2, MaxAniso> max_aniso;
BitField<21, 1, bool, u32> lod_clamp;
u32 hex;
};
@@ -2205,7 +2366,7 @@ struct BPMemory

extern BPMemory bpmem;

void LoadBPReg(u32 value0, int cycles_into_future);
void LoadBPRegPreprocess(u32 value0, int cycles_into_future);
void LoadBPReg(u8 reg, u32 value, int cycles_into_future);
void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future);

std::pair<std::string, std::string> GetBPRegInfo(u8 cmd, u32 cmddata);
@@ -716,29 +716,27 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
bp.newvalue);
}

// Call browser: OpcodeDecoding.cpp ExecuteDisplayList > Decode() > LoadBPReg()
void LoadBPReg(u32 value0, int cycles_into_future)
// Call browser: OpcodeDecoding.cpp RunCallback::OnBP()
void LoadBPReg(u8 reg, u32 value, int cycles_into_future)
{
int regNum = value0 >> 24;
int oldval = ((u32*)&bpmem)[regNum];
int newval = (oldval & ~bpmem.bpMask) | (value0 & bpmem.bpMask);
int oldval = ((u32*)&bpmem)[reg];
int newval = (oldval & ~bpmem.bpMask) | (value & bpmem.bpMask);
int changes = (oldval ^ newval) & 0xFFFFFF;

BPCmd bp = {regNum, changes, newval};
BPCmd bp = {reg, changes, newval};

// Reset the mask register if we're not trying to set it ourselves.
if (regNum != BPMEM_BP_MASK)
if (reg != BPMEM_BP_MASK)
bpmem.bpMask = 0xFFFFFF;

BPWritten(bp, cycles_into_future);
}

void LoadBPRegPreprocess(u32 value0, int cycles_into_future)
void LoadBPRegPreprocess(u8 reg, u32 value, int cycles_into_future)
{
int regNum = value0 >> 24;
// masking could hypothetically be a problem
u32 newval = value0 & 0xffffff;
switch (regNum)
// masking via BPMEM_BP_MASK could hypothetically be a problem
u32 newval = value & 0xffffff;
switch (reg)
{
case BPMEM_SETDRAWDONE:
if ((newval & 0xff) == 0x02)
@@ -2,7 +2,14 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoCommon/CPMemory.h"

#include <cstring>

#include "Common/ChunkFile.h"
#include "Common/Logging/Log.h"
#include "Core/DolphinAnalytics.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/VertexLoaderManager.h"

// CP state
CPState g_main_cp_state;
@@ -22,13 +29,13 @@ void DoCPState(PointerWrap& p)
if (p.mode == PointerWrap::MODE_READ)
{
CopyPreprocessCPStateFromMain();
g_main_cp_state.bases_dirty = true;
VertexLoaderManager::g_bases_dirty = true;
}
}

void CopyPreprocessCPStateFromMain()
{
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
std::memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
}

std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value)
@@ -62,12 +69,167 @@ std::pair<std::string, std::string> GetCPRegInfo(u8 cmd, u32 value)
return std::make_pair(fmt::format("CP_VAT_REG_C - Format {}", cmd & CP_VAT_MASK),
fmt::to_string(UVAT_group2{.Hex = value}));
case ARRAY_BASE:
return std::make_pair(fmt::format("ARRAY_BASE Array {}", cmd & CP_ARRAY_MASK),
fmt::format("Base address {:08x}", value));
return std::make_pair(
fmt::format("ARRAY_BASE Array {}", static_cast<CPArray>(cmd & CP_ARRAY_MASK)),
fmt::format("Base address {:08x}", value));
case ARRAY_STRIDE:
return std::make_pair(fmt::format("ARRAY_STRIDE Array {}", cmd - ARRAY_STRIDE),
fmt::format("Stride {:02x}", value & 0xff));
return std::make_pair(
fmt::format("ARRAY_STRIDE Array {}", static_cast<CPArray>(cmd & CP_ARRAY_MASK)),
fmt::format("Stride {:02x}", value & 0xff));
default:
return std::make_pair(fmt::format("Invalid CP register {:02x} = {:08x}", cmd, value), "");
}
}

CPState::CPState(const u32* memory) : CPState()
{
matrix_index_a.Hex = memory[MATINDEX_A];
matrix_index_b.Hex = memory[MATINDEX_B];
vtx_desc.low.Hex = memory[VCD_LO];
vtx_desc.high.Hex = memory[VCD_HI];

for (u32 i = 0; i < CP_NUM_VAT_REG; i++)
{
vtx_attr[i].g0.Hex = memory[CP_VAT_REG_A + i];
vtx_attr[i].g1.Hex = memory[CP_VAT_REG_B + i];
vtx_attr[i].g2.Hex = memory[CP_VAT_REG_C + i];
}

for (u32 i = 0; i < CP_NUM_ARRAYS; i++)
{
array_bases[static_cast<CPArray>(i)] = memory[ARRAY_BASE + i];
array_strides[static_cast<CPArray>(i)] = memory[ARRAY_STRIDE + i];
}
}

void CPState::LoadCPReg(u8 sub_cmd, u32 value)
{
switch (sub_cmd & CP_COMMAND_MASK)
{
case UNKNOWN_00:
case UNKNOWN_10:
case UNKNOWN_20:
if (!(sub_cmd == UNKNOWN_20 && value == 0))
{
// All titles using libogc or the official SDK issue 0x20 with value=0 on startup
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND);
DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}",
sub_cmd);
}
break;

case MATINDEX_A:
if (sub_cmd != MATINDEX_A)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP MATINDEX_A: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
MATINDEX_A, sub_cmd);
}

matrix_index_a.Hex = value;
break;

case MATINDEX_B:
if (sub_cmd != MATINDEX_B)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP MATINDEX_B: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
MATINDEX_B, sub_cmd);
}

matrix_index_b.Hex = value;
break;

case VCD_LO:
if (sub_cmd != VCD_LO) // Stricter than YAGCD
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP VCD_LO: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
VCD_LO, sub_cmd);
}

vtx_desc.low.Hex = value;
break;

case VCD_HI:
if (sub_cmd != VCD_HI) // Stricter than YAGCD
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO,
"CP VCD_HI: an exact value of {:02x} was expected "
"but instead a value of {:02x} was seen",
VCD_HI, sub_cmd);
}

vtx_desc.high.Hex = value;
break;

case CP_VAT_REG_A:
if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A);
}
vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value;
break;

case CP_VAT_REG_B:
if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B);
}
vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value;
break;

case CP_VAT_REG_C:
if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG)
{
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C);
}
vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value;
break;

// Pointers to vertex arrays in GC RAM
case ARRAY_BASE:
array_bases[static_cast<CPArray>(sub_cmd & CP_ARRAY_MASK)] =
value & CommandProcessor::GetPhysicalAddressMask();
break;

case ARRAY_STRIDE:
array_strides[static_cast<CPArray>(sub_cmd & CP_ARRAY_MASK)] = value & 0xFF;
break;

default:
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND);
WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value);
}
}

void CPState::FillCPMemoryArray(u32* memory) const
{
memory[MATINDEX_A] = matrix_index_a.Hex;
memory[MATINDEX_B] = matrix_index_b.Hex;
memory[VCD_LO] = vtx_desc.low.Hex;
memory[VCD_HI] = vtx_desc.high.Hex;

for (int i = 0; i < CP_NUM_VAT_REG; ++i)
{
memory[CP_VAT_REG_A + i] = vtx_attr[i].g0.Hex;
memory[CP_VAT_REG_B + i] = vtx_attr[i].g1.Hex;
memory[CP_VAT_REG_C + i] = vtx_attr[i].g2.Hex;
}

for (int i = 0; i < CP_NUM_ARRAYS; ++i)
{
memory[ARRAY_BASE + i] = array_bases[static_cast<CPArray>(i)];
memory[ARRAY_STRIDE + i] = array_strides[static_cast<CPArray>(i)];
}
}
@@ -5,12 +5,14 @@

#include <array>
#include <string>
#include <type_traits>
#include <utility>

#include "Common/BitField.h"
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h"
#include "Common/EnumMap.h"
#include "Common/MsgHandler.h"

enum
@@ -53,24 +55,46 @@ enum
};

// Vertex array numbers
enum
enum class CPArray : u8
{
ARRAY_POSITION = 0,
ARRAY_NORMAL = 1,
ARRAY_COLOR0 = 2,
NUM_COLOR_ARRAYS = 2,
ARRAY_TEXCOORD0 = 4,
NUM_TEXCOORD_ARRAYS = 8,
Position = 0,
Normal = 1,

Color0 = 2,
Color1 = 3,

ARRAY_XF_A = 12, // Usually used for position matrices
ARRAY_XF_B = 13, // Usually used for normal matrices
ARRAY_XF_C = 14, // Usually used for tex coord matrices
ARRAY_XF_D = 15, // Usually used for light objects
TexCoord0 = 4,
TexCoord1 = 5,
TexCoord2 = 6,
TexCoord3 = 7,
TexCoord4 = 8,
TexCoord5 = 9,
TexCoord6 = 10,
TexCoord7 = 11,

// Number of arrays related to vertex components (position, normal, color, tex coord)
// Excludes the 4 arrays used for indexed XF loads
NUM_VERTEX_COMPONENT_ARRAYS = 12,
XF_A = 12, // Usually used for position matrices
XF_B = 13, // Usually used for normal matrices
XF_C = 14, // Usually used for tex coord matrices
XF_D = 15, // Usually used for light objects
};
template <>
struct fmt::formatter<CPArray> : EnumFormatter<CPArray::XF_D>
{
static constexpr array_type names = {"Position", "Normal", "Color 0", "Color 1",
"Tex Coord 0", "Tex Coord 1", "Tex Coord 2", "Tex Coord 3",
"Tex Coord 4", "Tex Coord 5", "Tex Coord 6", "Tex Coord 7",
"XF A", "XF B", "XF C", "XF D"};
formatter() : EnumFormatter(names) {}
};
// Intended for offsetting from Color0/TexCoord0
constexpr CPArray operator+(CPArray array, u8 offset)
{
return static_cast<CPArray>(static_cast<u8>(array) + offset);
}

// Number of arrays related to vertex components (position, normal, color, tex coord)
// Excludes the 4 arrays used for indexed XF loads
constexpr u8 NUM_VERTEX_COMPONENT_ARRAYS = 12;

// Vertex components
enum class VertexComponentFormat
@@ -607,32 +631,29 @@ class VertexLoaderBase;
// STATE_TO_SAVE
struct CPState final
{
u32 array_bases[CP_NUM_ARRAYS]{};
u32 array_strides[CP_NUM_ARRAYS]{};
CPState() = default;
explicit CPState(const u32* memory);

// Mutates the CP state based on the given command and value.
void LoadCPReg(u8 sub_cmd, u32 value);
// Fills memory with data from CP regs. There should be space for 0x100 values in memory.
void FillCPMemoryArray(u32* memory) const;

Common::EnumMap<u32, CPArray::XF_D> array_bases;
Common::EnumMap<u32, CPArray::XF_D> array_strides;
TMatrixIndexA matrix_index_a{};
TMatrixIndexB matrix_index_b{};
TVtxDesc vtx_desc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT vtx_attr[CP_NUM_VAT_REG]{};

// Attributes that actually belong to VertexLoaderManager:
BitSet32 attr_dirty{};
bool bases_dirty = false;
VertexLoaderBase* vertex_loaders[CP_NUM_VAT_REG]{};
int last_id = 0;
std::array<VAT, CP_NUM_VAT_REG> vtx_attr{};
};
static_assert(std::is_trivially_copyable_v<CPState>);

class PointerWrap;

extern CPState g_main_cp_state;
extern CPState g_preprocess_cp_state;

// Might move this into its own file later.
void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);

// Fills memory with data from CP regs
void FillCPMemoryArray(u32* memory);

void DoCPState(PointerWrap& p);

void CopyPreprocessCPStateFromMain();
@@ -5,6 +5,7 @@

#include <atomic>
#include <cstring>
#include <fmt/format.h>

#include "Common/Assert.h"
#include "Common/ChunkFile.h"
@@ -607,18 +608,18 @@ void SetCpClearRegister()
{
}

void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess)
{
// TODO(Omega): Maybe dump FIFO to file on this error
PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, {2}).\n"
PanicAlertFmtT("GFX FIFO: Unknown Opcode ({0:#04x} @ {1}, preprocess={2}).\n"
"This means one of the following:\n"
"* The emulated GPU got desynced, disabling dual core can help\n"
"* Command stream corrupted by some spurious memory bug\n"
"* This really is an unknown opcode (unlikely)\n"
"* Some other sort of bug\n\n"
"Further errors will be sent to the Video Backend log and\n"
"Dolphin will now likely crash or hang. Enjoy.",
cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");
cmd_byte, fmt::ptr(buffer), preprocess);

{
PanicAlertFmt("Illegal command {:02x}\n"
@@ -169,7 +169,7 @@ void SetCpClearRegister();
void SetCpControlRegister();
void SetCpStatusRegister();

void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess);
void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess);

u32 GetPhysicalAddressMask();

@@ -273,8 +273,8 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
}
}
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
s_video_buffer_pp_read_ptr = OpcodeDecoder::Run<true>(
DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
s_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo<true>(
DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr);
// This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + len;
}
@@ -316,7 +316,7 @@ void RunGpuLoop()
if (write_ptr > seen_ptr)
{
s_video_buffer_read_ptr =
OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
OpcodeDecoder::RunFifo(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr);
s_video_buffer_seen_ptr = write_ptr;
}
}
@@ -349,8 +349,8 @@ void RunGpuLoop()
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) - 32);

u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted);

fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed);
fifo.CPReadWriteDistance.fetch_sub(32, std::memory_order_seq_cst);
@@ -466,8 +466,8 @@ static int RunGpuOnCpu(int ticks)
}
ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed));
u32 cycles = 0;
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles);
available_ticks -= cycles;
}

@@ -837,12 +837,12 @@ bool FramebufferManager::CompilePokePipelines()
{
PortableVertexDeclaration vtx_decl = {};
vtx_decl.position.enable = true;
vtx_decl.position.type = VAR_FLOAT;
vtx_decl.position.type = ComponentFormat::Float;
vtx_decl.position.components = 4;
vtx_decl.position.integer = false;
vtx_decl.position.offset = offsetof(EFBPokeVertex, position);
vtx_decl.colors[0].enable = true;
vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE;
vtx_decl.colors[0].type = ComponentFormat::UByte;
vtx_decl.colors[0].components = 4;
vtx_decl.colors[0].integer = false;
vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color);
@@ -6,25 +6,29 @@
#include <cmath>

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"

constexpr std::array<const char*, 4> primitives_ogl{
constexpr Common::EnumMap<const char*, PrimitiveType::TriangleStrip> primitives_ogl{
"points",
"lines",
"triangles",
"triangles",
};
constexpr std::array<const char*, 4> primitives_d3d{
constexpr Common::EnumMap<const char*, PrimitiveType::TriangleStrip> primitives_d3d{
"point",
"line",
"triangle",
"triangle",
};

constexpr Common::EnumMap<u32, PrimitiveType::TriangleStrip> vertex_in_map{1u, 2u, 3u, 3u};
constexpr Common::EnumMap<u32, PrimitiveType::TriangleStrip> vertex_out_map{4u, 4u, 4u, 3u};

bool geometry_shader_uid_data::IsPassthrough() const
{
const bool stereo = g_ActiveConfig.stereo_mode != StereoMode::Off;
@@ -61,9 +65,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
const bool ssaa = host_config.ssaa;
const bool stereo = host_config.stereo;
const auto primitive_type = static_cast<PrimitiveType>(uid_data->primitive_type);
const auto primitive_type_index = static_cast<unsigned>(uid_data->primitive_type);
const auto vertex_in = std::min(static_cast<unsigned>(primitive_type_index) + 1, 3u);
u32 vertex_out = primitive_type == PrimitiveType::TriangleStrip ? 3 : 4;
const u32 vertex_in = vertex_in_map[primitive_type];
u32 vertex_out = vertex_out_map[primitive_type];

if (wireframe)
vertex_out++;
@@ -73,14 +76,14 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
// Insert layout parameters
if (host_config.backend_gs_instancing)
{
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index],
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type],
stereo ? 2 : 1);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
vertex_out);
}
else
{
out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]);
out.Write("layout({}) in;\n", primitives_ogl[primitive_type]);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
stereo ? vertex_out * 2 : vertex_out);
}
@@ -139,13 +142,13 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint "
"InstanceID : SV_GSInstanceID)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle");
}
else
{
out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
primitives_d3d[primitive_type], vertex_in, wireframe ? "Line" : "Triangle");
}

out.Write("\tVertexData ps;\n");
@@ -202,25 +202,27 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index)

void IndexGenerator::Init()
{
using OpcodeDecoder::Primitive;

if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads<true>;
m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
}
else
{
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
m_primitive_table[Primitive::GX_DRAW_QUADS] = AddQuads<false>;
m_primitive_table[Primitive::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLES] = AddList<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
}
m_primitive_table[OpcodeDecoder::GX_DRAW_LINES] = AddLineList;
m_primitive_table[OpcodeDecoder::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[OpcodeDecoder::GX_DRAW_POINTS] = AddPoints;
m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList;
m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints;
}

void IndexGenerator::Start(u16* index_ptr)
@@ -230,7 +232,7 @@ void IndexGenerator::Start(u16* index_ptr)
m_base_index = 0;
}

void IndexGenerator::AddIndices(int primitive, u32 num_vertices)
void IndexGenerator::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
{
m_index_buffer_current =
m_primitive_table[primitive](m_index_buffer_current, num_vertices, m_base_index);
@@ -6,16 +6,17 @@

#pragma once

#include <array>
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/OpcodeDecoding.h"

class IndexGenerator
{
public:
void Init();
void Start(u16* index_ptr);

void AddIndices(int primitive, u32 num_vertices);
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);

void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices);

@@ -30,5 +31,5 @@ class IndexGenerator
u32 m_base_index = 0;

using PrimitiveFunction = u16* (*)(u16*, u32, u32);
std::array<PrimitiveFunction, 8> m_primitive_table{};
Common::EnumMap<PrimitiveFunction, OpcodeDecoder::Primitive::GX_DRAW_POINTS> m_primitive_table{};
};
@@ -8,6 +8,7 @@

#include "Common/CommonTypes.h"
#include "Common/Hash.h"
#include "VideoCommon/CPMemory.h"

// m_components
enum
@@ -45,18 +46,9 @@ enum
VB_HAS_UVTEXMTXSHIFT = 13,
};

enum VarType
{
VAR_UNSIGNED_BYTE, // GX_U8 = 0
VAR_BYTE, // GX_S8 = 1
VAR_UNSIGNED_SHORT, // GX_U16 = 2
VAR_SHORT, // GX_S16 = 3
VAR_FLOAT, // GX_F32 = 4
};

struct AttributeFormat
{
VarType type;
ComponentFormat type;
int components;
int offset;
bool enable;

Large diffs are not rendered by default.

@@ -3,16 +3,25 @@

#pragma once

#include <type_traits>

#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h"
#include "Common/Inline.h"
#include "Common/Swap.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/VertexLoaderBase.h"

struct CPState;
class DataReader;

namespace OpcodeDecoder
{
// Global flag to signal if FifoRecorder is active.
extern bool g_record_fifo_data;

enum
enum class Opcode
{
GX_NOP = 0x00,
GX_UNKNOWN_RESET = 0x01,
@@ -27,20 +36,20 @@ enum

GX_CMD_CALL_DL = 0x40,
GX_CMD_UNKNOWN_METRICS = 0x44,
GX_CMD_INVL_VC = 0x48
};
GX_CMD_INVL_VC = 0x48,

enum
{
GX_PRIMITIVE_MASK = 0x78,
GX_PRIMITIVE_SHIFT = 3,
GX_VAT_MASK = 0x07
GX_PRIMITIVE_START = 0x80,
GX_PRIMITIVE_END = 0xbf,
};

constexpr u8 GX_PRIMITIVE_MASK = 0x78;
constexpr u32 GX_PRIMITIVE_SHIFT = 3;
constexpr u8 GX_VAT_MASK = 0x07;

// These values are the values extracted using GX_PRIMITIVE_MASK
// and GX_PRIMITIVE_SHIFT.
// GX_DRAW_QUADS_2 behaves the same way as GX_DRAW_QUADS.
enum
enum class Primitive : u8
{
GX_DRAW_QUADS = 0x0, // 0x80
GX_DRAW_QUADS_2 = 0x1, // 0x88
@@ -54,7 +63,232 @@ enum

void Init();

// Interface for the Run and RunCommand functions below.
// The functions themselves are templates so that the compiler generates separate versions for each
// callback (with the callback functions inlined), so the callback doesn't actually need to be
// publicly inherited.
// Compilers don't generate warnings for failed inlining with virtual functions, so this define
// allows disabling the use of virtual functions to generate those warnings. However, this means
// that missing functions will generate errors on their use in RunCommand, instead of in the
// subclass, which can be confusing.
#define OPCODE_CALLBACK_USE_INHERITANCE

#ifdef OPCODE_CALLBACK_USE_INHERITANCE
#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig override
#define OPCODE_CALLBACK_NOINLINE(sig) sig override
#else
#define OPCODE_CALLBACK(sig) DOLPHIN_FORCE_INLINE sig
#define OPCODE_CALLBACK_NOINLINE(sig) sig
#endif
class Callback
{
#ifdef OPCODE_CALLBACK_USE_INHERITANCE
public:
virtual ~Callback() = default;

// Called on any XF command.
virtual void OnXF(u16 address, u8 count, const u8* data) = 0;
// Called on any CP command.
// Subclasses should update the CP state with GetCPState().LoadCPReg(command, value) so that
// primitive commands decode properly.
virtual void OnCP(u8 command, u32 value) = 0;
// Called on any BP command.
virtual void OnBP(u8 command, u32 value) = 0;
// Called on any indexed XF load command.
virtual void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size) = 0;
// Called on any primitive command.
virtual void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat, u32 vertex_size,
u16 num_vertices, const u8* vertex_data) = 0;
// Called on a display list.
virtual void OnDisplayList(u32 address, u32 size) = 0;
// Called on any NOP commands (which are all merged into a single call).
virtual void OnNop(u32 count) = 0;
// Called on an unknown opcode, or an opcode that is known but not implemented.
// data[0] is opcode.
virtual void OnUnknown(u8 opcode, const u8* data) = 0;

// Called on ANY command. The first byte of data is the opcode. Size will be at least 1.
// This function is called after one of the above functions is called.
virtual void OnCommand(const u8* data, u32 size) = 0;

// Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands.
virtual CPState& GetCPState() = 0;
#endif
};

namespace detail
{
// Main logic; split so that the main RunCommand can call OnCommand with the returned size.
template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback)
{
if (available < 1)
return 0;

const Opcode cmd = static_cast<Opcode>(data[0]);

switch (cmd)
{
case Opcode::GX_NOP:
{
u32 count = 1;
while (count < available && static_cast<Opcode>(data[count]) == Opcode::GX_NOP)
count++;
callback.OnNop(count);
return count;
}

case Opcode::GX_LOAD_CP_REG:
{
if (available < 6)
return 0;

const u8 cmd2 = data[1];
const u32 value = Common::swap32(&data[2]);

callback.OnCP(cmd2, value);

return 6;
}

case Opcode::GX_LOAD_XF_REG:
{
if (available < 5)
return 0;

const u32 cmd2 = Common::swap32(&data[1]);
const u16 base_address = cmd2 & 0xffff;

const u16 stream_size_temp = cmd2 >> 16;
ASSERT(stream_size_temp < 16);
const u8 stream_size = (stream_size_temp & 0xf) + 1;

if (available < u32(5 + stream_size * 4))
return 0;

callback.OnXF(base_address, stream_size, &data[5]);

return 5 + stream_size * 4;
}

case Opcode::GX_LOAD_INDX_A: // Used for position matrices
case Opcode::GX_LOAD_INDX_B: // Used for normal matrices
case Opcode::GX_LOAD_INDX_C: // Used for postmatrices
case Opcode::GX_LOAD_INDX_D: // Used for lights
{
if (available < 5)
return 0;

const u32 value = Common::swap32(&data[1]);

const u32 index = value >> 16;
const u16 address = value & 0xFFF; // TODO: check mask
const u8 size = ((value >> 12) & 0xF) + 1;

// Map the command byte to its ref array.
// GX_LOAD_INDX_A (32 = 8*4) . CPArray::XF_A (4+8 = 12)
// GX_LOAD_INDX_B (40 = 8*5) . CPArray::XF_B (5+8 = 13)
// GX_LOAD_INDX_C (48 = 8*6) . CPArray::XF_C (6+8 = 14)
// GX_LOAD_INDX_D (56 = 8*7) . CPArray::XF_D (7+8 = 15)
const auto ref_array = static_cast<CPArray>((static_cast<u8>(cmd) / 8) + 8);

callback.OnIndexedLoad(ref_array, index, address, size);
return 5;
}

case Opcode::GX_CMD_CALL_DL:
{
if (available < 9)
return 0;

const u32 address = Common::swap32(&data[1]);
const u32 size = Common::swap32(&data[5]);

callback.OnDisplayList(address, size);
return 9;
}

case Opcode::GX_LOAD_BP_REG:
{
if (available < 5)
return 0;

const u8 cmd2 = data[1];
const u32 value = Common::swap24(&data[2]);

callback.OnBP(cmd2, value);

return 5;
}

default:
if (cmd >= Opcode::GX_PRIMITIVE_START && cmd <= Opcode::GX_PRIMITIVE_END)
{
if (available < 3)
return 0;

const u8 cmdbyte = static_cast<u8>(cmd);
const OpcodeDecoder::Primitive primitive = static_cast<OpcodeDecoder::Primitive>(
(cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT);
const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK;

const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc,
callback.GetCPState().vtx_attr[vat]);
const u16 num_vertices = Common::swap16(&data[1]);

if (available < 3 + num_vertices * vertex_size)
return 0;

callback.OnPrimitiveCommand(primitive, vat, vertex_size, num_vertices, &data[3]);

return 3 + num_vertices * vertex_size;
}
}

callback.OnUnknown(static_cast<u8>(cmd), data);
return 1;
}
} // namespace detail

template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& callback)
{
const u32 size = detail::RunCommand(data, available, callback);
if (size > 0)
{
callback.OnCommand(data, size);
}
return size;
}

template <typename T, typename = std::enable_if_t<std::is_base_of_v<Callback, T>>>
DOLPHIN_FORCE_INLINE u32 Run(const u8* data, u32 available, T& callback)
{
u32 size = 0;
while (size < available)
{
const u32 command_size = RunCommand(&data[size], available - size, callback);
if (command_size == 0)
break;
size += command_size;
}
return size;
}

template <bool is_preprocess = false>
u8* Run(DataReader src, u32* cycles, bool in_display_list);
u8* RunFifo(DataReader src, u32* cycles);

} // namespace OpcodeDecoder

template <>
struct fmt::formatter<OpcodeDecoder::Primitive>
: EnumFormatter<OpcodeDecoder::Primitive::GX_DRAW_POINTS>
{
static constexpr array_type names = {
"GX_DRAW_QUADS", "GX_DRAW_QUADS_2 (nonstandard)",
"GX_DRAW_TRIANGLES", "GX_DRAW_TRIANGLE_STRIP",
"GX_DRAW_TRIANGLE_FAN", "GX_DRAW_LINES",
"GX_DRAW_LINE_STRIP", "GX_DRAW_POINTS",
};
formatter() : EnumFormatter(names) {}
};