Skip to content

Commit

Permalink
Get rid of racey gpuBusy in favor of a safer version of the thing I s…
Browse files Browse the repository at this point in the history
…aid was terrible.

Either atomics are hard or I'm stupid.  Probably both.

To make it not racey, get rid of CPReadWriteDistance and just use
CPReadPointer and CPWritePointer.
  • Loading branch information
comex committed Sep 18, 2013
1 parent dd5ff30 commit c9845ea
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 111 deletions.
156 changes: 68 additions & 88 deletions Source/Core/VideoCommon/Src/CommandProcessor.cpp
Expand Up @@ -43,15 +43,13 @@ int m_bboxright;
int m_bboxbottom;
u16 m_tokenReg;

volatile bool isPossibleWaitingSetDrawDone = false;
volatile bool isHiWatermarkActive = false;
volatile bool isLoWatermarkActive = false;
volatile bool interruptSet= false;
volatile bool interruptWaiting= false;
volatile bool interruptTokenWaiting = false;
u32 interruptTokenData;
volatile bool interruptFinishWaiting = false;
volatile u32 gpuBusy = 0;

volatile u32 VITicks = CommandProcessor::m_cpClockOrigin;

Expand Down Expand Up @@ -79,7 +77,6 @@ void DoState(PointerWrap &p)

p.Do(isHiWatermarkActive);
p.Do(isLoWatermarkActive);
p.Do(isPossibleWaitingSetDrawDone);
p.Do(interruptSet);
p.Do(interruptWaiting);
p.Do(interruptTokenWaiting);
Expand Down Expand Up @@ -132,18 +129,30 @@ void Init()
interruptFinishWaiting = false;
interruptTokenWaiting = false;

isPossibleWaitingSetDrawDone = false;
isHiWatermarkActive = false;
isLoWatermarkActive = false;

et_UpdateInterrupts = CoreTiming::RegisterEvent("CPInterrupt", UpdateInterrupts_Wrapper);
}

bool GPUHasWork()
{
// In bSyncGPUAtIdleOnly mode, this is safe to call from SyncGPU, because:
// - gpuFifo->bFF_GPReadEnable/CPWritePointer/CPBreakpoint only change later in SyncGPU.
// - interruptWaiting *never* becomes true.
// - No work is done between setting the read pointer and comparing it
// against CPWritePointer/CPBreakpoint.
return gpuFifo->bFF_GPReadEnable &&
!interruptWaiting &&
Common::AtomicLoad(gpuFifo->CPReadPointer) != Common::AtomicLoad(gpuFifo->CPWritePointer) &&
!AtBreakpointGpu();
}

static void SyncGPU()
{
if (IsOnThread())
{
while (Common::AtomicLoad(CommandProcessor::gpuBusy))
while (GPUHasWork())
Common::YieldCPU();
}
if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
Expand All @@ -158,7 +167,6 @@ static void SyncGPU()
PixelEngine::SetFinish_OnMainThread(0, 0);
}
cpuFifo.CPReadPointer = _gpuFifo.CPReadPointer;
cpuFifo.CPReadWriteDistance = _gpuFifo.CPReadWriteDistance;
_gpuFifo = cpuFifo;
// need another barrier here
if (interruptWaiting) {
Expand All @@ -176,23 +184,51 @@ void SyncGPUIfIdleOnly()
}
}

static u32 GetFakeRwDistance()
bool IsPossibleWaitingSetDrawDone()
{
u32 prev;
if (cpuFifo.CPWritePointer >= cpuFifo.CPReadPointer)
prev = cpuFifo.CPWritePointer - cpuFifo.CPReadPointer;
// This is called from Idle.
if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
{
// Time to sync.
SyncGPU();
return false;
}
else
// why 32?
prev = cpuFifo.CPEnd - cpuFifo.CPReadPointer + cpuFifo.CPWritePointer - cpuFifo.CPBase + 32;
return std::min(prev, (u32) 16);
{
return GPUHasWork();
}
}

static u32 GetFakeReadPointer()
static u32 GetReadWriteDistance()
{
u32 result = cpuFifo.CPWritePointer - GetFakeRwDistance();
if (result < cpuFifo.CPBase)
u32 writePointer = cpuFifo.CPWritePointer;
u32 readPointer = Common::AtomicLoad(cpuFifo.SafeCPReadPointer);
u32 result = writePointer - readPointer;
if (writePointer < readPointer)
result += cpuFifo.CPEnd - cpuFifo.CPBase;
return 0;

if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
{
// Pretend we've advanced further.
result = std::min(result, (u32) 32);
}

return result;
}

static u32 GetReadPointer()
{
if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
{
u32 result = cpuFifo.CPWritePointer - GetReadWriteDistance();
if (result < cpuFifo.CPBase)
result += cpuFifo.CPEnd - cpuFifo.CPBase;
return result;
}
else
{
return Common::AtomicLoad(cpuFifo.SafeCPReadPointer);
}
}

void Read16(u16& _rReturnValue, const u32 _Address)
Expand Down Expand Up @@ -226,45 +262,11 @@ void Read16(u16& _rReturnValue, const u32 _Address)
case FIFO_LO_WATERMARK_HI: _rReturnValue = ReadHigh(cpuFifo.CPLoWatermark); return;

case FIFO_RW_DISTANCE_LO:
if (IsOnThread())
{
if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
{
_rReturnValue = ReadLow (GetFakeRwDistance());
}
else
{
if(cpuFifo.CPWritePointer >= cpuFifo.SafeCPReadPointer)
_rReturnValue = ReadLow (cpuFifo.CPWritePointer - cpuFifo.SafeCPReadPointer);
else
_rReturnValue = ReadLow (cpuFifo.CPEnd - cpuFifo.SafeCPReadPointer + cpuFifo.CPWritePointer - cpuFifo.CPBase + 32);
}
}
else
{
_rReturnValue = ReadLow (cpuFifo.CPReadWriteDistance);
}
_rReturnValue = ReadLow(GetReadWriteDistance());
DEBUG_LOG(COMMANDPROCESSOR, "Read FIFO_RW_DISTANCE_LO : %04x", _rReturnValue);
return;
case FIFO_RW_DISTANCE_HI:
if (IsOnThread())
{
if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
{
_rReturnValue = ReadHigh (GetFakeRwDistance());
}
else
{
if(cpuFifo.CPWritePointer >= cpuFifo.SafeCPReadPointer)
_rReturnValue = ReadHigh (cpuFifo.CPWritePointer - cpuFifo.SafeCPReadPointer);
else
_rReturnValue = ReadHigh (cpuFifo.CPEnd - cpuFifo.SafeCPReadPointer + cpuFifo.CPWritePointer - cpuFifo.CPBase + 32);
}
}
else
{
_rReturnValue = ReadHigh(cpuFifo.CPReadWriteDistance);
}
_rReturnValue = ReadLow(GetReadWriteDistance());
DEBUG_LOG(COMMANDPROCESSOR, "Read FIFO_RW_DISTANCE_HI : %04x", _rReturnValue);
return;
case FIFO_WRITE_POINTER_LO:
Expand All @@ -276,29 +278,11 @@ void Read16(u16& _rReturnValue, const u32 _Address)
DEBUG_LOG(COMMANDPROCESSOR, "Read FIFO_WRITE_POINTER_HI : %04x", _rReturnValue);
return;
case FIFO_READ_POINTER_LO:
if (IsOnThread())
{
if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
_rReturnValue = ReadLow (GetFakeReadPointer());
else
_rReturnValue = ReadLow (cpuFifo.SafeCPReadPointer);
}
else
_rReturnValue = ReadLow (cpuFifo.CPReadPointer);
_rReturnValue = ReadLow(GetReadPointer());
DEBUG_LOG(COMMANDPROCESSOR, "Read FIFO_READ_POINTER_LO : %04x", _rReturnValue);
return;
case FIFO_READ_POINTER_HI:
if (IsOnThread())
{
if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
_rReturnValue = ReadHigh (GetFakeReadPointer());
else
_rReturnValue = ReadHigh (cpuFifo.SafeCPReadPointer);
}
else
{
_rReturnValue = ReadHigh (cpuFifo.CPReadPointer);
}
_rReturnValue = ReadHigh(GetReadPointer());
DEBUG_LOG(COMMANDPROCESSOR, "Read FIFO_READ_POINTER_HI : %04x", _rReturnValue);
return;

Expand Down Expand Up @@ -503,7 +487,7 @@ void Write16(const u16 _Value, const u32 _Address)
case FIFO_RW_DISTANCE_HI:
WriteHigh((u32 &)cpuFifo.CPReadWriteDistance, _Value);
SyncGPU();
if (cpuFifo.CPReadWriteDistance == 0)
if (_Value == 0)
{
GPFifo::ResetGatherPipe();
ResetVideoBuffer();
Expand Down Expand Up @@ -553,8 +537,7 @@ void STACKALIGN GatherPipeBursted()
{
// In multibuffer mode is not allowed write in the same FIFO attached to the GPU.
// Fix Pokemon XD in DC mode.
if((ProcessorInterface::Fifo_CPUEnd == cpuFifo.CPEnd) && (ProcessorInterface::Fifo_CPUBase == cpuFifo.CPBase)
&& gpuFifo->CPReadWriteDistance > 0)
if (ProcessorInterface::Fifo_CPUEnd == cpuFifo.CPEnd && ProcessorInterface::Fifo_CPUBase == cpuFifo.CPBase)
{
SyncGPU();
}
Expand All @@ -571,12 +554,6 @@ void STACKALIGN GatherPipeBursted()
else
cpuFifo.CPWritePointer += GATHER_PIPE_SIZE;

Common::AtomicAdd(gpuFifo->CPReadWriteDistance, GATHER_PIPE_SIZE);
if (Core::g_CoreStartupParameter.bSyncGPUAtIdleOnly)
{
cpuFifo.CPReadWriteDistance += GATHER_PIPE_SIZE;
}

if (!IsOnThread())
RunGpu();

Expand All @@ -586,6 +563,8 @@ void STACKALIGN GatherPipeBursted()
SyncGPU();
}

Common::AtomicStore(gpuFifo->CPWritePointer, cpuFifo.CPWritePointer);

// check if we are in sync
_assert_msg_(COMMANDPROCESSOR, cpuFifo.CPWritePointer == ProcessorInterface::Fifo_CPUWritePointer, "FIFOs linked but out of sync");
_assert_msg_(COMMANDPROCESSOR, cpuFifo.CPBase == ProcessorInterface::Fifo_CPUBase, "FIFOs linked but out of sync");
Expand Down Expand Up @@ -635,8 +614,9 @@ void SetCpStatus(bool isCPUThread)
else
{
// overflow & underflow check
cpuFifo.bFF_HiWatermark = (cpuFifo.CPReadWriteDistance > cpuFifo.CPHiWatermark);
cpuFifo.bFF_LoWatermark = (cpuFifo.CPReadWriteDistance < cpuFifo.CPLoWatermark);
u32 distance = GetReadWriteDistance();
cpuFifo.bFF_HiWatermark = (distance > cpuFifo.CPHiWatermark);
cpuFifo.bFF_LoWatermark = (distance < cpuFifo.CPLoWatermark);
}

// breakpoint
Expand Down Expand Up @@ -725,8 +705,8 @@ void SetCpStatusRegister()
{
// Here always there is one fifo attached to the GPU
m_CPStatusReg.Breakpoint = cpuFifo.bFF_Breakpoint;
m_CPStatusReg.ReadIdle = !cpuFifo.CPReadWriteDistance || cpuFifo.CPReadPointer == cpuFifo.CPWritePointer || AtBreakpointCpu();
m_CPStatusReg.CommandIdle = !cpuFifo.CPReadWriteDistance || AtBreakpointCpu() || !cpuFifo.bFF_GPReadEnable;
m_CPStatusReg.ReadIdle = cpuFifo.CPReadPointer == cpuFifo.CPWritePointer || AtBreakpointCpu();
m_CPStatusReg.CommandIdle = cpuFifo.CPReadPointer == cpuFifo.CPWritePointer || AtBreakpointCpu() || !cpuFifo.bFF_GPReadEnable;
m_CPStatusReg.UnderflowLoWatermark = cpuFifo.bFF_LoWatermark;
m_CPStatusReg.OverflowHiWatermark = cpuFifo.bFF_HiWatermark;

Expand Down Expand Up @@ -790,10 +770,10 @@ void SetCpClearRegister()
void Update()
{
// called only when bSyncGPU is true
while (VITicks > m_cpClockOrigin && gpuBusy && IsOnThread())
while (VITicks > m_cpClockOrigin && GPUHasWork() && IsOnThread())
Common::YieldCPU();

if (gpuBusy)
if (GPUHasWork())
Common::AtomicAdd(VITicks, SystemTimers::GetTicksPerSecond() / 10000);
}
} // end of namespace CommandProcessor
4 changes: 2 additions & 2 deletions Source/Core/VideoCommon/Src/CommandProcessor.h
Expand Up @@ -18,15 +18,13 @@ namespace CommandProcessor
//extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
extern SCPFifoStruct *gpuFifo;
extern SCPFifoStruct cpuFifo;
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
extern volatile bool isHiWatermarkActive;
extern volatile bool isLoWatermarkActive;
extern volatile bool interruptSet;
extern volatile bool interruptWaiting;
extern volatile bool interruptTokenWaiting;
extern u32 interruptTokenData;
extern volatile bool interruptFinishWaiting;
extern volatile u32 gpuBusy;

// internal hardware addresses
enum
Expand Down Expand Up @@ -139,7 +137,9 @@ void Init();
void Shutdown();
void DoState(PointerWrap &p);

bool GPUHasWork();
void SyncGPUIfIdleOnly();
bool IsPossibleWaitingSetDrawDone();

// Read
void Read16(u16& _rReturnValue, const u32 _Address);
Expand Down
18 changes: 2 additions & 16 deletions Source/Core/VideoCommon/Src/Fifo.cpp
Expand Up @@ -43,7 +43,6 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
EmulatorState(false);
if (!Core::IsGPUThread())
m_csHWVidOccupied.lock();
_dbg_assert_(COMMON, !CommandProcessor::gpuBusy);
}
else
{
Expand Down Expand Up @@ -144,12 +143,8 @@ void RunGpuLoop()
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);

// check if we are able to run this buffer
while (GpuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpointGpu())
while (GpuRunningState && CommandProcessor::GPUHasWork())
{

Common::AtomicStore(CommandProcessor::gpuBusy, 1);
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;

if (!Core::g_CoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
{
u32 readPtr = fifo.CPReadPointer;
Expand All @@ -160,9 +155,6 @@ void RunGpuLoop()
else
readPtr += 32;

_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);

ReadDataFromFifo(uData, 32);

cyclesExecuted = OpcodeDecoder_Run(g_bSkipCurrentFrame);
Expand All @@ -174,7 +166,6 @@ void RunGpuLoop()
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);

Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
}

CommandProcessor::SetCpStatus();
Expand All @@ -183,11 +174,8 @@ void RunGpuLoop()
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
VideoFifo_CheckAsyncRequest();
CommandProcessor::isPossibleWaitingSetDrawDone = false;
}

Common::AtomicStore(CommandProcessor::gpuBusy, 0);

if (EmuRunningState)
{
// NOTE(jsd): Calling SwitchToThread() on Windows 7 x64 is a hot spot, according to profiler.
Expand Down Expand Up @@ -227,7 +215,7 @@ bool AtBreakpointCpu()
void RunGpu()
{
SCPFifoStruct &fifo = *CommandProcessor::gpuFifo;
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpointGpu() )
while (CommandProcessor::GPUHasWork())
{
u8 *uData = Memory::GetPointer(fifo.CPReadPointer);

Expand All @@ -243,8 +231,6 @@ void RunGpu()
fifo.CPReadPointer = fifo.CPBase;
else
fifo.CPReadPointer += 32;

fifo.CPReadWriteDistance -= 32;
}
CommandProcessor::SetCpStatus();
}
3 changes: 1 addition & 2 deletions Source/Core/VideoCommon/Src/MainBase.cpp
Expand Up @@ -298,8 +298,7 @@ void VideoBackendHardware::Video_GatherPipeBursted()
bool VideoBackendHardware::Video_IsPossibleWaitingSetDrawDone()
{
// this is called from Idle
CommandProcessor::SyncGPUIfIdleOnly();
return CommandProcessor::isPossibleWaitingSetDrawDone;
return CommandProcessor::IsPossibleWaitingSetDrawDone();
}

bool VideoBackendHardware::Video_IsHiWatermarkActive()
Expand Down

0 comments on commit c9845ea

Please sign in to comment.