Large diffs are not rendered by default.

@@ -72,7 +72,7 @@ void Interpreter::lfd(UGeckoInstruction inst)
const u64 temp = PowerPC::Read_U64(address);

if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
riPS0(inst.FD) = temp;
rPS(inst.FD).SetPS0(temp);
}

void Interpreter::lfdu(UGeckoInstruction inst)
@@ -89,7 +89,7 @@ void Interpreter::lfdu(UGeckoInstruction inst)

if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
riPS0(inst.FD) = temp;
rPS(inst.FD).SetPS0(temp);
rGPR[inst.RA] = address;
}
}
@@ -108,7 +108,7 @@ void Interpreter::lfdux(UGeckoInstruction inst)

if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
riPS0(inst.FD) = temp;
rPS(inst.FD).SetPS0(temp);
rGPR[inst.RA] = address;
}
}
@@ -126,7 +126,7 @@ void Interpreter::lfdx(UGeckoInstruction inst)
const u64 temp = PowerPC::Read_U64(address);

if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
riPS0(inst.FD) = temp;
rPS(inst.FD).SetPS0(temp);
}

void Interpreter::lfs(UGeckoInstruction inst)
@@ -144,8 +144,7 @@ void Interpreter::lfs(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
const u64 value = ConvertToDouble(temp);
riPS0(inst.FD) = value;
riPS1(inst.FD) = value;
rPS(inst.FD).Fill(value);
}
}

@@ -164,8 +163,7 @@ void Interpreter::lfsu(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
const u64 value = ConvertToDouble(temp);
riPS0(inst.FD) = value;
riPS1(inst.FD) = value;
rPS(inst.FD).Fill(value);
rGPR[inst.RA] = address;
}
}
@@ -184,9 +182,8 @@ void Interpreter::lfsux(UGeckoInstruction inst)

if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
u64 value = ConvertToDouble(temp);
riPS0(inst.FD) = value;
riPS1(inst.FD) = value;
const u64 value = ConvertToDouble(temp);
rPS(inst.FD).Fill(value);
rGPR[inst.RA] = address;
}
}
@@ -206,8 +203,7 @@ void Interpreter::lfsx(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
const u64 value = ConvertToDouble(temp);
riPS0(inst.FD) = value;
riPS1(inst.FD) = value;
rPS(inst.FD).Fill(value);
}
}

@@ -355,7 +351,7 @@ void Interpreter::stfd(UGeckoInstruction inst)
return;
}

PowerPC::Write_U64(riPS0(inst.FS), address);
PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
}

void Interpreter::stfdu(UGeckoInstruction inst)
@@ -368,7 +364,7 @@ void Interpreter::stfdu(UGeckoInstruction inst)
return;
}

PowerPC::Write_U64(riPS0(inst.FS), address);
PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
rGPR[inst.RA] = address;
@@ -385,7 +381,7 @@ void Interpreter::stfs(UGeckoInstruction inst)
return;
}

PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
}

void Interpreter::stfsu(UGeckoInstruction inst)
@@ -398,7 +394,7 @@ void Interpreter::stfsu(UGeckoInstruction inst)
return;
}

PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
rGPR[inst.RA] = address;
@@ -761,7 +757,7 @@ void Interpreter::stfdux(UGeckoInstruction inst)
return;
}

PowerPC::Write_U64(riPS0(inst.FS), address);
PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
rGPR[inst.RA] = address;
@@ -778,7 +774,7 @@ void Interpreter::stfdx(UGeckoInstruction inst)
return;
}

PowerPC::Write_U64(riPS0(inst.FS), address);
PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
}

// Stores Floating points into Integers indeXed
@@ -792,7 +788,7 @@ void Interpreter::stfiwx(UGeckoInstruction inst)
return;
}

PowerPC::Write_U32((u32)riPS0(inst.FS), address);
PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address);
}

void Interpreter::stfsux(UGeckoInstruction inst)
@@ -805,7 +801,7 @@ void Interpreter::stfsux(UGeckoInstruction inst)
return;
}

PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
rGPR[inst.RA] = address;
@@ -822,7 +818,7 @@ void Interpreter::stfsx(UGeckoInstruction inst)
return;
}

PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
}

void Interpreter::sthbrx(UGeckoInstruction inst)
@@ -176,8 +176,8 @@ void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW)
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;

const double ps0 = rPS0(instRS);
const double ps1 = rPS1(instRS);
const double ps0 = rPS(instRS).PS0AsDouble();
const double ps1 = rPS(instRS).PS1AsDouble();

switch (stType)
{
@@ -301,8 +301,7 @@ void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW)
return;
}

rPS0(instRD) = ps0;
rPS1(instRD) = ps1;
rPS(instRD).SetBoth(ps0, ps1);
}

void Interpreter::psq_l(UGeckoInstruction inst)

Large diffs are not rendered by default.

@@ -98,7 +98,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst)
m |= (0xFU << (i * 4));
}

FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(riPS0(inst.FB)) & m);
FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(rPS(inst.FB).PS0AsU64()) & m);
FPSCRtoFPUSettings(FPSCR);

if (inst.Rc)
@@ -555,7 +555,7 @@ void Interpreter::mffsx(UGeckoInstruction inst)
// TODO(ector): grab all overflow flags etc and set them in FPSCR

UpdateFPSCR();
riPS0(inst.FD) = 0xFFF8000000000000 | FPSCR.Hex;
rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex);

if (inst.Rc)
Helper_UpdateCR1();
@@ -34,7 +34,7 @@ const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) const

// Returns the default OpArg for guest FPR `preg`: the PPCSTATE(...) expression built from the
// first (ps0) slot of ps[preg].
// NOTE(review): there are two consecutive return statements here, so the second one is
// unreachable as written. They look like the array-indexed and the member-access spelling of
// the same slot -- confirm which single line is meant to remain.
OpArg FPURegCache::GetDefaultLocation(preg_t preg) const
{
return PPCSTATE(ps[preg][0]);
return PPCSTATE(ps[preg].ps0);
}

BitSet32 FPURegCache::GetRegUtilization() const
@@ -452,7 +452,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
// Load the high 64 bits from the file and insert them into the high 64 bits of the host
// register
ARM64Reg tmp_reg = GetReg();
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
m_float_emit->INS(64, host_reg, 1, tmp_reg, 0);
UnlockRegister(tmp_reg);

@@ -506,7 +506,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
reg.Load(host_reg, REG_LOWER_PAIR);
}
reg.SetDirty(false);
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
return host_reg;
}
default:
@@ -554,15 +554,15 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit
// store.
// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
break;
case REG_DUP_SINGLE:
flush_reg = GetReg();
m_float_emit->FCVT(64, 32, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
// fall through
case REG_DUP:
// Store PSR1 (which is equal to PSR0) in memory.
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
break;
default:
// All other types don't store anything in PSR1.
@@ -687,7 +687,7 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
store_size = 64;

if (dirty)
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));

if (!maintain_state)
{
@@ -702,9 +702,9 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
// If the paired registers were at the start of ppcState we could do an STP here.
// Too bad moving them would break savestate compatibility between x86_64 and AArch64
// m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG,
// PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
// PPCSTATE_OFF(ps[preg].ps0));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
}

if (!maintain_state)
@@ -24,7 +24,7 @@ static const Arm64Gen::ARM64Reg DISPATCHER_PC =

// Some asserts to make sure we will be able to load everything
static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0,
static_assert((PPCSTATE_OFF(ps[0].ps0) % 8) == 0,
"LDR(64bit VFP) requires FPRs to be 8 byte aligned");
static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!");
static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");
@@ -4,13 +4,15 @@

#include "Core/PowerPC/PowerPC.h"

#include <algorithm>
#include <cstring>
#include <istream>
#include <ostream>
#include <type_traits>
#include <vector>

#include "Common/Assert.h"
#include "Common/BitUtils.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/FPURoundMode.h"
@@ -42,6 +44,27 @@ MemChecks memchecks;
PPCDebugInterface debug_interface;

static CoreTiming::EventType* s_invalidate_cache_thread_safe;

// Reads the ps0 slot as a double. The stored u64 is passed through Common::BitCast<double>
// (presumably a bit-for-bit reinterpretation rather than a numeric conversion -- see
// Common/BitUtils.h).
double PairedSingle::PS0AsDouble() const
{
  const double as_double = Common::BitCast<double>(ps0);
  return as_double;
}

// Reads the ps1 slot as a double. The stored u64 is passed through Common::BitCast<double>
// (presumably a bit-for-bit reinterpretation rather than a numeric conversion -- see
// Common/BitUtils.h).
double PairedSingle::PS1AsDouble() const
{
  const double as_double = Common::BitCast<double>(ps1);
  return as_double;
}

void PairedSingle::SetPS0(double value)
{
ps0 = Common::BitCast<u64>(value);
}

void PairedSingle::SetPS1(double value)
{
ps1 = Common::BitCast<u64>(value);
}

static void InvalidateCacheThreadSafe(u64 userdata, s64 cyclesLate)
{
ppcState.iCache.Invalidate(static_cast<u32>(userdata));
@@ -135,10 +158,11 @@ void DoState(PointerWrap& p)

static void ResetRegisters()
{
memset(ppcState.ps, 0, sizeof(ppcState.ps));
memset(ppcState.sr, 0, sizeof(ppcState.sr));
memset(ppcState.gpr, 0, sizeof(ppcState.gpr));
memset(ppcState.spr, 0, sizeof(ppcState.spr));
std::fill(std::begin(ppcState.ps), std::end(ppcState.ps), PairedSingle{});
std::fill(std::begin(ppcState.sr), std::end(ppcState.sr), 0U);
std::fill(std::begin(ppcState.gpr), std::end(ppcState.gpr), 0U);
std::fill(std::begin(ppcState.spr), std::end(ppcState.spr), 0U);

/*
0x00080200 = lonestar 2.0
0x00088202 = lonestar 2.2
@@ -8,6 +8,7 @@
#include <cstddef>
#include <iosfwd>
#include <tuple>
#include <type_traits>
#include <vector>

#include "Common/CommonTypes.h"
@@ -57,6 +58,43 @@ struct TLBEntry
u8 recent = 0;
};

// One paired-single register, kept as two u64 slots (ps0 first, then ps1), both starting at 0.
// The u64 overloads read and write the stored values directly. The double overloads are only
// declared here; their definitions live out of line.
struct PairedSingle
{
  // Keep ps0 declared before ps1 and keep these plain data members: code elsewhere locates the
  // slots by their offset inside this struct.
  u64 ps0 = 0;
  u64 ps1 = 0;

  // --- Readers ---------------------------------------------------------------------------

  // Stored value of each slot, unchanged.
  u64 PS0AsU64() const { return ps0; }
  u64 PS1AsU64() const { return ps1; }

  // Stored value of each slot narrowed to u32 with static_cast.
  u32 PS0AsU32() const { return static_cast<u32>(ps0); }
  u32 PS1AsU32() const { return static_cast<u32>(ps1); }

  // Slot contents viewed as a double (defined out of line).
  double PS0AsDouble() const;
  double PS1AsDouble() const;

  // --- Single-slot writers ---------------------------------------------------------------

  void SetPS0(u64 value) { ps0 = value; }
  void SetPS0(double value);

  void SetPS1(u64 value) { ps1 = value; }
  void SetPS1(double value);

  // --- Two-slot writers ------------------------------------------------------------------

  // Writes lhs to ps0 and then rhs to ps1.
  void SetBoth(u64 lhs, u64 rhs)
  {
    ps0 = lhs;
    ps1 = rhs;
  }
  void SetBoth(double lhs, double rhs)
  {
    SetPS0(lhs);
    SetPS1(rhs);
  }

  // Writes the same value to ps0 and then to ps1.
  void Fill(u64 value)
  {
    ps0 = value;
    ps1 = value;
  }
  void Fill(double value)
  {
    SetPS0(value);
    SetPS1(value);
  }
};
// Paired single must be standard layout in order for offsetof to work, which is used by the JITs
static_assert(std::is_standard_layout<PairedSingle>(), "PairedSingle must be standard layout");

// This contains the entire state of the emulated PowerPC "Gekko" CPU.
struct PowerPCState
{
@@ -114,7 +152,7 @@ struct PowerPCState
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
alignas(16) u64 ps[32][2];
alignas(16) PairedSingle ps[32];

u32 sr[16]; // Segment registers.

@@ -212,11 +250,7 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst);
#define TL PowerPC::ppcState.spr[SPR_TL]
#define TU PowerPC::ppcState.spr[SPR_TU]

#define rPS0(i) (*(double*)(&PowerPC::ppcState.ps[i][0]))
#define rPS1(i) (*(double*)(&PowerPC::ppcState.ps[i][1]))

#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
#define rPS(i) (PowerPC::ppcState.ps[(i)])

enum CRBits
{
@@ -227,11 +227,11 @@ void RegisterWidget::PopulateTable()
[i](u64 value) { GPR(i) = value; });

// Floating point registers (double)
AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return riPS0(i); },
[i](u64 value) { riPS0(i) = value; });
AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return rPS(i).PS0AsU64(); },
[i](u64 value) { rPS(i).SetPS0(value); });

AddRegister(i, 4, RegisterType::fpr, "", [i] { return riPS1(i); },
[i](u64 value) { riPS1(i) = value; });
AddRegister(i, 4, RegisterType::fpr, "", [i] { return rPS(i).PS1AsU64(); },
[i](u64 value) { rPS(i).SetPS1(value); });
}

for (int i = 0; i < 8; i++)