Expand Up
@@ -502,18 +502,19 @@ void JitArm64::GenerateFPRF(bool single)
void JitArm64::GenerateQuantizedLoads ()
{
// X0 is the address
// X1 is the scale
// X2 is a temporary
// X0 is a temporary
// X1 is the address
// X2 is the scale
// X3 is a temporary (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the return
// Q1 is a temporary
ARM64Reg addr_reg = ARM64Reg::X0;
ARM64Reg scale_reg = ARM64Reg::X1;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{2 , 3 };
ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg addr_reg = ARM64Reg::X1;
ARM64Reg scale_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0 , 3 };
if (!jo.memcheck )
gprs_to_push &= ~BitSet32{0 };
gprs_to_push &= ~BitSet32{1 };
BitSet32 fprs_to_push = BitSet32 (0xFFFFFFFF ) & ~BitSet32{0 , 1 };
ARM64FloatEmitter float_emit (this );
Expand All
@@ -526,7 +527,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine (flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1 }, fprs_to_push, true );
gprs_to_push & ~BitSet32{DecodeReg (scale_reg) }, fprs_to_push, true );
RET (ARM64Reg::X30);
}
Expand All
@@ -542,8 +543,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL (16 , ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF (32 , ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (ARM64Reg::X0 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X0 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
RET (ARM64Reg::X30);
Expand All
@@ -560,8 +561,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL (16 , ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF (32 , ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_dequantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
RET (ARM64Reg::X30);
Expand All
@@ -577,8 +578,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL (16 , ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF (32 , ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_dequantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
RET (ARM64Reg::X30);
Expand All
@@ -594,8 +595,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL (16 , ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF (32 , ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_dequantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
RET (ARM64Reg::X30);
Expand All
@@ -607,7 +608,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine (flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{1 }, fprs_to_push, true );
gprs_to_push & ~BitSet32{DecodeReg (scale_reg) }, fprs_to_push, true );
RET (ARM64Reg::X30);
}
Expand All
@@ -623,8 +624,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL (16 , ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF (32 , ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_dequantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
RET (ARM64Reg::X30);
Expand All
@@ -641,8 +642,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL (16 , ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF (32 , ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_dequantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
RET (ARM64Reg::X30);
Expand All
@@ -658,8 +659,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL (16 , ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF (32 , ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_dequantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
RET (ARM64Reg::X30);
Expand All
@@ -675,8 +676,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL (16 , ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF (32 , ARM64Reg::D0, ARM64Reg::D0);
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_dequantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_dequantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
RET (ARM64Reg::X30);
Expand Down
Expand Up
@@ -711,18 +712,19 @@ void JitArm64::GenerateQuantizedLoads()
void JitArm64::GenerateQuantizedStores ()
{
// X0 is the scale
// X1 is the address
// X2 is a temporary
// X0 is a temporary
// X1 is the scale
// X2 is the address
// X3 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the register
// Q1 is a temporary
ARM64Reg scale_reg = ARM64Reg::X0;
ARM64Reg addr_reg = ARM64Reg::X1;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0 , 2 };
ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg scale_reg = ARM64Reg::X1;
ARM64Reg addr_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0 , 1 };
if (!jo.memcheck )
gprs_to_push &= ~BitSet32{1 };
gprs_to_push &= ~BitSet32{2 };
if (!jo.fastmem )
gprs_to_push &= ~BitSet32{3 };
BitSet32 fprs_to_push = BitSet32 (0xFFFFFFFF ) & ~BitSet32{0 , 1 };
Expand All
@@ -743,8 +745,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedU8 = GetCodePtr ();
{
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_quantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_quantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
Expand All
@@ -762,8 +764,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedS8 = GetCodePtr ();
{
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_quantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_quantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
Expand All
@@ -781,8 +783,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedU16 = GetCodePtr ();
{
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_quantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_quantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
Expand All
@@ -799,8 +801,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedS16 = GetCodePtr (); // Used by Viewtiful Joe's intro movie
{
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_quantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_quantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0 );
Expand Down
Expand Up
@@ -828,8 +830,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleU8 = GetCodePtr (); // Used by MKWii
{
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_quantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_quantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
Expand All
@@ -847,8 +849,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleS8 = GetCodePtr ();
{
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_quantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_quantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
Expand All
@@ -866,8 +868,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleU16 = GetCodePtr (); // Used by MKWii
{
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_quantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_quantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
Expand All
@@ -884,8 +886,8 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleS16 = GetCodePtr ();
{
const s32 load_offset = MOVPage2R (ARM64Reg::X2 , &m_quantizeTableS);
ADD (scale_reg, ARM64Reg::X2 , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
const s32 load_offset = MOVPage2R (temp_reg , &m_quantizeTableS);
ADD (scale_reg, temp_reg , scale_reg, ArithOption (scale_reg, ShiftType::LSL, 3 ));
float_emit.LDR (32 , IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL (32 , ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
Expand Down