[AArch64] Vertex loader and things #2041

Merged · 12 commits · Feb 17, 2015
241 changes: 208 additions & 33 deletions Source/Core/Common/Arm64Emitter.cpp

Large diffs are not rendered by default.

31 changes: 28 additions & 3 deletions Source/Core/Common/Arm64Emitter.h
@@ -4,6 +4,8 @@

#pragma once

+#include <functional>
+
#include "Common/ArmCommon.h"
#include "Common/BitSet.h"
#include "Common/CodeBlock.h"
@@ -76,8 +78,8 @@ enum ARM64Reg
};

inline bool Is64Bit(ARM64Reg reg) { return reg & 0x20; }
-inline bool IsSingle(ARM64Reg reg) { return reg & 0x40; }
-inline bool IsDouble(ARM64Reg reg) { return reg & 0x80; }
+inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
+inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
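Why the masked comparisons matter: the top two bits of a register value jointly encode the vector type (01 = S, 10 = D, 11 = Q), so the old single-bit tests also returned true for quad registers. A minimal standalone sketch using an illustrative subset of the encoding (hypothetical names, not the full Dolphin enum):

#include <cassert>

// Illustrative encoding: bits 6-7 select the vector type, low bits the index.
enum HypotheticalReg : unsigned { kS0 = 0x40, kD0 = 0x80, kQ0 = 0xC0 };

inline bool IsSingle(unsigned reg) { return (reg & 0xC0) == 0x40; }
inline bool IsDouble(unsigned reg) { return (reg & 0xC0) == 0x80; }
inline bool IsQuad(unsigned reg)   { return (reg & 0xC0) == 0xC0; }

int main()
{
    // The old tests (reg & 0x40, reg & 0x80) both held for kQ0, which sets
    // both bits; the masked comparisons classify it as quad only.
    assert(IsQuad(kQ0) && !IsDouble(kQ0) && !IsSingle(kQ0));
    assert(IsDouble(kD0) && !IsQuad(kD0));
    return 0;
}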
@@ -332,6 +334,7 @@ class ARM64XEmitter
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
+void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

protected:
inline void Write32(u32 value)
@@ -477,6 +480,7 @@ class ARM64XEmitter
void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
+void SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
@@ -582,6 +586,17 @@ class ARM64XEmitter
void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);

+// Load/Store register (unscaled offset)
+void STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+
// Load/Store pair
void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
@@ -630,6 +645,10 @@ class ARM64FloatEmitter
void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

+// Loadstore unscaled
+void LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+
// Loadstore single structure
void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
@@ -639,6 +658,7 @@

// Loadstore multiple structure
void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
+void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);

// Scalar - 1 Source
void FABS(ARM64Reg Rd, ARM64Reg Rn);
@@ -723,6 +743,9 @@ class ARM64FloatEmitter
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);

+// vector x indexed element
+void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
+
// ABI related
void ABI_PushRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
@@ -748,6 +771,8 @@ class ARM64FloatEmitter
void EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
};

class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
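Two families of emitter entry points are new in this header: the unscaled LDUR/STUR forms take any signed 9-bit byte offset (the scaled LDR/STR immediates must be unsigned multiples of the access size), and the indexed FMUL multiplies each lane of Rn by one chosen element of Rm. A usage sketch under those signatures (register choices and offsets are illustrative):

// Unscaled load/store: negative or size-unaligned byte offsets need no
// scratch ADD/SUB arithmetic.
emit->LDUR(W0, X1, -4);              // W0 = 32-bit load from [X1 - 4]
float_emit.STUR(32, S0, X1, 3);      // 32-bit store to [X1 + 3]

// Vector x indexed element: every lane of D0 is scaled by lane 0 of D1.
float_emit.FMUL(32, D0, D0, D1, 0);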
@@ -756,7 +781,7 @@ class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
void PoisonMemory() override
{
u32* ptr = (u32*)region;
-u32* maxptr = (u32*)region + region_size;
+u32* maxptr = (u32*)(region + region_size);
// If our memory isn't a multiple of u32 then this won't write the last remaining bytes with anything
// Less than optimal, but there would be nothing we could do but throw a runtime warning anyway.
// AArch64: 0xD4200000 = BRK 0
2 changes: 1 addition & 1 deletion Source/Core/Common/ArmEmitter.h
@@ -689,7 +689,7 @@ class ARMCodeBlock : public CodeBlock<ARMXEmitter>
void PoisonMemory() override
{
u32* ptr = (u32*)region;
-u32* maxptr = (u32*)region + region_size;
+u32* maxptr = (u32*)(region + region_size);
// If our memory isn't a multiple of u32 then this won't write the last remaining bytes with anything
// Less than optimal, but there would be nothing we could do but throw a runtime warning anyway.
// ARM: 0x01200070 = BKPT 0
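Both PoisonMemory fixes correct the same pointer-arithmetic slip: adding region_size to a u32* advances by region_size elements, i.e. 4 * region_size bytes, so the poison loop wrote far past the code region. A minimal sketch of the corrected bound, assuming region is a byte pointer and region_size a byte count as in Dolphin's CodeBlock:

#include <cstddef>
#include <cstdint>

void PoisonAArch64(uint8_t* region, size_t region_size)
{
    uint32_t* ptr = (uint32_t*)region;
    // Old bug: (uint32_t*)region + region_size stepped in 4-byte units,
    // ending 4 * region_size bytes in; the cast must wrap the byte sum.
    uint32_t* maxptr = (uint32_t*)(region + region_size);
    while (ptr < maxptr)
        *ptr++ = 0xD4200000; // AArch64: BRK 0
}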
4 changes: 2 additions & 2 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -137,7 +137,7 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
ARM64FloatEmitter float_emit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
-float_emit.FCVT(32, 64, Q0, RS);
+float_emit.FCVT(32, 64, D0, RS);
float_emit.REV32(8, D0, D0);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
@@ -215,7 +215,7 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
ARM64FloatEmitter float_emit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
-float_emit.FCVT(32, 64, Q0, RS);
+float_emit.FCVT(32, 64, D0, RS);
float_emit.UMOV(32, W0, Q0, 0);
emit->MOVI2R(X30, (u64)&PowerPC::Write_U32);
emit->BLR(X30);
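These Q0 → D0 changes (and the one in stfXX below) follow from the tightened IsSingle/IsDouble predicates in Arm64Emitter.h: Q0 and D0 share the same low-five-bit register index, so the emitted FCVT bits are presumably unchanged, but the destination of a double-to-single FCVT is only type-correct as a D register once IsDouble(Q0) no longer holds.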
Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -390,7 +390,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
}
else if (accessSize == 32)
{
-m_float_emit.FCVT(32, 64, Q0, V0);
+m_float_emit.FCVT(32, 64, D0, EncodeRegToDouble(V0));
m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0);
}
4 changes: 2 additions & 2 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -46,14 +46,14 @@ u32 Arm64RegCache::GetUnlockedRegisterCount()
void Arm64RegCache::LockRegister(ARM64Reg host_reg)
{
auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg);
-_assert_msg_(DYNA_REC, reg == m_host_registers.end(), "Don't try locking a register that isn't in the cache");
+_assert_msg_(DYNA_REC, reg != m_host_registers.end(), "Don't try locking a register that isn't in the cache. Reg %d", host_reg);
reg->Lock();
}

void Arm64RegCache::UnlockRegister(ARM64Reg host_reg)
{
auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg);
-_assert_msg_(DYNA_REC, reg == m_host_registers.end(), "Don't try unlocking a register that isn't in the cache");
+_assert_msg_(DYNA_REC, reg != m_host_registers.end(), "Don't try unlocking a register that isn't in the cache. Reg %d", host_reg);
reg->Unlock();
}

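The original conditions were inverted: _assert_msg_ fires when its condition is false, so asserting reg == m_host_registers.end() complained exactly when the register was present in the cache, i.e. on every legitimate lock or unlock. The fixed conditions also report the offending register number.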
48 changes: 24 additions & 24 deletions Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -128,8 +128,8 @@ void JitArm64AsmRoutineManager::GenerateCommon()

MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
}
const u8* loadPairedS8Two = GetCodePtr();
@@ -142,8 +142,8 @@

MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
}
const u8* loadPairedU16Two = GetCodePtr();
@@ -156,8 +156,8 @@

MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
}
const u8* loadPairedS16Two = GetCodePtr();
@@ -170,8 +170,8 @@

MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
}

@@ -192,8 +192,8 @@

MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
}
const u8* loadPairedS8One = GetCodePtr();
@@ -206,8 +206,8 @@

MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
}
const u8* loadPairedU16One = GetCodePtr();
@@ -220,8 +220,8 @@

MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
}
const u8* loadPairedS16One = GetCodePtr();
@@ -234,8 +234,8 @@

MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
}

@@ -295,8 +295,8 @@ void JitArm64AsmRoutineManager::GenerateCommon()

MOVI2R(X2, (u64)&m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
float_emit.FCVTZU(32, D0, D0);
float_emit.XTN(16, D0, D0);
float_emit.XTN(8, D0, D0);
@@ -326,8 +326,8 @@ void JitArm64AsmRoutineManager::GenerateCommon()

MOVI2R(X2, (u64)&m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
float_emit.FCVTZS(32, D0, D0);
float_emit.XTN(16, D0, D0);
float_emit.XTN(8, D0, D0);
@@ -358,8 +358,8 @@ void JitArm64AsmRoutineManager::GenerateCommon()

MOVI2R(X2, (u64)&m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
float_emit.FCVTZU(32, D0, D0);
float_emit.XTN(16, D0, D0);
float_emit.REV16(8, D0, D0);
@@ -388,8 +388,8 @@ void JitArm64AsmRoutineManager::GenerateCommon()

MOVI2R(X2, (u64)&m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-float_emit.LD1R(32, D1, scale_reg);
-float_emit.FMUL(32, D0, D0, D1);
+float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+float_emit.FMUL(32, D0, D0, D1, 0);
float_emit.FCVTZS(32, D0, D0);
float_emit.XTN(16, D0, D0);
float_emit.REV16(8, D0, D0);
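Every dequantize/quantize stub swaps the same two-instruction sequence. LD1R loads a scalar and replicates it across all lanes of D1 before a full lane-by-lane multiply; the replacement loads the scale into lane 0 with a plain unsigned-offset LDR, then uses the new by-element FMUL, which broadcasts the chosen lane itself, presumably making the replicating load unnecessary. Side by side, as a sketch of the two emitter sequences:

// Before: load-and-replicate, then lane-wise vector multiply.
float_emit.LD1R(32, D1, scale_reg);                   // D1 = { scale, scale }
float_emit.FMUL(32, D0, D0, D1);                      // D0[i] *= D1[i]

// After: scalar load into lane 0, multiply every lane by that element.
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); // D1[0] = scale
float_emit.FMUL(32, D0, D0, D1, 0);                   // D0[i] *= D1[0]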
8 changes: 8 additions & 0 deletions Source/Core/DolphinWX/MainAndroid.cpp
@@ -105,6 +105,12 @@ void Host_SetWiiMoteConnectionState(int _State) {}

void Host_ShowVideoConfig(void*, const std::string&, const std::string&) {}

+static bool MsgAlert(const char* caption, const char* text, bool /*yes_no*/, int /*Style*/)
+{
+__android_log_print(ANDROID_LOG_INFO, DOLPHIN_TAG, "%s:%s", caption, text);
+return false;
+}
+
#define DVD_BANNER_WIDTH 96
#define DVD_BANNER_HEIGHT 32
std::vector<std::string> m_volume_names;
@@ -344,6 +350,8 @@ JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_Run(JNIEnv *
OSD::AddCallback(OSD::OSD_INIT, ButtonManager::Init);
OSD::AddCallback(OSD::OSD_SHUTDOWN, ButtonManager::Shutdown);

+RegisterMsgAlertHandler(&MsgAlert);
+
UICommon::Init();

// No use running the loop when booting fails
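With the handler registered, Common's PanicAlert-style prompts on Android are routed to logcat rather than requiring a GUI; returning false answers any yes/no prompt in the negative.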
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/Software/SWVertexLoader.cpp
@@ -174,9 +174,9 @@ void SWVertexLoader::LoadVertex()
// convert the vertex from the gc format to the videocommon (hardware optimized) format
u8* old = g_video_buffer_read_ptr;
int converted_vertices = m_CurrentLoader->RunVertices(
-m_primitiveType, 1,
DataReader(g_video_buffer_read_ptr, nullptr), // src
-DataReader(m_LoadedVertices.data(), m_LoadedVertices.data() + m_LoadedVertices.size()) // dst
+DataReader(m_LoadedVertices.data(), m_LoadedVertices.data() + m_LoadedVertices.size()), // dst
+1, m_primitiveType
);
g_video_buffer_read_ptr = old + m_CurrentLoader->m_VertexSize;

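The call site reorders its arguments to match the RunVertices signature change in VertexLoaderBase (see VertexLoader.h below): data readers first, then count and primitive type, a shape shared by all loader backends including the new ARM64 one. A hedged call sketch (pointer names are illustrative):

// New shape: RunVertices(DataReader src, DataReader dst, int count, int primitive)
int converted = loader->RunVertices(
    DataReader(src_ptr, nullptr),   // src
    DataReader(dst_ptr, dst_end),   // dst
    1,                              // vertex count
    primitive);                     // primitive type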
9 changes: 4 additions & 5 deletions Source/Core/VideoCommon/CMakeLists.txt
@@ -45,12 +45,11 @@ set(SRCS BoundingBox.cpp
set(LIBS core png)

if(_M_X86)
-set(SRCS ${SRCS} TextureDecoder_x64.cpp VertexLoaderX64.cpp)
+set(SRCS ${SRCS} TextureDecoder_x64.cpp VertexLoaderX64.cpp)
+elseif(_M_ARM_64)
+set(SRCS ${SRCS} VertexLoaderARM64.cpp TextureDecoder_Generic.cpp)
else()
-set(SRCS ${SRCS} TextureDecoder_Generic.cpp)
-endif()
-if(NOT ${CL} STREQUAL CL-NOTFOUND)
-list(APPEND LIBS ${CL})
+set(SRCS ${SRCS} TextureDecoder_Generic.cpp)
endif()

if(LIBAV_FOUND OR WIN32)
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/VertexLoader.cpp
@@ -316,7 +316,7 @@ void VertexLoader::WriteCall(TPipelineFunction func)
m_PipelineStages[m_numPipelineStages++] = func;
}

-int VertexLoader::RunVertices(int primitive, int count, DataReader src, DataReader dst)
+int VertexLoader::RunVertices(DataReader src, DataReader dst, int count, int primitive)
{
g_vertex_manager_write_ptr = dst.GetPointer();
g_video_buffer_read_ptr = src.GetPointer();
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/VertexLoader.h
@@ -32,7 +32,7 @@ class VertexLoader : public VertexLoaderBase
public:
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);

-int RunVertices(int primitive, int count, DataReader src, DataReader dst) override;
+int RunVertices(DataReader src, DataReader dst, int count, int primitive) override;
std::string GetName() const override { return "OldLoader"; }
bool IsInitialized() override { return true; } // This vertex loader supports all formats
