-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
JitAsm.cpp
280 lines (223 loc) · 8.67 KB
/
JitAsm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
// Copyright 2008 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/PowerPC/Jit64/JitAsm.h"
#include <climits>
#include "Common/CommonTypes.h"
#include "Common/EnumUtils.h"
#include "Common/JitRegister.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
#include "Core/Config/MainSettings.h"
#include "Core/CoreTiming.h"
#include "Core/HW/CPU.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/System.h"
using namespace Gen;
// Constructs the routine manager by handing the owning Jit64 to the
// CommonAsmRoutines base. Nothing is emitted here; see Init().
Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit)
    : CommonAsmRoutines(jit)
{
}
// First-time setup: carves a child code region out for the constant pool,
// emits every routine via Generate(), then calls WriteProtect(true).
void Jit64AsmRoutineManager::Init()
{
  // The same byte count is used for both the child allocation and the pool.
  constexpr size_t const_pool_bytes = 4096;

  auto* const const_pool_region = AllocChildCodeSpace(const_pool_bytes);
  m_const_pool.Init(const_pool_region, const_pool_bytes);

  Generate();
  WriteProtect(true);
}
// Emits all routines again after Init() has already run.
// Unlike Init(), this does not re-initialize m_const_pool; it only calls
// UnWriteProtect(false), resets the code pointer, regenerates, and calls
// WriteProtect(true) again.
void Jit64AsmRoutineManager::Regenerate()
{
  // Undo the protection applied at the end of the previous generation pass.
  UnWriteProtect(false);
  ResetCodePtr();

  Generate();

  WriteProtect(true);
}
// PLAN: no more block numbers - crazy opcodes just contain offset within
// dynarec buffer
// At this offset - 4, there is an int specifying the block number.
//
// Emits the JIT entry point and dispatcher loop, then the shared helper
// routines (via GenerateCommon()).
//
// Code pointers recorded while emitting, in emission order:
//   enter_code                  - start of the loop (prologue).
//   dispatcher_mispredicted_blr - fixes up pc/RSP/downcount, then falls into dispatcher.
//   dispatcher                  - branches to the timing path if flags say downcount <= 0.
//   dispatcher_no_timing_check  - skips the downcount branch.
//   dispatcher_no_check         - skips the downcount branch and the debug state check.
//   do_timing                   - timing path; loops back to GlobalAdvance or exits.
//   dispatcher_exit             - epilogue that restores RSP/registers and returns.
//
// The range [enter_code, end of epilogue) is registered under the name "JIT_Loop";
// the routines emitted by GenerateCommon() come after that range.
void Jit64AsmRoutineManager::Generate()
{
// When debugging is enabled, an extra CPU-state check is emitted in the dispatcher
// and the first forward jump is emitted as a near jump instead of a short one.
const bool enable_debugging = Config::IsDebuggingEnabled();
enter_code = AlignCode16();
// We need to own the beginning of RSP, so we do an extra stack adjustment
// for the shadow region before calls in this function. This call will
// waste a bit of space for a second shadow, but whatever.
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, /*frame*/ 16);
auto& ppc_state = m_jit.m_ppc_state;
// Two statically allocated registers.
// MOV(64, R(RMEM), Imm64((u64)Memory::physical_base));
// RPPCSTATE is loaded with the address of ppc_state plus 0x80.
// NOTE(review): presumably the PPCSTATE() operands are biased by the same 0x80 so
// more fields are reachable with a short displacement - confirm in Jit64PowerPCState.h.
MOV(64, R(RPPCSTATE), Imm64((u64)&ppc_state + 0x80));
// Remember RSP; ResetStack() reloads RSP from this slot.
MOV(64, PPCSTATE(stored_stack_pointer), R(RSP));
// something that can't pass the BLR test
MOV(64, MDisp(RSP, 8), Imm32((u32)-1));
// Top of the outer loop: the timing path (do_timing) jumps back here while the
// CPU state is still Running.
const u8* outerLoop = GetCodePtr();
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunction(CoreTiming::GlobalAdvance);
ABI_PopRegistersAndAdjustStack({}, 0);
// When we've just entered the jit we need to update the membase
// GlobalAdvance also checks exceptions after which we need to
// update the membase so it makes sense to do this here.
MOV(64, R(RMEM), PPCSTATE(mem_ptr));
// skip the sync and compare first time
// (This branch is resolved just before dispatcher_no_check below.)
FixupBranch skipToRealDispatch = J(enable_debugging ? Jump::Near : Jump::Short);
// Entry point: clear the two low bits of pc, restore RSP, and subtract RSCRATCH2
// from downcount before falling through into `dispatcher`.
// NOTE(review): code jumping here presumably leaves the block's cycle count in
// RSCRATCH2 - confirm against the callers.
dispatcher_mispredicted_blr = GetCodePtr();
AND(32, PPCSTATE(pc), Imm32(0xFFFFFFFC));
#if 0 // debug mispredicts
MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
ABI_PushRegistersAndAdjustStack(1 << RSCRATCH2, 0);
CALL(reinterpret_cast<void *>(&ReportMispredict));
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH2, 0);
#endif
ResetStack(*this);
SUB(32, PPCSTATE(downcount), R(RSCRATCH2));
dispatcher = GetCodePtr();
// Expected result of SUB(32, PPCSTATE(downcount), Imm32(block_cycles)) is in RFLAGS.
// Branch if downcount is <= 0 (signed).
// (`bail` is resolved at do_timing below.)
FixupBranch bail = J_CC(CC_LE, Jump::Near);
// Entry point that skips the downcount branch above.
dispatcher_no_timing_check = GetCodePtr();
auto& system = m_jit.m_system;
// Only assigned and only resolved when enable_debugging is true.
FixupBranch dbg_exit;
if (enable_debugging)
{
// Leave through dispatcher_exit unless the CPU state is Running.
MOV(64, R(RSCRATCH), ImmPtr(system.GetCPU().GetStatePtr()));
CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
dbg_exit = J_CC(CC_NE, Jump::Near);
}
SetJumpTarget(skipToRealDispatch);
// Entry point that skips both the downcount branch and the debug state check.
dispatcher_no_check = GetCodePtr();
// The following is a translation of JitBaseBlockCache::Dispatch into assembly.
// Always true here, so the `!assembly_dispatcher` term further down never fires.
const bool assembly_dispatcher = true;
if (assembly_dispatcher)
{
if (m_jit.GetBlockCache()->GetEntryPoints())
{
// Build (feature_flags << 32) | pc in RSCRATCH_EXTRA.
MOV(32, R(RSCRATCH2), PPCSTATE(feature_flags));
SHL(64, R(RSCRATCH2), Imm8(32));
MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
OR(64, R(RSCRATCH_EXTRA), R(RSCRATCH2));
u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetEntryPoints());
MOV(64, R(RSCRATCH2), Imm64(icache));
// The entry points map is indexed by ((feature_flags << 30) | (pc >> 2)).
// The map contains 8-byte pointers and that means we need to shift feature_flags
// left by 33 bits and pc left by 1 bit to get the correct offset in the map.
// The SCALE_2 index below supplies the remaining shift by one bit.
MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_2, 0));
}
else
{
// Fast block number lookup.
// ((PC >> 2) & mask) * sizeof(JitBlock*) = (PC & (mask << 2)) * 2
MOV(32, R(RSCRATCH), PPCSTATE(pc));
// Keep a copy for later.
MOV(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetFastBlockMapFallback());
AND(32, R(RSCRATCH), Imm32(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_MASK << 2));
// If the table address fits in a signed 32-bit displacement, encode it directly in
// the addressing mode; otherwise load it into RSCRATCH2 and use it as the base.
if (icache <= INT_MAX)
{
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_2, static_cast<s32>(icache)));
}
else
{
MOV(64, R(RSCRATCH2), Imm64(icache));
MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_2, 0));
}
}
// Check if we found a block.
// (A zero value in RSCRATCH takes the not_found branch.)
TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch not_found = J_CC(CC_Z);
// Only assigned and only resolved on the fallback-map path.
FixupBranch state_mismatch;
if (!m_jit.GetBlockCache()->GetEntryPoints())
{
// Fallback-map path: RSCRATCH points at a JitBlockData, which is validated with a
// single 64-bit compare against (pc << 32) | feature_flags.
// Check block.feature_flags.
MOV(32, R(RSCRATCH2), PPCSTATE(feature_flags));
// Also check the block.effectiveAddress. RSCRATCH_EXTRA still has the PC.
SHL(64, R(RSCRATCH_EXTRA), Imm8(32));
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
// The 64-bit compare relies on effectiveAddress sitting directly after feature_flags.
static_assert(offsetof(JitBlockData, feature_flags) + 4 ==
offsetof(JitBlockData, effectiveAddress));
CMP(64, R(RSCRATCH2),
MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, feature_flags))));
state_mismatch = J_CC(CC_NE);
// Success; branch to the block we found.
JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, normalEntry))));
}
else
{
// Entry-points path: RSCRATCH is used directly as the jump target.
// Success; branch to the block we found.
JMPptr(R(RSCRATCH));
}
// Both failure branches (no entry, or entry that does not match the current
// pc/feature_flags) land here.
SetJumpTarget(not_found);
if (!m_jit.GetBlockCache()->GetEntryPoints())
{
SetJumpTarget(state_mismatch);
}
// Failure, fallback to the C++ dispatcher for calling the JIT.
}
// There is no point in calling the dispatcher in the fast lookup table
// case, since the assembly dispatcher would already have found a block.
if (!assembly_dispatcher || !m_jit.GetBlockCache()->GetEntryPoints())
{
// Ok, no block, let's call the slow dispatcher
// (JitBase::Dispatch is passed &m_jit; a zero return value means no block.)
ABI_PushRegistersAndAdjustStack({}, 0);
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&m_jit)));
ABI_CallFunction(JitBase::Dispatch);
ABI_PopRegistersAndAdjustStack({}, 0);
TEST(64, R(ABI_RETURN), R(ABI_RETURN));
FixupBranch no_block_available = J_CC(CC_Z);
// Jump to the block
JMPptr(R(ABI_RETURN));
SetJumpTarget(no_block_available);
}
// We reset the stack because Jit might clear the code cache.
// Also if we are in the middle of disabling BLR optimization on windows
// we need to reset the stack before _resetstkoflw() is called in Jit
// otherwise we will generate a second stack overflow exception during DoJit()
ResetStack(*this);
// Call JitTrampoline with &m_jit and the current pc, then retry the lookup from
// dispatcher_no_check.
ABI_PushRegistersAndAdjustStack({}, 0);
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&m_jit)));
MOV(32, R(ABI_PARAM2), PPCSTATE(pc));
ABI_CallFunction(JitTrampoline);
ABI_PopRegistersAndAdjustStack({}, 0);
// If jitting triggered an ISI exception, MSR.DR may have changed
MOV(64, R(RMEM), PPCSTATE(mem_ptr));
JMP(dispatcher_no_check, Jump::Near);
// Timing path: reached from the downcount branch at `dispatcher`.
SetJumpTarget(bail);
do_timing = GetCodePtr();
// make sure npc contains the next pc (needed for exception checking in CoreTiming::Advance)
MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH));
// Check the state pointer to see if we are exiting
// Gets checked on at the end of every slice
// (Still Running: go back to outerLoop. Anything else falls through to the exit.)
MOV(64, R(RSCRATCH), ImmPtr(system.GetCPU().GetStatePtr()));
CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
J_CC(CC_E, outerLoop);
// Landing pad for drec space
dispatcher_exit = GetCodePtr();
if (enable_debugging)
SetJumpTarget(dbg_exit);
// Reset the stack pointer, since the BLR optimization may have pushed things onto the stack
// without popping them.
ResetStack(*this);
// Epilogue: uses the same register set and adjustment arguments as the prologue.
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
RET();
Common::JitRegister::Register(enter_code, GetCodePtr(), "JIT_Loop");
// Emitted after the "JIT_Loop" range has been registered.
GenerateCommon();
}
// Emits, through `emitter`, a 64-bit MOV that reloads RSP from
// PPCSTATE(stored_stack_pointer).
void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter)
{
  const auto saved_stack_pointer = PPCSTATE(stored_stack_pointer);
  emitter.MOV(64, R(RSP), saved_stack_pointer);
}
// Emits the shared helper routines, in a fixed order.
// frsqrte, fres, mfcr and cdts each record the AlignCode4() result taken
// immediately before the matching generator runs. The quantized load/store
// generators are invoked without recording a pointer in this function.
void Jit64AsmRoutineManager::GenerateCommon()
{
  // Routines whose entry pointer is recorded here.
  frsqrte = AlignCode4();
  GenFrsqrte();

  fres = AlignCode4();
  GenFres();

  mfcr = AlignCode4();
  GenMfcr();

  cdts = AlignCode4();
  GenConvertDoubleToSingle();

  // Quantized loads first, then quantized stores.
  GenQuantizedLoads();
  GenQuantizedSingleLoads();

  GenQuantizedStores();
  GenQuantizedSingleStores();
}