-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
PowerPC.h
445 lines (365 loc) · 12.7 KB
/
PowerPC.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <tuple>
#include <vector>
#include "Common/CommonTypes.h"
#include "Core/Debugger/PPCDebugInterface.h"
#include "Core/PowerPC/BreakPoints.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCCache.h"
class CPUCoreBase;
class PointerWrap;
namespace PowerPC
{
// The gaps in the CPUCore numbering are from cores that only existed in the past.
// We avoid re-numbering cores so that settings will be compatible across versions.
enum CPUCore
{
CORE_INTERPRETER = 0,
CORE_JIT64 = 1,
CORE_JITARM64 = 4,
CORE_CACHEDINTERPRETER = 5,
};
enum class CoreMode
{
Interpreter,
JIT,
};
// TLB cache
constexpr size_t TLB_SIZE = 128;
constexpr size_t NUM_TLBS = 2;
constexpr size_t TLB_WAYS = 2;
struct TLBEntry
{
static constexpr u32 INVALID_TAG = 0xffffffff;
u32 tag[TLB_WAYS] = {INVALID_TAG, INVALID_TAG};
u32 paddr[TLB_WAYS] = {};
u32 pte[TLB_WAYS] = {};
u8 recent = 0;
};
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
struct PowerPCState
{
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
u32 pc; // program counter
u32 npc;
// Optimized CR implementation. Instead of storing CR in its PowerPC format
// (4 bit value, SO/EQ/LT/GT), we store instead a 64 bit value for each of
// the 8 CR register parts. This 64 bit value follows this format:
// - SO iff. bit 61 is set
// - EQ iff. lower 32 bits == 0
// - GT iff. (s64)cr_val > 0
// - LT iff. bit 62 is set
//
// This has the interesting property that sign-extending the result of an
// operation from 32 to 64 bits results in a 64 bit value that works as a
// CR value. Checking each part of CR is also fast, as it is equivalent to
// testing one bit or the low 32 bit part of a register. And CR can still
// be manipulated bit by bit fairly easily.
u64 cr_val[8];
UReg_MSR msr; // machine state register
UReg_FPSCR fpscr; // floating point flags/status bits
// Exception management.
u32 Exceptions;
// Downcount for determining when we need to do timing
// This isn't quite the right location for it, but it is here to accelerate the ARM JIT
// This variable should be inside of the CoreTiming namespace if we wanted to be correct.
int downcount;
// XER, reformatted into byte fields for easier access.
u8 xer_ca;
u8 xer_so_ov; // format: (SO << 1) | OV
// The Broadway CPU implements bits 16-23 of the XER register... even though it doesn't support
// lscbx
u16 xer_stringctrl;
// gather pipe pointer for JIT access
u8* gather_pipe_ptr;
u8* gather_pipe_base_ptr;
#if _M_X86_64
// This member exists for the purpose of an assertion in x86 JitBase.cpp
// that its offset <= 0x100. To minimize code size on x86, we want as much
// useful stuff in the one-byte offset range as possible - which is why ps
// is sitting down here. It currently doesn't make a difference on other
// supported architectures.
std::tuple<> above_fits_in_first_0x100;
#endif
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
alignas(16) u64 ps[32][2];
u32 sr[16]; // Segment registers.
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
// also for power management, but we don't care about that.
u32 spr[1024];
// Storage for the stack pointer of the BLR optimization.
u8* stored_stack_pointer;
std::array<std::array<TLBEntry, TLB_SIZE / TLB_WAYS>, NUM_TLBS> tlb;
u32 pagetable_base;
u32 pagetable_hashmask;
InstructionCache iCache;
};
#if _M_X86_64
static_assert(offsetof(PowerPC::PowerPCState, above_fits_in_first_0x100) <= 0x100,
"top of PowerPCState too big");
#endif
extern PowerPCState ppcState;
extern BreakPoints breakpoints;
extern MemChecks memchecks;
extern PPCDebugInterface debug_interface;
const std::vector<CPUCore>& AvailableCPUCores();
CPUCore DefaultCPUCore();
void Init(int cpu_core);
void Reset();
void Shutdown();
void DoState(PointerWrap& p);
void ScheduleInvalidateCacheThreadSafe(u32 address);
CoreMode GetMode();
// [NOT THREADSAFE] CPU Thread or CPU::PauseAndLock or Core::State::Uninitialized
void SetMode(CoreMode _coreType);
const char* GetCPUName();
// Set the current CPU Core to the given implementation until removed.
// Remove the current injected CPU Core by passing nullptr.
// While an external CPUCoreBase is injected, GetMode() will return CoreMode::Interpreter.
// Init() will be called when added and Shutdown() when removed.
// [Threadsafety: Same as SetMode(), except it cannot be called from inside the CPU
// run loop on the CPU Thread - it doesn't make sense for a CPU to remove itself
// while it is in State::Running]
void InjectExternalCPUCore(CPUCoreBase* core);
// Stepping requires the CPU Execution lock (CPU::PauseAndLock or CPU Thread)
// It's not threadsafe otherwise.
void SingleStep();
void CheckExceptions();
void CheckExternalExceptions();
void CheckBreakPoints();
void RunLoop();
u32 CompactCR();
void ExpandCR(u32 cr);
void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst);
// Easy register access macros.
#define HID0 ((UReg_HID0&)PowerPC::ppcState.spr[SPR_HID0])
#define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2])
#define HID4 ((UReg_HID4&)PowerPC::ppcState.spr[SPR_HID4])
#define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU])
#define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL])
#define MMCR0 ((UReg_MMCR0&)PowerPC::ppcState.spr[SPR_MMCR0])
#define MMCR1 ((UReg_MMCR1&)PowerPC::ppcState.spr[SPR_MMCR1])
#define PC PowerPC::ppcState.pc
#define NPC PowerPC::ppcState.npc
#define FPSCR PowerPC::ppcState.fpscr
#define MSR PowerPC::ppcState.msr
#define GPR(n) PowerPC::ppcState.gpr[n]
#define rGPR PowerPC::ppcState.gpr
#define rSPR(i) PowerPC::ppcState.spr[i]
#define LR PowerPC::ppcState.spr[SPR_LR]
#define CTR PowerPC::ppcState.spr[SPR_CTR]
#define rDEC PowerPC::ppcState.spr[SPR_DEC]
#define SRR0 PowerPC::ppcState.spr[SPR_SRR0]
#define SRR1 PowerPC::ppcState.spr[SPR_SRR1]
#define SPRG0 PowerPC::ppcState.spr[SPR_SPRG0]
#define SPRG1 PowerPC::ppcState.spr[SPR_SPRG1]
#define SPRG2 PowerPC::ppcState.spr[SPR_SPRG2]
#define SPRG3 PowerPC::ppcState.spr[SPR_SPRG3]
#define GQR(x) PowerPC::ppcState.spr[SPR_GQR0 + x]
#define TL PowerPC::ppcState.spr[SPR_TL]
#define TU PowerPC::ppcState.spr[SPR_TU]
#define rPS0(i) (*(double*)(&PowerPC::ppcState.ps[i][0]))
#define rPS1(i) (*(double*)(&PowerPC::ppcState.ps[i][1]))
#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
// Routines for debugger UI, cheats, etc. to access emulated memory from the
// perspective of the CPU. Not for use by core emulation routines.
// Use "Host_" prefix.
u8 HostRead_U8(u32 address);
u16 HostRead_U16(u32 address);
u32 HostRead_U32(u32 address);
u64 HostRead_U64(u32 address);
float HostRead_F32(u32 address);
double HostRead_F64(u32 address);
u32 HostRead_Instruction(u32 address);
void HostWrite_U8(u8 var, u32 address);
void HostWrite_U16(u16 var, u32 address);
void HostWrite_U32(u32 var, u32 address);
void HostWrite_U64(u64 var, u32 address);
void HostWrite_F32(float var, u32 address);
void HostWrite_F64(double var, u32 address);
// Returns whether a read or write to the given address will resolve to a RAM
// access given the current CPU state.
bool HostIsRAMAddress(u32 address);
// Same as HostIsRAMAddress, but uses IBAT instead of DBAT.
bool HostIsInstructionRAMAddress(u32 address);
std::string HostGetString(u32 em_address, size_t size = 0);
// Routines for the CPU core to access memory.
// Used by interpreter to read instructions, uses iCache
u32 Read_Opcode(u32 address);
struct TryReadInstResult
{
bool valid;
bool from_bat;
u32 hex;
u32 physical_address;
};
TryReadInstResult TryReadInstruction(u32 address);
u8 Read_U8(u32 address);
u16 Read_U16(u32 address);
u32 Read_U32(u32 address);
u64 Read_U64(u32 address);
// Useful helper functions, used by ARM JIT
float Read_F32(u32 address);
double Read_F64(u32 address);
// used by JIT. Return zero-extended 32bit values
u32 Read_U8_ZX(u32 address);
u32 Read_U16_ZX(u32 address);
void Write_U8(u8 var, u32 address);
void Write_U16(u16 var, u32 address);
void Write_U32(u32 var, u32 address);
void Write_U64(u64 var, u32 address);
void Write_U16_Swap(u16 var, u32 address);
void Write_U32_Swap(u32 var, u32 address);
void Write_U64_Swap(u64 var, u32 address);
// Useful helper functions, used by ARM JIT
void Write_F64(double var, u32 address);
void DMA_LCToMemory(u32 memAddr, u32 cacheAddr, u32 numBlocks);
void DMA_MemoryToLC(u32 cacheAddr, u32 memAddr, u32 numBlocks);
void ClearCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned
// TLB functions
void SDRUpdated();
void InvalidateTLBEntry(u32 address);
void DBATUpdated();
void IBATUpdated();
// Result changes based on the BAT registers and MSR.DR. Returns whether
// it's safe to optimize a read or write to this address to an unguarded
// memory access. Does not consider page tables.
bool IsOptimizableRAMAddress(u32 address);
u32 IsOptimizableMMIOAccess(u32 address, u32 accessSize);
bool IsOptimizableGatherPipeWrite(u32 address);
struct TranslateResult
{
bool valid;
bool from_bat;
u32 address;
};
TranslateResult JitCache_TranslateAddress(u32 address);
constexpr int BAT_INDEX_SHIFT = 17;
constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
constexpr u32 BAT_MAPPED_BIT = 0x1;
constexpr u32 BAT_PHYSICAL_BIT = 0x2;
constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x3);
using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>; // 128 KB
extern BatTable ibat_table;
extern BatTable dbat_table;
inline bool TranslateBatAddess(const BatTable& bat_table, u32* address)
{
u32 bat_result = bat_table[*address >> BAT_INDEX_SHIFT];
if ((bat_result & BAT_MAPPED_BIT) == 0)
return false;
*address = (bat_result & BAT_RESULT_MASK) | (*address & (BAT_PAGE_SIZE - 1));
return true;
}
enum CRBits
{
CR_SO = 1,
CR_EQ = 2,
CR_GT = 4,
CR_LT = 8,
CR_SO_BIT = 0,
CR_EQ_BIT = 1,
CR_GT_BIT = 2,
CR_LT_BIT = 3,
};
// Convert between PPC and internal representation of CR.
inline u64 PPCCRToInternal(u8 value)
{
u64 cr_val = 0x100000000;
cr_val |= (u64) !!(value & CR_SO) << 61;
cr_val |= (u64) !(value & CR_EQ);
cr_val |= (u64) !(value & CR_GT) << 63;
cr_val |= (u64) !!(value & CR_LT) << 62;
return cr_val;
}
// convert flags into 64-bit CR values with a lookup table
extern const std::array<u64, 16> m_crTable;
// Warning: these CR operations are fairly slow since they need to convert from
// PowerPC format (4 bit) to our internal 64 bit format. See the definition of
// ppcState.cr_val for more explanations.
inline void SetCRField(int cr_field, int value)
{
PowerPC::ppcState.cr_val[cr_field] = m_crTable[value];
}
inline u32 GetCRField(int cr_field)
{
u64 cr_val = PowerPC::ppcState.cr_val[cr_field];
u32 ppc_cr = 0;
// SO
ppc_cr |= !!(cr_val & (1ull << 61));
// EQ
ppc_cr |= ((cr_val & 0xFFFFFFFF) == 0) << 1;
// GT
ppc_cr |= ((s64)cr_val > 0) << 2;
// LT
ppc_cr |= !!(cr_val & (1ull << 62)) << 3;
return ppc_cr;
}
inline u32 GetCRBit(int bit)
{
return (GetCRField(bit >> 2) >> (3 - (bit & 3))) & 1;
}
inline void SetCRBit(int bit, int value)
{
if (value & 1)
SetCRField(bit >> 2, GetCRField(bit >> 2) | (0x8 >> (bit & 3)));
else
SetCRField(bit >> 2, GetCRField(bit >> 2) & ~(0x8 >> (bit & 3)));
}
// SetCR and GetCR are fairly slow. Should be avoided if possible.
inline void SetCR(u32 new_cr)
{
PowerPC::ExpandCR(new_cr);
}
inline u32 GetCR()
{
return PowerPC::CompactCR();
}
inline void SetCarry(int ca)
{
PowerPC::ppcState.xer_ca = ca;
}
inline int GetCarry()
{
return PowerPC::ppcState.xer_ca;
}
inline UReg_XER GetXER()
{
u32 xer = 0;
xer |= PowerPC::ppcState.xer_stringctrl;
xer |= PowerPC::ppcState.xer_ca << XER_CA_SHIFT;
xer |= PowerPC::ppcState.xer_so_ov << XER_OV_SHIFT;
return xer;
}
inline void SetXER(UReg_XER new_xer)
{
PowerPC::ppcState.xer_stringctrl = new_xer.BYTE_COUNT + (new_xer.BYTE_CMP << 8);
PowerPC::ppcState.xer_ca = new_xer.CA;
PowerPC::ppcState.xer_so_ov = (new_xer.SO << 1) + new_xer.OV;
}
inline u32 GetXER_SO()
{
return PowerPC::ppcState.xer_so_ov >> 1;
}
inline void SetXER_SO(bool value)
{
PowerPC::ppcState.xer_so_ov |= static_cast<u32>(value) << 1;
}
inline u32 GetXER_OV()
{
return PowerPC::ppcState.xer_so_ov & 1;
}
inline void SetXER_OV(bool value)
{
PowerPC::ppcState.xer_so_ov = (PowerPC::ppcState.xer_so_ov & 0xFE) | static_cast<u32>(value);
SetXER_SO(value);
}
void UpdateFPRF(double dvalue);
} // namespace PowerPC