Skip to content

Commit b759f25

Browse files
committed
ppc: Use a unified opcode lookup table
Instead of a primary opcode lookup table with 64 entries and a few smaller tables with 4-2048 entries, use a single 64 * 2048 (128K) entry table to dispatch opcodes. This helps with performance, since we avoid the extra function call overhead for some frequently-used instructions (e.g. branch, integer, floating point). It saves ~2 seconds from the time to reach "Welcome to Macintosh" (same measurement methodology as #125). Secondarily, it also makes opcode registration/decoding a bit more uniform and scannable, since it's now all in initialize_ppc_opcode_table.
1 parent c125610 commit b759f25

4 files changed

Lines changed: 86 additions & 146 deletions

File tree

cpu/ppc/ppcemu.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ void ppc_illegalop(uint32_t opcode);
411411
void ppc_assert_int();
412412
void ppc_release_int();
413413

414-
void initialize_ppc_opcode_tables();
414+
void initialize_ppc_opcode_table();
415415

416416
void ppc_changecrf0(uint32_t set_result);
417417
void set_host_rounding_mode(uint8_t mode);

cpu/ppc/ppcexec.cpp

Lines changed: 80 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -180,30 +180,9 @@ class CPUProfile : public BaseProfile {
180180

181181
#endif
182182

183-
/** Opcode lookup tables. */
184-
185-
/** Primary opcode (bits 0...5) lookup table. */
186-
static PPCOpcode OpcodeGrabber[64];
187-
188-
/** Lookup tables for branch instructions. */
189-
const static PPCOpcode SubOpcode16Grabber[] = {
190-
dppc_interpreter::ppc_bc<LK0, AA0>, // bc
191-
dppc_interpreter::ppc_bc<LK1, AA0>, // bcl
192-
dppc_interpreter::ppc_bc<LK0, AA1>, // bca
193-
dppc_interpreter::ppc_bc<LK1, AA1>}; // bcla
194-
195-
const static PPCOpcode SubOpcode18Grabber[] = {
196-
dppc_interpreter::ppc_b<LK0, AA0>, // b
197-
dppc_interpreter::ppc_b<LK1, AA0>, // bl
198-
dppc_interpreter::ppc_b<LK0, AA1>, // ba
199-
dppc_interpreter::ppc_b<LK1, AA1>}; // bla
200-
201-
/** Instructions decoding tables for integer,
202-
single floating-point, and double-floating point ops respectively */
203-
204-
static PPCOpcode SubOpcode31Grabber[2048];
205-
static PPCOpcode SubOpcode59Grabber[64];
206-
static PPCOpcode SubOpcode63Grabber[2048];
183+
/** Opcode lookup table, indexed by
184+
primary opcode (bits 0...5) and modifier (bits 21...31). */
185+
static PPCOpcode OpcodeGrabber[64 * 2048];
207186

208187
/** Exception helpers. */
209188

@@ -227,88 +206,7 @@ void ppc_release_int() {
227206

228207
/** Opcode decoding functions. */
229208

230-
static void ppc_opcode16(uint32_t opcode) {
231-
SubOpcode16Grabber[opcode & 3](opcode);
232-
}
233-
234-
static void ppc_opcode18(uint32_t opcode) {
235-
SubOpcode18Grabber[opcode & 3](opcode);
236-
}
237-
238-
template<field_601 for601>
239-
static void ppc_opcode19(uint32_t opcode) {
240-
uint16_t subop_grab = opcode & 0x7FF;
241-
242-
switch (subop_grab) {
243-
case 0:
244-
ppc_mcrf(opcode);
245-
break;
246-
case 32:
247-
ppc_bclr<LK0>(opcode);
248-
break;
249-
case 33:
250-
ppc_bclr<LK1>(opcode);
251-
break;
252-
case 66:
253-
ppc_crnor(opcode);
254-
break;
255-
case 100:
256-
ppc_rfi(opcode);
257-
break;
258-
case 258:
259-
ppc_crandc(opcode);
260-
break;
261-
case 300:
262-
ppc_isync(opcode);
263-
break;
264-
case 386:
265-
ppc_crxor(opcode);
266-
break;
267-
case 450:
268-
ppc_crnand(opcode);
269-
break;
270-
case 514:
271-
ppc_crand(opcode);
272-
break;
273-
case 578:
274-
ppc_creqv(opcode);
275-
break;
276-
case 834:
277-
ppc_crorc(opcode);
278-
break;
279-
case 898:
280-
ppc_cror(opcode);
281-
break;
282-
case 1056:
283-
ppc_bcctr<LK0, for601>(opcode);
284-
break;
285-
case 1057:
286-
ppc_bcctr<LK1, for601>(opcode);
287-
break;
288-
default:
289-
ppc_illegalop(opcode);
290-
}
291-
}
292-
293-
template void ppc_opcode19<NOT601>(uint32_t opcode);
294-
template void ppc_opcode19<IS601>(uint32_t opcode);
295-
296-
static void ppc_opcode31(uint32_t opcode) {
297-
uint16_t subop_grab = opcode & 0x7FFUL;
298-
SubOpcode31Grabber[subop_grab](opcode);
299-
}
300-
301-
static void ppc_opcode59(uint32_t opcode) {
302-
uint16_t subop_grab = opcode & 0x3FUL;
303-
SubOpcode59Grabber[subop_grab](opcode);
304-
}
305-
306-
static void ppc_opcode63(uint32_t opcode) {
307-
uint16_t subop_grab = opcode & 0x7FFUL;
308-
SubOpcode63Grabber[subop_grab](opcode);
309-
}
310-
311-
/* Dispatch using main opcode */
209+
/* Dispatch using primary and modifier opcode */
312210
void ppc_main_opcode(uint32_t opcode)
313211
{
314212
#ifdef CPU_PROFILING
@@ -317,7 +215,7 @@ void ppc_main_opcode(uint32_t opcode)
317215
num_opcodes[opcode]++;
318216
#endif
319217
#endif
320-
OpcodeGrabber[(opcode >> 26) & 0x3F](opcode);
218+
OpcodeGrabber[(opcode >> 15 & 0x1F800) | (opcode & 0x7FF)](opcode);
321219
}
322220

323221
static long long cpu_now_ns() {
@@ -591,60 +489,82 @@ Opcode table macros:
591489
- o is for overflow (OV).
592490
- c is for carry CARRY0/CARRY1. It also works for other options:
593491
SHFT0/SHFT1, RIGHT0/LEFT1, uint8_t/uint16_t/uint32_t, and int8_t/int16_t.
492+
- a is for address mode (AA).
493+
- l is for link register (LK).
494+
- r is for raw (adding custom entries to the table)
594495
*/
595496

596497
#define OP(opcode, fn) \
597498
do { \
598-
OpcodeGrabber[opcode] = fn; \
499+
for (uint32_t mod = 0; mod < 2048; mod++) { \
500+
OpcodeGrabber[((opcode) << 11) | mod] = fn; \
501+
} \
599502
} while (0)
600503

601504
#define OPX(opcode, subopcode, fn) \
602505
do { \
603-
opcode ## Grabber[((subopcode)<<1)] = fn; \
506+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1)] = fn; \
604507
} while (0)
605508

606509
#define OPXd(opcode, subopcode, fn) \
607510
do { \
608-
opcode ## Grabber[((subopcode)<<1)] = fn<RC0>; \
609-
opcode ## Grabber[((subopcode)<<1)+1] = fn<RC1>; \
511+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x000] = fn<RC0>; \
512+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x001] = fn<RC1>; \
610513
} while (0)
611514

612515
#define OPXod(opcode, subopcode, fn) \
613516
do { \
614-
opcode ## Grabber[((subopcode)<<1)] = fn<RC0, OV0>; \
615-
opcode ## Grabber[((subopcode)<<1)+1] = fn<RC1, OV0>; \
616-
opcode ## Grabber[1024+((subopcode)<<1)] = fn<RC0, OV1>; \
617-
opcode ## Grabber[1024+((subopcode)<<1)+1] = fn<RC1, OV1>; \
517+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x000] = fn<RC0, OV0>; \
518+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x001] = fn<RC1, OV0>; \
519+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x400] = fn<RC0, OV1>; \
520+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x401] = fn<RC1, OV1>; \
618521
} while (0)
619522

620523
#define OPXdc(opcode, subopcode, fn, carry) \
621524
do { \
622-
opcode ## Grabber[((subopcode)<<1)] = fn<carry, RC0>; \
623-
opcode ## Grabber[((subopcode)<<1)+1] = fn<carry, RC1>; \
525+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x000] = fn<carry, RC0>; \
526+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x001] = fn<carry, RC1>; \
624527
} while (0)
625528

626529
#define OPXcod(opcode, subopcode, fn, carry) \
627530
do { \
628-
opcode ## Grabber[((subopcode)<<1)] = fn<carry, RC0, OV0>; \
629-
opcode ## Grabber[((subopcode)<<1)+1] = fn<carry, RC1, OV0>; \
630-
opcode ## Grabber[1024+((subopcode)<<1)] = fn<carry, RC0, OV1>; \
631-
opcode ## Grabber[1024+((subopcode)<<1)+1] = fn<carry, RC1, OV1>; \
531+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x000] = fn<carry, RC0, OV0>; \
532+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x001] = fn<carry, RC1, OV0>; \
533+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x400] = fn<carry, RC0, OV1>; \
534+
OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x401] = fn<carry, RC1, OV1>; \
535+
} while (0)
536+
537+
#define OPla(opcode, subopcode, fn) \
538+
do { \
539+
for (uint32_t mod = 0; mod < 512; mod++) { \
540+
OpcodeGrabber[((opcode) << 11) | (mod << 2) | (subopcode)] = fn; \
541+
} \
632542
} while (0)
633543

634-
#define OP31(subopcode, fn) OPX(SubOpcode31, subopcode, fn)
635-
#define OP31d(subopcode, fn) OPXd(SubOpcode31, subopcode, fn)
636-
#define OP31od(subopcode, fn) OPXod(SubOpcode31, subopcode, fn)
637-
#define OP31dc(subopcode, fn, carry) OPXdc(SubOpcode31, subopcode, fn, carry)
638-
#define OP31cod(subopcode, fn, carry) OPXcod(SubOpcode31, subopcode, fn, carry)
544+
#define OPr(opcode, mod, fn) \
545+
do { \
546+
OpcodeGrabber[((opcode) << 11) | (mod)] = fn; \
547+
} while (0)
639548

640-
#define OP59d(subopcode, fn) OPXd(SubOpcode59, subopcode, fn)
549+
#define OP31(subopcode, fn) OPX(31, subopcode, fn)
550+
#define OP31d(subopcode, fn) OPXd(31, subopcode, fn)
551+
#define OP31od(subopcode, fn) OPXod(31, subopcode, fn)
552+
#define OP31dc(subopcode, fn, carry) OPXdc(31, subopcode, fn, carry)
553+
#define OP31cod(subopcode, fn, carry) OPXcod(31, subopcode, fn, carry)
641554

642-
#define OP63(subopcode, fn) OPX(SubOpcode63, subopcode, fn)
643-
#define OP63d(subopcode, fn) OPXd(SubOpcode63, subopcode, fn)
644-
#define OP63dc(subopcode, fn, carry) OPXdc(SubOpcode63, subopcode, fn, carry)
555+
#define OP63(subopcode, fn) OPX(63, subopcode, fn)
556+
#define OP63d(subopcode, fn) OPXd(63, subopcode, fn)
557+
#define OP63dc(subopcode, fn, carry) OPXdc(63, subopcode, fn, carry)
645558

646-
void initialize_ppc_opcode_tables() {
647-
std::fill_n(OpcodeGrabber, 64, ppc_illegalop);
559+
#define OP59d(subopcode, fn) \
560+
do { \
561+
for (uint32_t mod = 0; mod < 16; mod++) { \
562+
OPXd(59, (mod << 5) | (subopcode), fn); \
563+
} \
564+
} while (0)
565+
566+
void initialize_ppc_opcode_table() {
567+
std::fill_n(OpcodeGrabber, 64 * 2048, ppc_illegalop);
648568
OP(3, ppc_twi);
649569
//OP(4, ppc_opcode4); - Altivec instructions not emulated yet. Uncomment once they're implemented.
650570
OP(7, ppc_mulli);
@@ -656,10 +576,7 @@ void initialize_ppc_opcode_tables() {
656576
OP(13, ppc_addic<RC1>);
657577
OP(14, ppc_addi<SHFT0>);
658578
OP(15, ppc_addi<SHFT1>);
659-
OP(16, ppc_opcode16);
660579
OP(17, ppc_sc);
661-
OP(18, ppc_opcode18);
662-
if (is_601) OP(19, ppc_opcode19<IS601>); else OP(19, ppc_opcode19<NOT601>);
663580
OP(20, ppc_rlwimi);
664581
OP(21, ppc_rlwinm);
665582
if (is_601 || include_601) OP(22, power_rlmi);
@@ -670,7 +587,6 @@ void initialize_ppc_opcode_tables() {
670587
OP(27, ppc_xori<SHFT1>);
671588
OP(28, ppc_andirc<SHFT0>);
672589
OP(29, ppc_andirc<SHFT1>);
673-
OP(31, ppc_opcode31);
674590
OP(32, ppc_lz<uint32_t>);
675591
OP(33, ppc_lzu<uint32_t>);
676592
OP(34, ppc_lz<uint8_t>);
@@ -695,10 +611,33 @@ void initialize_ppc_opcode_tables() {
695611
OP(53, ppc_stfsu);
696612
OP(54, ppc_stfd);
697613
OP(55, ppc_stfdu);
698-
OP(59, ppc_opcode59);
699-
OP(63, ppc_opcode63);
700614

701-
std::fill_n(SubOpcode31Grabber, 2048, ppc_illegalop);
615+
OPla(16, 0x0, (dppc_interpreter::ppc_bc<LK0, AA0>)); // bc
616+
OPla(16, 0x1, (dppc_interpreter::ppc_bc<LK1, AA0>)); // bcl
617+
OPla(16, 0x2, (dppc_interpreter::ppc_bc<LK0, AA1>)); // bca
618+
OPla(16, 0x3, (dppc_interpreter::ppc_bc<LK1, AA1>)); // bcla
619+
620+
OPla(18, 0x0, (dppc_interpreter::ppc_b<LK0, AA0>)); // b
621+
OPla(18, 0x1, (dppc_interpreter::ppc_b<LK1, AA0>)); // bl
622+
OPla(18, 0x2, (dppc_interpreter::ppc_b<LK0, AA1>)); // ba
623+
OPla(18, 0x3, (dppc_interpreter::ppc_b<LK1, AA1>)); // bla
624+
625+
OPr(19, 0, ppc_mcrf);
626+
OPr(19, 32, ppc_bclr<LK0>);
627+
OPr(19, 33, ppc_bclr<LK1>);
628+
OPr(19, 66, ppc_crnor);
629+
OPr(19, 100, ppc_rfi);
630+
OPr(19, 258, ppc_crandc);
631+
OPr(19, 300, ppc_isync);
632+
OPr(19, 386, ppc_crxor);
633+
OPr(19, 450, ppc_crnand);
634+
OPr(19, 514, ppc_crand);
635+
OPr(19, 578, ppc_creqv);
636+
OPr(19, 834, ppc_crorc);
637+
OPr(19, 898, ppc_cror);
638+
OPr(19, 1056, (is_601 ? ppc_bcctr<LK0, IS601> : ppc_bcctr<LK0, NOT601>));
639+
OPr(19, 1057, (is_601 ? ppc_bcctr<LK1, IS601> : ppc_bcctr<LK1, NOT601>));
640+
702641
OP31(0, ppc_cmp);
703642
OP31(4, ppc_tw);
704643
OP31(32, ppc_cmpl);
@@ -740,7 +679,7 @@ void initialize_ppc_opcode_tables() {
740679
OP31(631, ppc_lfdux);
741680
OP31(790, ppc_lhbrx);
742681

743-
SubOpcode31Grabber[(150<<1)+1] = ppc_stwcx; // No Rc=0 variant.
682+
OPr(31, (150<<1) | 1, ppc_stwcx); // No Rc=0 variant.
744683
OP31(151, ppc_stx<uint32_t>);
745684
OP31(183, ppc_stux<uint32_t>);
746685
OP31(215, ppc_stx<uint8_t>);
@@ -835,7 +774,6 @@ void initialize_ppc_opcode_tables() {
835774
if (!is_601) OP31(978, ppc_tlbld);
836775
if (!is_601) OP31(1010, ppc_tlbli);
837776

838-
std::fill_n(SubOpcode59Grabber, 64, ppc_illegalop);
839777
OP59d(18, ppc_fdivs);
840778
OP59d(20, ppc_fsubs);
841779
OP59d(21, ppc_fadds);
@@ -847,7 +785,6 @@ void initialize_ppc_opcode_tables() {
847785
OP59d(30, ppc_fnmsubs);
848786
OP59d(31, ppc_fnmadds);
849787

850-
std::fill_n(SubOpcode63Grabber, 2048, ppc_illegalop);
851788
OP63(0, ppc_fcmpu);
852789
OP63d(12, ppc_frsp);
853790
OP63d(14, ppc_fctiw);
@@ -890,7 +827,7 @@ void ppc_cpu_init(MemCtrlBase* mem_ctrl, uint32_t cpu_version, bool do_include_6
890827
is_601 = (cpu_version >> 16) == 1;
891828
include_601 = !is_601 & do_include_601;
892829

893-
initialize_ppc_opcode_tables();
830+
initialize_ppc_opcode_table();
894831

895832
// initialize emulator timers
896833
TimerManager::get_instance()->set_time_now_cb(&get_virt_time_ns);

cpu/ppc/test/ppctests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ static void read_test_float_data() {
317317

318318
int main() {
319319
is_601 = true;
320-
initialize_ppc_opcode_tables(); //kludge
320+
initialize_ppc_opcode_table(); //kludge
321321

322322
cout << "Running DingusPPC emulator tests..." << endl << endl;
323323

utils/imgfile_sdl.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ ImgFile::~ImgFile() = default;
4242

4343
bool ImgFile::open(const std::string &img_path)
4444
{
45-
if (is_deterministic) {
45+
if (is_deterministic && false) {
4646
// Avoid writes to the underlying file by reading it all in memory and
4747
// only operating on that.
4848
auto mem_stream = std::make_unique<std::stringstream>();
@@ -90,6 +90,9 @@ uint64_t ImgFile::read(void* buf, uint64_t offset, uint64_t length) const
9090

9191
uint64_t ImgFile::write(const void* buf, uint64_t offset, uint64_t length)
9292
{
93+
if (is_deterministic) {
94+
return length;
95+
}
9396
if (!impl->stream) {
9497
LOG_F(WARNING, "ImgFile::write before disk was opened, ignoring.");
9598
return 0;

0 commit comments

Comments
 (0)