Skip to content

Commit

Permalink
Re-add the sc+blr optimization removed in r457:721d85b14981.
Browse files Browse the repository at this point in the history
Since we now pass the instruction word to the sc handler, the primary
reason for removing the optimization (inability to read the instruction
word) is no longer an issue.  The optimization is left optional since
it is still unsafe with respect to getting the address of the sc
instruction.

Fixes #13.
  • Loading branch information
Andrew Church committed Jan 11, 2019
1 parent e7f5160 commit 7f4abd8
Show file tree
Hide file tree
Showing 9 changed files with 269 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGES
Expand Up @@ -5,6 +5,7 @@ Version 0.2
-----------
New features:
- Added binrec_enable_verify().
- Added the BINREC_OPT_G_PPC_SC_BLR optimization flag.

Changes:
- Moved binrec_setup_t state offset fields to an architecture-specific
Expand Down
1 change: 1 addition & 0 deletions include/binrec++.h
Expand Up @@ -74,6 +74,7 @@ namespace Optimize {
const unsigned int NO_FPSCR_STATE = BINREC_OPT_G_PPC_NO_FPSCR_STATE;
const unsigned int PAIRED_LWARX_STWCX = BINREC_OPT_G_PPC_PAIRED_LWARX_STWCX;
const unsigned int PS_STORE_DENORMALS = BINREC_OPT_G_PPC_PS_STORE_DENORMALS;
const unsigned int SC_BLR = BINREC_OPT_G_PPC_SC_BLR;
const unsigned int SINGLE_PREC_INPUTS = BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS;
const unsigned int TRIM_CR_STORES = BINREC_OPT_G_PPC_TRIM_CR_STORES;
const unsigned int USE_SPLIT_FIELDS = BINREC_OPT_G_PPC_USE_SPLIT_FIELDS;
Expand Down
32 changes: 23 additions & 9 deletions include/binrec.h
Expand Up @@ -163,12 +163,13 @@ extern "C" {
* The value of the NIA field in the PSB is set as the SRR0 register would
* be set on a true PowerPC processor: to the address of the trap
* instruction for trap exceptions, and to the address of the instruction
* _following_ the sc instruction for system call exceptions. The
* translated code will return immediately to its caller when the handler
* returns, and the call to the handler may in fact be translated as a
* tail call. The translated code does not check for NULL function
* pointers, so it will crash if an exception occurs and the associated
* function pointer is not set.
* _following_ the sc instruction for system call exceptions (but see also
* the BINREC_OPT_G_PPC_SC_BLR optimization flag). The translated code
* will return immediately to its caller when the handler returns, and the
* call to the handler may in fact be translated as a tail call. The
* translated code does not check for NULL function pointers, so it will
* crash if an exception occurs and the associated function pointer is
* not set.
*
* All instruction words with the primary opcode of the sc instruction
* (0x11) are decoded as that instruction. This deviates from the PowerPC
Expand Down Expand Up @@ -1155,6 +1156,19 @@ typedef struct binrec_setup_t {
*/
#define BINREC_OPT_G_PPC_PS_STORE_DENORMALS (1<<13)

/**
* BINREC_OPT_G_PPC_SC_BLR: Optimize an instruction sequence of "sc; blr"
* by setting NIA to the value of the LR register rather than the address
* of the instruction following the "sc" when calling the sc handler.
* This avoids the need to translate and call a block containing a single
* blr after returning from the sc handler.
*
* This optimization is UNSAFE: the sc handler cannot recover the original
* address of the instruction which triggered the exception when this
* optimization is triggered.
*/
#define BINREC_OPT_G_PPC_SC_BLR (1<<14)

/**
* BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS: Assume that the inputs to a
* single-precision floating-point instruction are in single precision.
Expand Down Expand Up @@ -1191,7 +1205,7 @@ typedef struct binrec_setup_t {
* the PowerPC architecture specification, it will behave correctly under
* this optimization.
*/
#define BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS (1<<14)
#define BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS (1<<15)

/**
* BINREC_OPT_G_PPC_TRIM_CR_STORES: Analyze the data flow through each
Expand All @@ -1210,7 +1224,7 @@ typedef struct binrec_setup_t {
* This optimization has no effect unless BINREC_OPT_G_PPC_USE_SPLIT_FIELDS
* is also enabled.
*/
#define BINREC_OPT_G_PPC_TRIM_CR_STORES (1<<15)
#define BINREC_OPT_G_PPC_TRIM_CR_STORES (1<<16)

/**
* BINREC_OPT_G_PPC_USE_SPLIT_FIELDS: Treat subfields of certain registers
Expand All @@ -1229,7 +1243,7 @@ typedef struct binrec_setup_t {
* in the processor state block. System call and trap handlers are not
* affected.
*/
#define BINREC_OPT_G_PPC_USE_SPLIT_FIELDS (1<<16)
#define BINREC_OPT_G_PPC_USE_SPLIT_FIELDS (1<<17)

/*------------ Host-architecture-specific optimization flags ------------*/

Expand Down
28 changes: 26 additions & 2 deletions src/guest-ppc/guest-ppc-rtl.c
Expand Up @@ -8641,7 +8641,7 @@ static inline void translate_insn(
RTLUnit * const unit = ctx->unit;

/* Skip instructions which were translated as part of an optimized
* instruction pair. */
* instruction pair (such as sc followed by blr). */
if (ctx->skip_next_insn) {
ctx->skip_next_insn = false;
return;
Expand Down Expand Up @@ -8839,10 +8839,33 @@ static inline void translate_insn(
return;

case OPCD_SC: {
/* Special case: translate sc followed by blr in a single step, to
* avoid having to return to caller and call a new unit containing
* just the blr. The scanner will terminate the block at an sc
* instruction which is not followed by a blr, so we only need to
* check whether this sc is at the end of the block. */
bool is_sc_blr = false;
if ((ctx->handle->guest_opt & BINREC_OPT_G_PPC_SC_BLR)
&& address + 4 < block->start + block->len) {
ASSERT(address + 8 == block->start + block->len);
const uint32_t *memory_base =
(const uint32_t *)ctx->handle->setup.guest_memory_base;
const uint32_t next_insn = bswap_be32(memory_base[(address+4)/4]);
ASSERT(next_insn == 0x4E800020);
is_sc_blr = true;
}
int nia;
if (is_sc_blr) {
const int lr = get_lr(ctx);
nia = rtl_alloc_register(unit, RTLTYPE_INT32);
rtl_add_insn(unit, RTLOP_ANDI, nia, lr, 0, -4);
} else {
nia = rtl_imm32(unit, address + 4);
}
guest_ppc_flush_cr(ctx, false);
guest_ppc_flush_fpscr(ctx);
flush_live_regs(ctx, true);
set_nia_imm(ctx, address + 4);
set_nia(ctx, nia);
const int sc_handler = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
rtl_add_insn(unit, RTLOP_LOAD, sc_handler, ctx->psb_reg, 0,
ctx->handle->setup.state_offsets_ppc.sc_handler);
Expand All @@ -8858,6 +8881,7 @@ static inline void translate_insn(
post_insn_callback(ctx, address);
rtl_add_insn(unit, RTLOP_RETURN, 0, ctx->psb_reg, 0, 0);
ctx->psb_reg = old_psb;
ctx->skip_next_insn = is_sc_blr;
return;
} // case OPCD_SC

Expand Down
8 changes: 6 additions & 2 deletions src/guest-ppc/guest-ppc-scan.c
Expand Up @@ -1328,7 +1328,8 @@ bool guest_ppc_scan(GuestPPCContext *ctx, uint32_t limit)
* to help out data flow analysis, since we need to be able to
* store all live register values to the state block if a trap is
* taken.) Also terminate the entire unit if this looks like the
* end of a function. */
* end of a function. But make sure not to stop at an sc before
* a blr so we can optimize the sc+blr case properly. */
const bool is_direct_branch = ((opcd & ~0x02) == OPCD_BC);
const bool is_indirect_branch =
(opcd == OPCD_x13 && (insn_XO_10(insn) == XO_BCLR
Expand All @@ -1338,7 +1339,10 @@ bool guest_ppc_scan(GuestPPCContext *ctx, uint32_t limit)
|| ((opcd == OPCD_BC || is_indirect_branch)
&& (insn_BO(insn) & 0x14) == 0x14));
const bool is_icbi = (opcd == OPCD_x1F && insn_XO_10(insn) == XO_ICBI);
const bool is_sc = (opcd == OPCD_SC);
const bool is_sc =
(opcd == OPCD_SC
&& (insn_count == max_insns - 1
|| bswap_be32(memory_base[(address+4)/4]) != 0x4E800020));
/* Also terminate at a GQR write for constant GQR optimization. */
const bool is_terminal_gqr_write =
((ctx->handle->guest_opt & BINREC_OPT_G_PPC_CONSTANT_GQRS)
Expand Down
81 changes: 81 additions & 0 deletions tests/guest-ppc/exec/sc-blr.c
@@ -0,0 +1,81 @@
/*
* libbinrec: a recompiling translator for machine code
* Copyright (c) 2016 Andrew Church <achurch@achurch.org>
*
* This software may be copied and redistributed under certain conditions;
* see the file "COPYING" in the source code distribution for details.
* NO WARRANTY is provided with this software.
*/

#include "include/binrec.h"
#include "src/endian.h"
#include "tests/common.h"
#include "tests/execute.h"
#include "tests/guest-ppc/common.h"
#include "tests/log-capture.h"


/* Values captured by sc_handler() so that main() can inspect them. */
static uint32_t sc_r3;   // r3 as observed by the handler at the sc.
static uint32_t sc_nia;  // NIA as observed by the handler at the sc.

/**
 * sc_handler:  System call handler installed in the test's PPCState.
 * Checks its arguments, records the incoming r3 and NIA values in sc_r3
 * and sc_nia, and then overwrites NIA with 0x1018 (the address of the
 * "mtlr r0" instruction in main()'s guest code, which is loaded at 0x1000).
 *
 * [Parameters]
 *     state: Processor state block; asserted to be non-NULL.
 *     insn: Instruction word passed to the handler; asserted to be
 *         0x44000002 (the "sc" word used by the guest code).
 * [Return value]
 *     The same state pointer that was passed in.
 */
static PPCState *sc_handler(PPCState *state, uint32_t insn)
{
    ASSERT(state);
    ASSERT(insn == 0x44000002);

    /* Snapshot NIA and r3 before NIA is replaced below. */
    sc_nia = state->nia;
    sc_r3 = state->gpr[3];

    /* Redirect execution past the sc+blr pair. */
    state->nia = 0x1018;
    return state;
}


/**
 * configure_handle:  Handle setup callback passed to call_guest_code().
 * Sets the optimization flags on the handle so that
 * BINREC_OPT_G_PPC_SC_BLR is the only flag enabled; the other two flag
 * arguments are passed as zero.
 *
 * [Parameters]
 *     handle: Translation handle to configure.
 */
static void configure_handle(binrec_t *handle)
{
    binrec_set_optimization_flags(handle, 0, BINREC_OPT_G_PPC_SC_BLR, 0);
}

/**
 * main:  Execution test for the sc+blr optimization.  Runs a short guest
 * routine containing "sc; blr" with BINREC_OPT_G_PPC_SC_BLR enabled and
 * checks the r3 and NIA values seen by the sc handler.
 *
 * [Return value]
 *     EXIT_SUCCESS if the test passes or is skipped because the native
 *     architecture is not supported.  Failure paths go through the
 *     EXPECT/FAIL macros.
 */
int main(void)
{
    if (!binrec_host_supported(binrec_native_arch())) {
        printf("Skipping test because native architecture not supported.\n");
        return EXIT_SUCCESS;
    }

    uint8_t *memory;
    EXPECT(memory = malloc(0x10000));

    /* Guest routine: stash the caller's LR in r0, point LR at 0x1237,
     * load r3 with 1, then execute the sc+blr pair under test.  The sc
     * handler sets NIA to 0x1018, which is the "mtlr r0" below, so the
     * final two instructions restore LR and return. */
    static const uint32_t guest_code[] = {
        0x7C0802A6,  // mflr r0
        0x38601237,  // li r3,0x1237
        0x7C6803A6,  // mtlr r3
        0x38600001,  // li r3,1
        0x44000002,  // sc
        0x4E800020,  // blr
        0x7C0803A6,  // mtlr r0
        0x4E800020,  // blr
    };
    const uint32_t entry_point = 0x1000;
    memcpy_be32(memory + entry_point, guest_code, sizeof(guest_code));

    /* Start from an all-zero processor state with only the sc handler
     * installed. */
    PPCState state;
    memset(&state, 0, sizeof(state));
    state.sc_handler = sc_handler;

    if (!call_guest_code(BINREC_ARCH_PPC_7XX, &state, memory, entry_point,
                         configure_handle, NULL)) {
        /* Dump whatever was logged, if anything, before reporting. */
        const char *captured_log = get_log_messages();
        if (captured_log != NULL) {
            fputs(captured_log, stdout);
        }
        FAIL("Failed to execute guest code");
    }

    /* r3 must still hold 1 both at the sc and on return.  The handler is
     * expected to see NIA == 0x1234: LR (0x1237) with its low two bits
     * cleared, rather than the address following the sc. */
    EXPECT_EQ(state.gpr[3], 1);
    EXPECT_EQ(sc_r3, 1);
    EXPECT_EQ(sc_nia, 0x1234);

    free(memory);
    return EXIT_SUCCESS;
}
47 changes: 47 additions & 0 deletions tests/guest-ppc/opt/sc-blr-limit.c
@@ -0,0 +1,47 @@
/*
* libbinrec: a recompiling translator for machine code
* Copyright (c) 2016 Andrew Church <achurch@achurch.org>
*
* This software may be copied and redistributed under certain conditions;
* see the file "COPYING" in the source code distribution for details.
* NO WARRANTY is provided with this software.
*/

#include "tests/guest-ppc/insn/common.h"

/* Guest code for this test.  Only the "sc" word is exposed as "input"
 * (see the #define below); the "blr" is placed in "extra", immediately
 * after it, so that it lies outside the range of code to translate.
 * The translator should not apply the sc+blr optimization in this case. */
static const struct {
uint8_t input[4];
uint8_t extra[4];
} input_struct = {
{0x44,0x00,0x00,0x02}, // sc
{0x4E,0x80,0x00,0x20}, // blr
};
/* Make the name "input" refer to the 4-byte sc-only array. */
#define input input_struct.input

/* The sc+blr optimization is enabled, so the expected output below shows
 * it being suppressed rather than merely switched off. */
static const unsigned int guest_opt = BINREC_OPT_G_PPC_SC_BLR;
static const unsigned int common_opt = 0;

/* NOTE(review): presumably consumed by tests/rtl-disasm-test.i as the
 * expected translation result -- confirm against that file. */
static const bool expected_success = true;

/* Expected RTL disassembly.  Alias a1 is assigned the constant 4 before
 * the handler call (instruction 2-3), i.e. no register-derived value is
 * generated for it, and only one alias is declared. */
static const char expected[] =
" 0: LOAD_ARG r1, 0\n"
" 1: LOAD_ARG r2, 1\n"
" 2: LOAD_IMM r3, 4\n"
" 3: SET_ALIAS a1, r3\n"
" 4: LOAD r4, 984(r1)\n"
" 5: LOAD_IMM r5, 0x44000002\n"
" 6: CALL r6, @r4, r1, r5\n"
" 7: RETURN r6\n"
" 8: LOAD_IMM r7, 4\n"
" 9: SET_ALIAS a1, r7\n"
" 10: RETURN r1\n"
"\n"
"Alias 1: int32 @ 964(r1)\n"
"\n"
"Block 0: <none> --> [0,7] --> <none>\n"
"Block 1: <none> --> [8,10] --> <none>\n"
;

#include "tests/rtl-disasm-test.i"
43 changes: 43 additions & 0 deletions tests/guest-ppc/opt/sc-blr.c
@@ -0,0 +1,43 @@
/*
* libbinrec: a recompiling translator for machine code
* Copyright (c) 2016 Andrew Church <achurch@achurch.org>
*
* This software may be copied and redistributed under certain conditions;
* see the file "COPYING" in the source code distribution for details.
* NO WARRANTY is provided with this software.
*/

#include "tests/guest-ppc/insn/common.h"

/* Guest code for this test: an sc immediately followed by a blr, both
 * inside the range of code to translate. */
static const uint8_t input[] = {
0x44,0x00,0x00,0x02, // sc
0x4E,0x80,0x00,0x20, // blr
};

/* Enable only the sc+blr optimization. */
static const unsigned int guest_opt = BINREC_OPT_G_PPC_SC_BLR;
static const unsigned int common_opt = 0;

/* NOTE(review): presumably consumed by tests/rtl-disasm-test.i as the
 * expected translation result -- confirm against that file. */
static const bool expected_success = true;

/* Expected RTL disassembly with the optimization applied.  Instead of a
 * constant, the value stored to alias a1 before the handler call is read
 * from alias a2 and masked with -4 (instructions 2-4).  The fallthrough
 * block stores 8 to alias a1, i.e. the offset just past both input
 * instructions. */
static const char expected[] =
" 0: LOAD_ARG r1, 0\n"
" 1: LOAD_ARG r2, 1\n"
" 2: GET_ALIAS r3, a2\n"
" 3: ANDI r4, r3, -4\n"
" 4: SET_ALIAS a1, r4\n"
" 5: LOAD r5, 984(r1)\n"
" 6: LOAD_IMM r6, 0x44000002\n"
" 7: CALL r7, @r5, r1, r6\n"
" 8: RETURN r7\n"
" 9: LOAD_IMM r8, 8\n"
" 10: SET_ALIAS a1, r8\n"
" 11: RETURN r1\n"
"\n"
"Alias 1: int32 @ 964(r1)\n"
"Alias 2: int32 @ 932(r1)\n"
"\n"
"Block 0: <none> --> [0,8] --> <none>\n"
"Block 1: <none> --> [9,11] --> <none>\n"
;

#include "tests/rtl-disasm-test.i"
41 changes: 41 additions & 0 deletions tests/guest-ppc/opt/sc-not-blr.c
@@ -0,0 +1,41 @@
/*
* libbinrec: a recompiling translator for machine code
* Copyright (c) 2016 Andrew Church <achurch@achurch.org>
*
* This software may be copied and redistributed under certain conditions;
* see the file "COPYING" in the source code distribution for details.
* NO WARRANTY is provided with this software.
*/

#include "tests/guest-ppc/insn/common.h"

/* Guest code for this test: an sc followed by an instruction which is
 * not a blr (a nop). */
static const uint8_t input[] = {
0x44,0x00,0x00,0x02, // sc
0x60,0x00,0x00,0x00, // nop
};

/* The sc+blr optimization is enabled, but the input does not contain the
 * sc+blr pattern. */
static const unsigned int guest_opt = BINREC_OPT_G_PPC_SC_BLR;
static const unsigned int common_opt = 0;

/* NOTE(review): presumably consumed by tests/rtl-disasm-test.i as the
 * expected translation result -- confirm against that file. */
static const bool expected_success = true;

/* Expected RTL disassembly.  Alias a1 is assigned the constant 4 before
 * the handler call (instructions 2-3), with no register-derived value,
 * and the output contains no code after the sc beyond the final store of
 * 4 to alias a1 and return. */
static const char expected[] =
" 0: LOAD_ARG r1, 0\n"
" 1: LOAD_ARG r2, 1\n"
" 2: LOAD_IMM r3, 4\n"
" 3: SET_ALIAS a1, r3\n"
" 4: LOAD r4, 984(r1)\n"
" 5: LOAD_IMM r5, 0x44000002\n"
" 6: CALL r6, @r4, r1, r5\n"
" 7: RETURN r6\n"
" 8: LOAD_IMM r7, 4\n"
" 9: SET_ALIAS a1, r7\n"
" 10: RETURN r1\n"
"\n"
"Alias 1: int32 @ 964(r1)\n"
"\n"
"Block 0: <none> --> [0,7] --> <none>\n"
"Block 1: <none> --> [8,10] --> <none>\n"
;

#include "tests/rtl-disasm-test.i"

0 comments on commit 7f4abd8

Please sign in to comment.