Skip to content

Commit

Permalink
Avoid QEMU's deposit64/extract64 in hot code paths
Browse files Browse the repository at this point in the history
These functions have assertions that are enabled even in release mode, and
those assertions show up while profiling QEMU booting CheriBSD.
Re-implementing them without assertions gives a small but measurable speedup:

```
hyperfine -L qemu /local/scratch/alr48/cheri/output/sdk/bin/qemu-system-riscv64cheri.v5.2.0-933-g0c09763123,/local/scratch/alr48/cheri/output/sdk/bin/qemu-system-riscv64cheri  '{qemu} -M virt -m 2048 -nographic -bios bbl-riscv64cheri-virt-fw_jump.bin -kernel /local/scratch/alr48/cheri/output/kernel-riscv64-purecap.CHERI-PURECAP-QEMU-MFS-ROOT -append init_path=/sbin/startup-benchmark.sh'
Benchmark CTSRD-CHERI#1: /local/scratch/alr48/cheri/output/sdk/bin/qemu-system-riscv64cheri.v5.2.0-933-g0c09763123 -M virt -m 2048 -nographic -bios bbl-riscv64cheri-virt-fw_jump.bin -kernel /local/scratch/alr48/cheri/output/kernel-riscv64-purecap.CHERI-PURECAP-QEMU-MFS-ROOT -append init_path=/sbin/startup-benchmark.sh
  Time (mean ± σ):      9.494 s ±  0.054 s    [User: 8.519 s, System: 0.178 s]
  Range (min … max):    9.443 s …  9.600 s    10 runs

Benchmark CTSRD-CHERI#2: /local/scratch/alr48/cheri/output/sdk/bin/qemu-system-riscv64cheri -M virt -m 2048 -nographic -bios bbl-riscv64cheri-virt-fw_jump.bin -kernel /local/scratch/alr48/cheri/output/kernel-riscv64-purecap.CHERI-PURECAP-QEMU-MFS-ROOT -append init_path=/sbin/startup-benchmark.sh
  Time (mean ± σ):      9.284 s ±  0.043 s    [User: 8.249 s, System: 0.135 s]
  Range (min … max):    9.234 s …  9.381 s    10 runs

Summary
  '/local/scratch/alr48/cheri/output/sdk/bin/qemu-system-riscv64cheri -M virt -m 2048 -nographic -bios bbl-riscv64cheri-virt-fw_jump.bin -kernel /local/scratch/alr48/cheri/output/kernel-riscv64-purecap.CHERI-PURECAP-QEMU-MFS-ROOT -append init_path=/sbin/startup-benchmark.sh' ran
    1.02 ± 0.01 times faster than '/local/scratch/alr48/cheri/output/sdk/bin/qemu-system-riscv64cheri.v5.2.0-933-g0c09763123 -M virt -m 2048 -nographic -bios bbl-riscv64cheri-virt-fw_jump.bin -kernel /local/scratch/alr48/cheri/output/kernel-riscv64-purecap.CHERI-PURECAP-QEMU-MFS-ROOT -append init_path=/sbin/startup-benchmark.sh'
```
  • Loading branch information
arichardson committed Jul 7, 2021
1 parent 0c09763 commit 2d7de03
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
3 changes: 2 additions & 1 deletion target/cheri-common/cheri-lazy-capregs-types.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ typedef enum CapRegState {
CREG_TAGGED_CAP = 0b10,
/// This capability register holds a fully decompressed capability.
/// The tag bit can be read from the cap_register_t structure.
CREG_FULLY_DECOMPRESSED = 0b11
CREG_FULLY_DECOMPRESSED = 0b11,
CREG_STATE_MASK = 0b11,
} CapRegState;

// Cap registers should be padded so they are easier to move.
Expand Down
26 changes: 19 additions & 7 deletions target/cheri-common/cheri-lazy-capregs.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,20 @@

static inline GPCapRegs *cheri_get_gpcrs(CPUArchState *env);

static inline uint64_t capreg_state_set_to_integer_mask(unsigned reg)
static inline QEMU_ALWAYS_INLINE uint64_t
capreg_state_set_to_integer_mask(unsigned reg)
{
return ~(UINT64_C(3) << (reg * 2));
return ~(((uint64_t)CREG_STATE_MASK) << (reg * 2));
}

static inline CapRegState get_capreg_state(const GPCapRegs *gpcrs, unsigned reg)
{
cheri_debug_assert(reg < 32);
return (CapRegState)extract64(gpcrs->capreg_state, reg * 2, 2);
/*
* Note: QEMU's extract64 has assertions enabled (even in release mode).
* Since this is a hot path, we re-implement it without assertions here.
*/
return (CapRegState)((gpcrs->capreg_state >> (reg * 2)) & CREG_STATE_MASK);
}

static inline void sanity_check_capreg(GPCapRegs *gpcrs, unsigned regnum)
Expand Down Expand Up @@ -101,8 +106,9 @@ static inline void sanity_check_capreg(GPCapRegs *gpcrs, unsigned regnum)
#endif // CONFIG_DEBUG_TCG
}

static inline void set_capreg_state(GPCapRegs *gpcrs, unsigned regnum,
CapRegState new_state)
/* Marked as always_inline to avoid the |= if called with CREG_INTEGER. */
static inline QEMU_ALWAYS_INLINE void
set_capreg_state(GPCapRegs *gpcrs, unsigned regnum, CapRegState new_state)
{
if (regnum == NULL_CAPREG_INDEX) {
cheri_debug_assert(new_state == CREG_FULLY_DECOMPRESSED &&
Expand All @@ -111,8 +117,14 @@ static inline void set_capreg_state(GPCapRegs *gpcrs, unsigned regnum,
}

cheri_debug_assert(regnum < 32);
gpcrs->capreg_state =
deposit64(gpcrs->capreg_state, regnum * 2, 2, new_state);
/*
* Note: QEMU's deposit64 has assertions enabled (even in release mode).
* Since this is a hot path, we re-implement it without assertions here.
*/
gpcrs->capreg_state &= capreg_state_set_to_integer_mask(regnum);
if (!__builtin_constant_p(new_state) || new_state != 0) {
gpcrs->capreg_state |= (((uint64_t)new_state) << (regnum * 2));
}
// Check that the compressed and decompressed caps are in sync
sanity_check_capreg(gpcrs, regnum);
}
Expand Down

0 comments on commit 2d7de03

Please sign in to comment.