Skip to content

Commit

Permalink
cranelift/x64: Narrow test immediate operands
Browse files Browse the repository at this point in the history
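The x86 `test` instruction does a bitwise-and operation, setting flags,
but otherwise discarding the result. When one operand is an immediate
constant, every bit that is zero in the constant is also zero in the
and-result, whatever the other operand holds. So we can emit shorter
versions of this instruction by truncating the most significant zero
bits and using the narrowest possible immediate form for the provided
constant. Note that narrowing preserves ZF, PF, CF and OF, but SF is
taken from the top bit of the narrowed operand (e.g. bit 7 for an
immediate such as 0x80), so this is only equivalent for consumers that
do not read the sign flag.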
  • Loading branch information
jameysharp committed Apr 20, 2024
1 parent 1c013e3 commit d6f67c6
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 65 deletions.
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2249,7 +2249,7 @@ impl RoundImm {
}

/// An operand's size in bits.
#[derive(Clone, Copy, PartialEq)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum OperandSize {
/// 8-bit.
Size8,
Expand Down
48 changes: 38 additions & 10 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1276,9 +1276,9 @@ pub(crate) fn emit(
};
}

Inst::CmpRmiR {
&Inst::CmpRmiR {
size,
src: src_e,
src: ref src_e,
dst: reg_g,
opcode,
} => {
Expand All @@ -1289,24 +1289,52 @@ pub(crate) fn emit(
CmpOpcode::Test => false,
};

let src_e = src_e.clone().to_reg_mem_imm();
let size = match (opcode, &src_e) {
// Bitwise-and against a constant can safely truncate any upper
// bits that are zero. We don't currently support comparing
// memory to an immediate, but if we did, note that since x86 is
// little-endian, narrowing the operand size for a memory access
// doesn't require adjusting the address mode.
(CmpOpcode::Test, &RegMemImm::Imm { simm32 }) => {
// Choose whichever operand size is narrower: the specified
// size or the size implied by the immediate.
size.min(if simm32 <= u8::MAX as u32 {
OperandSize::Size8
} else if simm32 <= u16::MAX as u32 {
OperandSize::Size16
} else if simm32 <= i32::MAX as u32 {
// When the `test` instruction is used with a r/m64, the
// 32-bit immediate is sign-extended. If the immediate
// has the sign bit clear then this makes no difference
// and we can truncate to a 32-bit comparison, to avoid
// the REX.W prefix.
OperandSize::Size32
} else {
OperandSize::Size64
})
}
_ => size,
};

let mut prefix = LegacyPrefixes::None;
if *size == OperandSize::Size16 {
if size == OperandSize::Size16 {
prefix = LegacyPrefixes::_66;
}
// A redundant REX prefix can change the meaning of this instruction.
let mut rex = RexFlags::from((*size, reg_g));
let mut rex = RexFlags::from((size, reg_g));

match src_e.clone().to_reg_mem_imm() {
match src_e {
RegMemImm::Reg { reg: reg_e } => {
let reg_e = allocs.next(reg_e);
if *size == OperandSize::Size8 {
if size == OperandSize::Size8 {
// Check whether the E register forces the use of a redundant REX.
rex.always_emit_if_8bit_needed(reg_e);
}

// Use the swapped operands encoding for CMP, to stay consistent with the output of
// gcc/llvm.
let opcode = match (*size, is_cmp) {
let opcode = match (size, is_cmp) {
(OperandSize::Size8, true) => 0x38,
(_, true) => 0x39,
(OperandSize::Size8, false) => 0x84,
Expand All @@ -1318,7 +1346,7 @@ pub(crate) fn emit(
RegMemImm::Mem { addr } => {
let addr = &addr.finalize(state, sink).with_allocs(allocs);
// Whereas here we revert to the "normal" G-E ordering for CMP.
let opcode = match (*size, is_cmp) {
let opcode = match (size, is_cmp) {
(OperandSize::Size8, true) => 0x3A,
(_, true) => 0x3B,
(OperandSize::Size8, false) => 0x84,
Expand All @@ -1334,15 +1362,15 @@ pub(crate) fn emit(

// And also here we use the "normal" G-E ordering.
let opcode = if is_cmp {
if *size == OperandSize::Size8 {
if size == OperandSize::Size8 {
0x80
} else if use_imm8 {
0x83
} else {
0x81
}
} else {
if *size == OperandSize::Size8 {
if size == OperandSize::Size8 {
0xF6
} else {
0xF7
Expand Down
48 changes: 24 additions & 24 deletions cranelift/filetests/filetests/isa/x64/i128.clif
Original file line number Diff line number Diff line change
Expand Up @@ -446,14 +446,14 @@ block0(v0: i128, v1: i128):
; cmpq %rcx, %rsi
; sete %r10b
; andq %r10, %r9
; testq $1, %r9
; testb $1, %r9b
; setne %al
; cmpq %rdx, %rdi
; setne %r8b
; cmpq %rcx, %rsi
; setne %r9b
; orq %r9, %r8
; testq $1, %r8
; testb $1, %r8b
; setne %r9b
; movq %r9, (%rsp)
; cmpq %rcx, %rsi
Expand All @@ -463,7 +463,7 @@ block0(v0: i128, v1: i128):
; setb %r11b
; andq %r11, %r10
; orq %r10, %r8
; testq $1, %r8
; testb $1, %r8b
; setne %r10b
; cmpq %rcx, %rsi
; setl %r11b
Expand All @@ -472,7 +472,7 @@ block0(v0: i128, v1: i128):
; setbe %r15b
; andq %r15, %r8
; orq %r8, %r11
; testq $1, %r11
; testb $1, %r11b
; setne %r8b
; cmpq %rcx, %rsi
; setg %r11b
Expand All @@ -481,7 +481,7 @@ block0(v0: i128, v1: i128):
; seta %r13b
; andq %r13, %r12
; orq %r12, %r11
; testq $1, %r11
; testb $1, %r11b
; setne %r11b
; cmpq %rcx, %rsi
; setg %r15b
Expand All @@ -490,7 +490,7 @@ block0(v0: i128, v1: i128):
; setae %r12b
; andq %r12, %rbx
; orq %rbx, %r15
; testq $1, %r15
; testb $1, %r15b
; setne %r13b
; cmpq %rcx, %rsi
; setb %r14b
Expand All @@ -499,7 +499,7 @@ block0(v0: i128, v1: i128):
; setb %bl
; andq %rbx, %r15
; orq %r15, %r14
; testq $1, %r14
; testb $1, %r14b
; setne %r14b
; cmpq %rcx, %rsi
; setb %bl
Expand All @@ -508,7 +508,7 @@ block0(v0: i128, v1: i128):
; setbe %r15b
; andq %r15, %r12
; orq %r12, %rbx
; testq $1, %rbx
; testb $1, %bl
; setne %r15b
; cmpq %rcx, %rsi
; seta %bl
Expand All @@ -517,7 +517,7 @@ block0(v0: i128, v1: i128):
; seta %r9b
; andq %r9, %r12
; orq %r12, %rbx
; testq $1, %rbx
; testb $1, %bl
; setne %bl
; cmpq %rcx, %rsi
; seta %sil
Expand All @@ -526,7 +526,7 @@ block0(v0: i128, v1: i128):
; setae %dil
; andq %rdi, %rcx
; orq %rcx, %rsi
; testq $1, %rsi
; testb $1, %sil
; setne %sil
; movq (%rsp), %rcx
; andl %ecx, %eax
Expand Down Expand Up @@ -1622,10 +1622,10 @@ block0(v0: i128, v1: i128):
; subq %r8, %rcx
; shrq %cl, %rdi
; xorq %rax, %rax
; testq $0x7f, %r8
; testb $0x7f, %r8b
; cmoveq %rax, %rdi
; orq %rsi, %rdi
; testq $0x40, %r8
; testb $0x40, %r8b
; cmoveq %rdx, %rax
; cmoveq %rdi, %rdx
; movq %rbp, %rsp
Expand Down Expand Up @@ -1680,10 +1680,10 @@ block0(v0: i128, v1: i128):
; subq %rax, %rcx
; shlq %cl, %rsi
; xorq %rdx, %rdx
; testq $0x7f, %rax
; testb $0x7f, %al
; cmoveq %rdx, %rsi
; orq %rdi, %rsi
; testq $0x40, %rax
; testb $0x40, %al
; movq %r10, %rax
; cmoveq %rsi, %rax
; cmoveq %r10, %rdx
Expand Down Expand Up @@ -1743,11 +1743,11 @@ block0(v0: i128, v1: i128):
; movq %rsi, %r9
; shlq %cl, %r9
; xorq %r11, %r11
; testq $0x7f, %rax
; testb $0x7f, %al
; cmoveq %r11, %r9
; orq %r9, %rdi
; sarq $0x3f, %rsi
; testq $0x40, %rax
; testb $0x40, %al
; movq %r10, %rax
; cmoveq %rdi, %rax
; movq %rsi, %rdx
Expand Down Expand Up @@ -1830,10 +1830,10 @@ block0(v0: i128, v1: i128):
; shrq %cl, %r10
; xorq %rax, %rax
; movq %r8, %rcx
; testq $0x7f, %rcx
; testb $0x7f, %cl
; cmoveq %rax, %r10
; orq %r11, %r10
; testq $0x40, %rcx
; testb $0x40, %cl
; cmoveq %rdx, %rax
; cmoveq %r10, %rdx
; movl $0x80, %ecx
Expand All @@ -1848,10 +1848,10 @@ block0(v0: i128, v1: i128):
; subq %r10, %rcx
; shlq %cl, %rsi
; xorq %r8, %r8
; testq $0x7f, %r10
; testb $0x7f, %r10b
; cmoveq %r8, %rsi
; orq %rdi, %rsi
; testq $0x40, %r10
; testb $0x40, %r10b
; movq %r9, %r10
; cmoveq %rsi, %r10
; cmoveq %r9, %r8
Expand Down Expand Up @@ -1934,10 +1934,10 @@ block0(v0: i128, v1: i128):
; shlq %cl, %r11
; xorq %rdx, %rdx
; movq %r9, %rcx
; testq $0x7f, %rcx
; testb $0x7f, %cl
; cmoveq %rdx, %r11
; orq %r8, %r11
; testq $0x40, %rcx
; testb $0x40, %cl
; movq %r10, %rax
; cmoveq %r11, %rax
; cmoveq %r10, %rdx
Expand All @@ -1952,10 +1952,10 @@ block0(v0: i128, v1: i128):
; subq %r9, %rcx
; shrq %cl, %rdi
; xorq %r11, %r11
; testq $0x7f, %r9
; testb $0x7f, %r9b
; cmoveq %r11, %rdi
; orq %rsi, %rdi
; testq $0x40, %r9
; testb $0x40, %r9b
; cmoveq %r8, %r11
; cmoveq %rdi, %r8
; orq %r11, %rax
Expand Down
20 changes: 10 additions & 10 deletions cranelift/filetests/filetests/isa/x64/ishl.clif
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ block0(v0: i128, v1: i8):
; subq %r8, %rcx
; shrq %cl, %rdi
; xorq %rax, %rax
; testq $0x7f, %r8
; testb $0x7f, %r8b
; cmoveq %rax, %rdi
; orq %rsi, %rdi
; testq $0x40, %r8
; testb $0x40, %r8b
; cmoveq %rdx, %rax
; cmoveq %rdi, %rdx
; movq %rbp, %rsp
Expand Down Expand Up @@ -110,10 +110,10 @@ block0(v0: i128, v1: i64):
; subq %r8, %rcx
; shrq %cl, %rdi
; xorq %rax, %rax
; testq $0x7f, %r8
; testb $0x7f, %r8b
; cmoveq %rax, %rdi
; orq %rsi, %rdi
; testq $0x40, %r8
; testb $0x40, %r8b
; cmoveq %rdx, %rax
; cmoveq %rdi, %rdx
; movq %rbp, %rsp
Expand Down Expand Up @@ -167,10 +167,10 @@ block0(v0: i128, v1: i32):
; subq %r8, %rcx
; shrq %cl, %rdi
; xorq %rax, %rax
; testq $0x7f, %r8
; testb $0x7f, %r8b
; cmoveq %rax, %rdi
; orq %rsi, %rdi
; testq $0x40, %r8
; testb $0x40, %r8b
; cmoveq %rdx, %rax
; cmoveq %rdi, %rdx
; movq %rbp, %rsp
Expand Down Expand Up @@ -224,10 +224,10 @@ block0(v0: i128, v1: i16):
; subq %r8, %rcx
; shrq %cl, %rdi
; xorq %rax, %rax
; testq $0x7f, %r8
; testb $0x7f, %r8b
; cmoveq %rax, %rdi
; orq %rsi, %rdi
; testq $0x40, %r8
; testb $0x40, %r8b
; cmoveq %rdx, %rax
; cmoveq %rdi, %rdx
; movq %rbp, %rsp
Expand Down Expand Up @@ -281,10 +281,10 @@ block0(v0: i128, v1: i8):
; subq %r8, %rcx
; shrq %cl, %rdi
; xorq %rax, %rax
; testq $0x7f, %r8
; testb $0x7f, %r8b
; cmoveq %rax, %rdi
; orq %rsi, %rdi
; testq $0x40, %r8
; testb $0x40, %r8b
; cmoveq %rdx, %rax
; cmoveq %rdi, %rdx
; movq %rbp, %rsp
Expand Down

0 comments on commit d6f67c6

Please sign in to comment.