diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index caf21dbbb4a2..c1a92bd88cdc 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -2249,7 +2249,7 @@ impl RoundImm { } /// An operand's size in bits. -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum OperandSize { /// 8-bit. Size8, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index cd43516a4b26..b44b008f33a3 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1276,9 +1276,9 @@ pub(crate) fn emit( }; } - Inst::CmpRmiR { + &Inst::CmpRmiR { size, - src: src_e, + src: ref src_e, dst: reg_g, opcode, } => { @@ -1289,24 +1289,52 @@ pub(crate) fn emit( CmpOpcode::Test => false, }; + let src_e = src_e.clone().to_reg_mem_imm(); + let size = match (opcode, &src_e) { + // Bitwise-and against a constant can safely truncate any upper + // bits that are zero. We don't currently support comparing + // memory to an immediate, but if we did, note that since x86 is + // little-endian, narrowing the operand size for a memory access + // doesn't require adjusting the address mode. + (CmpOpcode::Test, &RegMemImm::Imm { simm32 }) => { + // Take the narrower of the requested size and the smallest size + // whose sign bit is clear in the immediate, so SF stays the same. + size.min(if simm32 <= i8::MAX as u32 { + OperandSize::Size8 + } else if simm32 <= i16::MAX as u32 { + OperandSize::Size16 + } else if simm32 <= i32::MAX as u32 { + // When the `test` instruction is used with a r/m64, the + // 32-bit immediate is sign-extended. If the immediate + // has the sign bit clear then this makes no difference + // and we can truncate to a 32-bit comparison, to avoid + // the REX.W prefix. 
+ OperandSize::Size32 + } else { + OperandSize::Size64 + }) + } + _ => size, + }; + let mut prefix = LegacyPrefixes::None; - if *size == OperandSize::Size16 { + if size == OperandSize::Size16 { prefix = LegacyPrefixes::_66; } // A redundant REX prefix can change the meaning of this instruction. - let mut rex = RexFlags::from((*size, reg_g)); + let mut rex = RexFlags::from((size, reg_g)); - match src_e.clone().to_reg_mem_imm() { + match src_e { RegMemImm::Reg { reg: reg_e } => { let reg_e = allocs.next(reg_e); - if *size == OperandSize::Size8 { + if size == OperandSize::Size8 { // Check whether the E register forces the use of a redundant REX. rex.always_emit_if_8bit_needed(reg_e); } // Use the swapped operands encoding for CMP, to stay consistent with the output of // gcc/llvm. - let opcode = match (*size, is_cmp) { + let opcode = match (size, is_cmp) { (OperandSize::Size8, true) => 0x38, (_, true) => 0x39, (OperandSize::Size8, false) => 0x84, @@ -1318,7 +1346,7 @@ pub(crate) fn emit( RegMemImm::Mem { addr } => { let addr = &addr.finalize(state, sink).with_allocs(allocs); // Whereas here we revert to the "normal" G-E ordering for CMP. - let opcode = match (*size, is_cmp) { + let opcode = match (size, is_cmp) { (OperandSize::Size8, true) => 0x3A, (_, true) => 0x3B, (OperandSize::Size8, false) => 0x84, @@ -1334,7 +1362,7 @@ pub(crate) fn emit( // And also here we use the "normal" G-E ordering. 
let opcode = if is_cmp { - if *size == OperandSize::Size8 { + if size == OperandSize::Size8 { 0x80 } else if use_imm8 { 0x83 @@ -1342,7 +1370,7 @@ pub(crate) fn emit( 0x81 } } else { - if *size == OperandSize::Size8 { + if size == OperandSize::Size8 { 0xF6 } else { 0xF7 diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index d7523724bdf1..cb6219aefa48 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -446,14 +446,14 @@ block0(v0: i128, v1: i128): ; cmpq %rcx, %rsi ; sete %r10b ; andq %r10, %r9 -; testq $1, %r9 +; testb $1, %r9b ; setne %al ; cmpq %rdx, %rdi ; setne %r8b ; cmpq %rcx, %rsi ; setne %r9b ; orq %r9, %r8 -; testq $1, %r8 +; testb $1, %r8b ; setne %r9b ; movq %r9, (%rsp) ; cmpq %rcx, %rsi @@ -463,7 +463,7 @@ block0(v0: i128, v1: i128): ; setb %r11b ; andq %r11, %r10 ; orq %r10, %r8 -; testq $1, %r8 +; testb $1, %r8b ; setne %r10b ; cmpq %rcx, %rsi ; setl %r11b @@ -472,7 +472,7 @@ block0(v0: i128, v1: i128): ; setbe %r15b ; andq %r15, %r8 ; orq %r8, %r11 -; testq $1, %r11 +; testb $1, %r11b ; setne %r8b ; cmpq %rcx, %rsi ; setg %r11b @@ -481,7 +481,7 @@ block0(v0: i128, v1: i128): ; seta %r13b ; andq %r13, %r12 ; orq %r12, %r11 -; testq $1, %r11 +; testb $1, %r11b ; setne %r11b ; cmpq %rcx, %rsi ; setg %r15b @@ -490,7 +490,7 @@ block0(v0: i128, v1: i128): ; setae %r12b ; andq %r12, %rbx ; orq %rbx, %r15 -; testq $1, %r15 +; testb $1, %r15b ; setne %r13b ; cmpq %rcx, %rsi ; setb %r14b @@ -499,7 +499,7 @@ block0(v0: i128, v1: i128): ; setb %bl ; andq %rbx, %r15 ; orq %r15, %r14 -; testq $1, %r14 +; testb $1, %r14b ; setne %r14b ; cmpq %rcx, %rsi ; setb %bl @@ -508,7 +508,7 @@ block0(v0: i128, v1: i128): ; setbe %r15b ; andq %r15, %r12 ; orq %r12, %rbx -; testq $1, %rbx +; testb $1, %bl ; setne %r15b ; cmpq %rcx, %rsi ; seta %bl @@ -517,7 +517,7 @@ block0(v0: i128, v1: i128): ; seta %r9b ; andq %r9, %r12 ; orq %r12, %rbx -; testq 
$1, %rbx +; testb $1, %bl ; setne %bl ; cmpq %rcx, %rsi ; seta %sil @@ -526,7 +526,7 @@ block0(v0: i128, v1: i128): ; setae %dil ; andq %rdi, %rcx ; orq %rcx, %rsi -; testq $1, %rsi +; testb $1, %sil ; setne %sil ; movq (%rsp), %rcx ; andl %ecx, %eax @@ -1622,10 +1622,10 @@ block0(v0: i128, v1: i128): ; subq %r8, %rcx ; shrq %cl, %rdi ; xorq %rax, %rax -; testq $0x7f, %r8 +; testb $0x7f, %r8b ; cmoveq %rax, %rdi ; orq %rsi, %rdi -; testq $0x40, %r8 +; testb $0x40, %r8b ; cmoveq %rdx, %rax ; cmoveq %rdi, %rdx ; movq %rbp, %rsp @@ -1680,10 +1680,10 @@ block0(v0: i128, v1: i128): ; subq %rax, %rcx ; shlq %cl, %rsi ; xorq %rdx, %rdx -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %rdx, %rsi ; orq %rdi, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r10, %rax ; cmoveq %rsi, %rax ; cmoveq %r10, %rdx @@ -1743,11 +1743,11 @@ block0(v0: i128, v1: i128): ; movq %rsi, %r9 ; shlq %cl, %r9 ; xorq %r11, %r11 -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %r11, %r9 ; orq %r9, %rdi ; sarq $0x3f, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r10, %rax ; cmoveq %rdi, %rax ; movq %rsi, %rdx @@ -1830,10 +1830,10 @@ block0(v0: i128, v1: i128): ; shrq %cl, %r10 ; xorq %rax, %rax ; movq %r8, %rcx -; testq $0x7f, %rcx +; testb $0x7f, %cl ; cmoveq %rax, %r10 ; orq %r11, %r10 -; testq $0x40, %rcx +; testb $0x40, %cl ; cmoveq %rdx, %rax ; cmoveq %r10, %rdx ; movl $0x80, %ecx @@ -1848,10 +1848,10 @@ block0(v0: i128, v1: i128): ; subq %r10, %rcx ; shlq %cl, %rsi ; xorq %r8, %r8 -; testq $0x7f, %r10 +; testb $0x7f, %r10b ; cmoveq %r8, %rsi ; orq %rdi, %rsi -; testq $0x40, %r10 +; testb $0x40, %r10b ; movq %r9, %r10 ; cmoveq %rsi, %r10 ; cmoveq %r9, %r8 @@ -1934,10 +1934,10 @@ block0(v0: i128, v1: i128): ; shlq %cl, %r11 ; xorq %rdx, %rdx ; movq %r9, %rcx -; testq $0x7f, %rcx +; testb $0x7f, %cl ; cmoveq %rdx, %r11 ; orq %r8, %r11 -; testq $0x40, %rcx +; testb $0x40, %cl ; movq %r10, %rax ; cmoveq %r11, %rax ; cmoveq %r10, %rdx @@ -1952,10 +1952,10 @@ block0(v0: i128, v1: 
i128): ; subq %r9, %rcx ; shrq %cl, %rdi ; xorq %r11, %r11 -; testq $0x7f, %r9 +; testb $0x7f, %r9b ; cmoveq %r11, %rdi ; orq %rsi, %rdi -; testq $0x40, %r9 +; testb $0x40, %r9b ; cmoveq %r8, %r11 ; cmoveq %rdi, %r8 ; orq %r11, %rax diff --git a/cranelift/filetests/filetests/isa/x64/ishl.clif b/cranelift/filetests/filetests/isa/x64/ishl.clif index 32ab2f796270..3adb5490fee9 100644 --- a/cranelift/filetests/filetests/isa/x64/ishl.clif +++ b/cranelift/filetests/filetests/isa/x64/ishl.clif @@ -53,10 +53,10 @@ block0(v0: i128, v1: i8): ; subq %r8, %rcx ; shrq %cl, %rdi ; xorq %rax, %rax -; testq $0x7f, %r8 +; testb $0x7f, %r8b ; cmoveq %rax, %rdi ; orq %rsi, %rdi -; testq $0x40, %r8 +; testb $0x40, %r8b ; cmoveq %rdx, %rax ; cmoveq %rdi, %rdx ; movq %rbp, %rsp @@ -110,10 +110,10 @@ block0(v0: i128, v1: i64): ; subq %r8, %rcx ; shrq %cl, %rdi ; xorq %rax, %rax -; testq $0x7f, %r8 +; testb $0x7f, %r8b ; cmoveq %rax, %rdi ; orq %rsi, %rdi -; testq $0x40, %r8 +; testb $0x40, %r8b ; cmoveq %rdx, %rax ; cmoveq %rdi, %rdx ; movq %rbp, %rsp @@ -167,10 +167,10 @@ block0(v0: i128, v1: i32): ; subq %r8, %rcx ; shrq %cl, %rdi ; xorq %rax, %rax -; testq $0x7f, %r8 +; testb $0x7f, %r8b ; cmoveq %rax, %rdi ; orq %rsi, %rdi -; testq $0x40, %r8 +; testb $0x40, %r8b ; cmoveq %rdx, %rax ; cmoveq %rdi, %rdx ; movq %rbp, %rsp @@ -224,10 +224,10 @@ block0(v0: i128, v1: i16): ; subq %r8, %rcx ; shrq %cl, %rdi ; xorq %rax, %rax -; testq $0x7f, %r8 +; testb $0x7f, %r8b ; cmoveq %rax, %rdi ; orq %rsi, %rdi -; testq $0x40, %r8 +; testb $0x40, %r8b ; cmoveq %rdx, %rax ; cmoveq %rdi, %rdx ; movq %rbp, %rsp @@ -281,10 +281,10 @@ block0(v0: i128, v1: i8): ; subq %r8, %rcx ; shrq %cl, %rdi ; xorq %rax, %rax -; testq $0x7f, %r8 +; testb $0x7f, %r8b ; cmoveq %rax, %rdi ; orq %rsi, %rdi -; testq $0x40, %r8 +; testb $0x40, %r8b ; cmoveq %rdx, %rax ; cmoveq %rdi, %rdx ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/sshr.clif b/cranelift/filetests/filetests/isa/x64/sshr.clif index 
b44ad0c35ed6..d4d60bd09d13 100644 --- a/cranelift/filetests/filetests/isa/x64/sshr.clif +++ b/cranelift/filetests/filetests/isa/x64/sshr.clif @@ -57,11 +57,11 @@ block0(v0: i128, v1: i8): ; movq %rsi, %r9 ; shlq %cl, %r9 ; xorq %r11, %r11 -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %r11, %r9 ; orq %r9, %rdi ; sarq $0x3f, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r10, %rax ; cmoveq %rdi, %rax ; movq %rsi, %rdx @@ -122,11 +122,11 @@ block0(v0: i128, v1: i64): ; movq %rsi, %r8 ; shlq %cl, %r8 ; xorq %r10, %r10 -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %r10, %r8 ; orq %r8, %rdi ; sarq $0x3f, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r9, %rax ; cmoveq %rdi, %rax ; movq %rsi, %rdx @@ -187,11 +187,11 @@ block0(v0: i128, v1: i32): ; movq %rsi, %r8 ; shlq %cl, %r8 ; xorq %r10, %r10 -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %r10, %r8 ; orq %r8, %rdi ; sarq $0x3f, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r9, %rax ; cmoveq %rdi, %rax ; movq %rsi, %rdx @@ -252,11 +252,11 @@ block0(v0: i128, v1: i16): ; movq %rsi, %r8 ; shlq %cl, %r8 ; xorq %r10, %r10 -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %r10, %r8 ; orq %r8, %rdi ; sarq $0x3f, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r9, %rax ; cmoveq %rdi, %rax ; movq %rsi, %rdx @@ -317,11 +317,11 @@ block0(v0: i128, v1: i8): ; movq %rsi, %r8 ; shlq %cl, %r8 ; xorq %r10, %r10 -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %r10, %r8 ; orq %r8, %rdi ; sarq $0x3f, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r9, %rax ; cmoveq %rdi, %rax ; movq %rsi, %rdx diff --git a/cranelift/filetests/filetests/isa/x64/ushr.clif b/cranelift/filetests/filetests/isa/x64/ushr.clif index 8c760f2d48f0..e86061a60d7d 100644 --- a/cranelift/filetests/filetests/isa/x64/ushr.clif +++ b/cranelift/filetests/filetests/isa/x64/ushr.clif @@ -52,10 +52,10 @@ block0(v0: i128, v1: i8): ; subq %rax, %rcx ; shlq %cl, %rsi ; xorq %rdx, %rdx -; testq $0x7f, %rax +; testb $0x7f, %al ; 
cmoveq %rdx, %rsi ; orq %rdi, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r10, %rax ; cmoveq %rsi, %rax ; cmoveq %r10, %rdx @@ -111,10 +111,10 @@ block0(v0: i128, v1: i64): ; subq %rax, %rcx ; shlq %cl, %rsi ; xorq %rdx, %rdx -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %rdx, %rsi ; orq %rdi, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r9, %rax ; cmoveq %rsi, %rax ; cmoveq %r9, %rdx @@ -170,10 +170,10 @@ block0(v0: i128, v1: i32): ; subq %rax, %rcx ; shlq %cl, %rsi ; xorq %rdx, %rdx -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %rdx, %rsi ; orq %rdi, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r9, %rax ; cmoveq %rsi, %rax ; cmoveq %r9, %rdx @@ -229,10 +229,10 @@ block0(v0: i128, v1: i16): ; subq %rax, %rcx ; shlq %cl, %rsi ; xorq %rdx, %rdx -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %rdx, %rsi ; orq %rdi, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r9, %rax ; cmoveq %rsi, %rax ; cmoveq %r9, %rdx @@ -288,10 +288,10 @@ block0(v0: i128, v1: i8): ; subq %rax, %rcx ; shlq %cl, %rsi ; xorq %rdx, %rdx -; testq $0x7f, %rax +; testb $0x7f, %al ; cmoveq %rdx, %rsi ; orq %rdi, %rsi -; testq $0x40, %rax +; testb $0x40, %al ; movq %r9, %rax ; cmoveq %rsi, %rax ; cmoveq %r9, %rdx