diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4444edafc3a6..dadaa6e35973 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -7,7 +7,7 @@ use std::mem; use crate::binemit::Stackmap; use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type}; -use crate::isa::{self, x64::inst::*}; +use crate::isa::{x64::inst::*, CallConv}; use crate::machinst::*; use crate::settings; use crate::{CodegenError, CodegenResult}; @@ -40,7 +40,7 @@ struct ABISig { /// Index in `args` of the stack-return-value-area argument. stack_ret_arg: Option, /// Calling convention used. - call_conv: isa::CallConv, + call_conv: CallConv, } pub(crate) struct X64ABIBody { @@ -65,7 +65,7 @@ pub(crate) struct X64ABIBody { /// which RSP is adjusted downwards to allocate the spill area. frame_size_bytes: Option, - call_conv: isa::CallConv, + call_conv: CallConv, /// The settings controlling this function's compilation. flags: settings::Flags, @@ -93,7 +93,11 @@ fn in_vec_reg(ty: types::Type) -> bool { } } -fn get_intreg_for_arg_systemv(idx: usize) -> Option { +fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option { + match call_conv { + CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV => {} + _ => panic!("int args only supported for SysV calling convention"), + }; match idx { 0 => Some(regs::rdi()), 1 => Some(regs::rsi()), @@ -105,7 +109,11 @@ fn get_intreg_for_arg_systemv(idx: usize) -> Option { } } -fn get_fltreg_for_arg_systemv(idx: usize) -> Option { +fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option { + match call_conv { + CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV => {} + _ => panic!("float args only supported for SysV calling convention"), + }; match idx { 0 => Some(regs::xmm0()), 1 => Some(regs::xmm1()), @@ -119,19 +127,39 @@ fn get_fltreg_for_arg_systemv(idx: usize) -> Option { } } -fn get_intreg_for_retval_systemv(idx: usize) -> Option { - match idx { - 0 => Some(regs::rax()), - 1 => Some(regs::rdx()), - _ => None, +fn get_intreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option { + match call_conv { + CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx { + 0 => Some(regs::rax()), + 1 => Some(regs::rdx()), + _ => None, + }, + CallConv::BaldrdashSystemV => { + if idx == 0 { + Some(regs::rax()) + } else { + None + } + } + CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(), } } -fn get_fltreg_for_retval_systemv(idx: usize) -> Option { - match idx { - 0 => Some(regs::xmm0()), - 1 => Some(regs::xmm1()), - _ => None, +fn get_fltreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option { + match call_conv { + CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx { + 0 => Some(regs::xmm0()), + 1 => Some(regs::xmm1()), + _ => None, + }, + CallConv::BaldrdashSystemV => { + if idx == 0 { + Some(regs::xmm0()) + } else { + None + } + } + CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(), } } @@ -147,10 +175,39 @@ fn is_callee_save_systemv(r: RealReg) -> bool { } } -fn get_callee_saves(regs: Vec>) -> Vec> { - regs.into_iter() - .filter(|r| is_callee_save_systemv(r.to_reg())) - .collect() +fn is_callee_save_baldrdash(r: RealReg) -> bool { + use regs::*; + match r.get_class() { + RegClass::I64 => { + if r.get_hw_encoding() as u8 == ENC_R14 { + // r14 is the WasmTlsReg and is preserved implicitly. 
+ false + } else { + // Defer to native for the other ones. + is_callee_save_systemv(r) + } + } + RegClass::V128 => false, + _ => unimplemented!(), + } +} + +fn get_callee_saves(call_conv: &CallConv, regs: Vec>) -> Vec> { + match call_conv { + CallConv::BaldrdashSystemV => regs + .into_iter() + .filter(|r| is_callee_save_baldrdash(r.to_reg())) + .collect(), + CallConv::BaldrdashWindows => { + todo!("baldrdash windows"); + } + CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs + .into_iter() + .filter(|r| is_callee_save_systemv(r.to_reg())) + .collect(), + CallConv::WindowsFastcall => todo!("windows fastcall"), + CallConv::Probestack => todo!("probestack?"), + } } impl X64ABIBody { @@ -160,7 +217,7 @@ impl X64ABIBody { let call_conv = f.signature.call_conv; debug_assert!( - call_conv == isa::CallConv::SystemV || call_conv.extends_baldrdash(), + call_conv == CallConv::SystemV || call_conv.extends_baldrdash(), "unsupported or unimplemented calling convention {}", call_conv ); @@ -195,7 +252,6 @@ impl X64ABIBody { if self.call_conv.extends_baldrdash() { let num_words = self.flags.baldrdash_prologue_words() as i64; debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); - debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned"); num_words * 8 } else { 16 // frame pointer + return address. @@ -269,7 +325,18 @@ impl ABIBody for X64ABIBody { } fn gen_retval_area_setup(&self) -> Option { - None + if let Some(i) = self.sig.stack_ret_arg { + let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); + trace!( + "gen_retval_area_setup: inst {:?}; ptr reg is {:?}", + inst, + self.ret_area_ptr.unwrap().to_reg() + ); + Some(inst) + } else { + trace!("gen_retval_area_setup: not needed"); + None + } } fn gen_copy_reg_to_retval( @@ -295,15 +362,17 @@ impl ABIBody for X64ABIBody { (ArgumentExtension::Uext, Some(ext_mode)) => { ret.push(Inst::movzx_rm_r( ext_mode, - RegMem::reg(r.to_reg()), + RegMem::reg(from_reg.to_reg()), dest_reg, + /* infallible load */ None, )); } (ArgumentExtension::Sext, Some(ext_mode)) => { ret.push(Inst::movsx_rm_r( ext_mode, - RegMem::reg(r.to_reg()), + RegMem::reg(from_reg.to_reg()), dest_reg, + /* infallible load */ None, )); } _ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)), @@ -327,6 +396,7 @@ impl ABIBody for X64ABIBody { ext_mode, RegMem::reg(from_reg.to_reg()), from_reg, + /* infallible load */ None, )); } (ArgumentExtension::Sext, Some(ext_mode)) => { @@ -334,6 +404,7 @@ impl ABIBody for X64ABIBody { ext_mode, RegMem::reg(from_reg.to_reg()), from_reg, + /* infallible load */ None, )); } _ => {} @@ -437,7 +508,7 @@ impl ABIBody for X64ABIBody { insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); } - let clobbered = get_callee_saves(self.clobbered.to_vec()); + let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); let callee_saved_used: usize = clobbered .iter() .map(|reg| match reg.to_reg().get_class() { @@ -481,7 +552,7 @@ impl ABIBody for X64ABIBody { // Save callee saved registers that we trash. Keep track of how much space we've used, so // as to know what we have to do to get the base of the spill area 0 % 16. - let clobbered = get_callee_saves(self.clobbered.to_vec()); + let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); for reg in clobbered { let r_reg = reg.to_reg(); match r_reg.get_class() { @@ -511,7 +582,7 @@ impl ABIBody for X64ABIBody { // Undo what we did in the prologue. // Restore regs. 
- let clobbered = get_callee_saves(self.clobbered.to_vec()); + let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); for wreg in clobbered.into_iter().rev() { let rreg = wreg.to_reg(); match rreg.get_class() { @@ -608,7 +679,7 @@ fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { } } -fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { +fn get_caller_saves(call_conv: CallConv) -> Vec> { let mut caller_saved = Vec::new(); // Systemv calling convention: @@ -623,6 +694,14 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { caller_saved.push(Writable::from_reg(regs::r10())); caller_saved.push(Writable::from_reg(regs::r11())); + if call_conv.extends_baldrdash() { + caller_saved.push(Writable::from_reg(regs::r12())); + caller_saved.push(Writable::from_reg(regs::r13())); + // Not r14; implicitly preserved in the entry. + caller_saved.push(Writable::from_reg(regs::r15())); + caller_saved.push(Writable::from_reg(regs::rbx())); + } + // - XMM: all the registers! caller_saved.push(Writable::from_reg(regs::xmm0())); caller_saved.push(Writable::from_reg(regs::xmm1())); @@ -641,10 +720,6 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { caller_saved.push(Writable::from_reg(regs::xmm14())); caller_saved.push(Writable::from_reg(regs::xmm15())); - if call_conv.extends_baldrdash() { - todo!("add the baldrdash caller saved") - } - caller_saved } @@ -671,7 +746,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { } /// Try to fill a Baldrdash register, returning it if it was found. -fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option { +fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { if call_conv.extends_baldrdash() { match ¶m.purpose { &ir::ArgumentPurpose::VMContext => { @@ -705,16 +780,13 @@ enum ArgsOrRets { /// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the /// index of the extra synthetic arg that was added. fn compute_arg_locs( - call_conv: isa::CallConv, + call_conv: CallConv, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, ) -> CodegenResult<(Vec, i64, Option)> { let is_baldrdash = call_conv.extends_baldrdash(); - // XXX assume SystemV at the moment. 
- debug_assert!(!is_baldrdash, "baldrdash nyi"); - let mut next_gpr = 0; let mut next_vreg = 0; let mut next_stack: u64 = 0; @@ -748,8 +820,8 @@ fn compute_arg_locs( let (next_reg, candidate) = if intreg { let candidate = match args_or_rets { - ArgsOrRets::Args => get_intreg_for_arg_systemv(next_gpr), - ArgsOrRets::Rets => get_intreg_for_retval_systemv(next_gpr), + ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr), + ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr), }; debug_assert!(candidate .map(|r| r.get_class() == RegClass::I64) @@ -757,8 +829,8 @@ fn compute_arg_locs( (&mut next_gpr, candidate) } else { let candidate = match args_or_rets { - ArgsOrRets::Args => get_fltreg_for_arg_systemv(next_vreg), - ArgsOrRets::Rets => get_fltreg_for_retval_systemv(next_vreg), + ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg), + ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg), }; debug_assert!(candidate .map(|r| r.get_class() == RegClass::V128) @@ -791,7 +863,7 @@ fn compute_arg_locs( let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); - if let Some(reg) = get_intreg_for_arg_systemv(next_gpr) { + if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { ret.push(ABIArg::Reg(reg.to_real_reg(), ir::types::I64)); } else { ret.push(ABIArg::Stack(next_stack as i64, ir::types::I64)); @@ -897,8 +969,13 @@ fn load_stack(mem: impl Into, into_reg: Writable, ty: Type) let mem = mem.into(); match ext_mode { - Some(ext_mode) => Inst::movsx_rm_r(ext_mode, RegMem::mem(mem), into_reg), - None => Inst::mov64_m_r(mem, into_reg), + Some(ext_mode) => Inst::movsx_rm_r( + ext_mode, + RegMem::mem(mem), + into_reg, + /* infallible load */ None, + ), + None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), } } @@ -914,7 +991,7 @@ fn store_stack(mem: impl Into, from_reg: Reg, ty: Type) -> Inst }; let mem = mem.into(); if is_int { - Inst::mov_r_m(size, from_reg, mem) + Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None) } else { unimplemented!("f32/f64 store_stack"); } diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 78187f6011ed..4090f2d33af7 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -199,7 +199,7 @@ impl RegMemImm { match self { Self::Reg { reg } => collector.add_use(*reg), Self::Mem { addr } => addr.get_regs_as_uses(collector), - Self::Imm { simm32: _ } => {} + Self::Imm { .. } => {} } } } @@ -234,12 +234,11 @@ impl RegMem { pub(crate) fn mem(addr: impl Into) -> Self { Self::Mem { addr: addr.into() } } - /// Add the regs mentioned by `self` to `collector`. pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { match self { RegMem::Reg { reg } => collector.add_use(*reg), - RegMem::Mem { addr } => addr.get_regs_as_uses(collector), + RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector), } } } @@ -252,7 +251,7 @@ impl ShowWithRRU for RegMem { fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { match self { RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size), - RegMem::Mem { addr } => addr.show_rru(mb_rru), + RegMem::Mem { addr, .. 
} => addr.show_rru(mb_rru), } } } @@ -283,9 +282,32 @@ impl fmt::Debug for AluRmiROpcode { } } -impl ToString for AluRmiROpcode { - fn to_string(&self) -> String { - format!("{:?}", self) +impl fmt::Display for AluRmiROpcode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +#[derive(Clone, PartialEq)] +pub enum UnaryRmROpcode { + /// Bit-scan reverse. + Bsr, + /// Bit-scan forward. + Bsf, +} + +impl fmt::Debug for UnaryRmROpcode { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + match self { + UnaryRmROpcode::Bsr => write!(fmt, "bsr"), + UnaryRmROpcode::Bsf => write!(fmt, "bsf"), + } + } +} + +impl fmt::Display for UnaryRmROpcode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) } } @@ -446,9 +468,9 @@ impl fmt::Debug for SseOpcode { } } -impl ToString for SseOpcode { - fn to_string(&self) -> String { - format!("{:?}", self) +impl fmt::Display for SseOpcode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) } } @@ -497,34 +519,65 @@ impl fmt::Debug for ExtMode { } } -impl ToString for ExtMode { - fn to_string(&self) -> String { - format!("{:?}", self) +impl fmt::Display for ExtMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) } } -/// These indicate the form of a scalar shift: left, signed right, unsigned right. +/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right. #[derive(Clone)] pub enum ShiftKind { - Left, - RightZ, - RightS, + ShiftLeft, + /// Inserts zeros in the most significant bits. + ShiftRightLogical, + /// Replicates the sign bit in the most significant bits. + ShiftRightArithmetic, + RotateLeft, + RotateRight, } impl fmt::Debug for ShiftKind { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { let name = match self { - ShiftKind::Left => "shl", - ShiftKind::RightZ => "shr", - ShiftKind::RightS => "sar", + ShiftKind::ShiftLeft => "shl", + ShiftKind::ShiftRightLogical => "shr", + ShiftKind::ShiftRightArithmetic => "sar", + ShiftKind::RotateLeft => "rol", + ShiftKind::RotateRight => "ror", }; write!(fmt, "{}", name) } } -impl ToString for ShiftKind { - fn to_string(&self) -> String { - format!("{:?}", self) +impl fmt::Display for ShiftKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +/// What kind of division or remainer instruction this is? 
+#[derive(Clone)] +pub enum DivOrRemKind { + SignedDiv, + UnsignedDiv, + SignedRem, + UnsignedRem, +} + +impl DivOrRemKind { + pub(crate) fn is_signed(&self) -> bool { + match self { + DivOrRemKind::SignedDiv | DivOrRemKind::SignedRem => true, + _ => false, + } + } + + pub(crate) fn is_div(&self) -> bool { + match self { + DivOrRemKind::SignedDiv | DivOrRemKind::UnsignedDiv => true, + _ => false, + } } } @@ -639,9 +692,9 @@ impl fmt::Debug for CC { } } -impl ToString for CC { - fn to_string(&self) -> String { - format!("{:?}", self) +impl fmt::Display for CC { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index d89a712cc1d4..432bbc9a0da5 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -556,6 +556,41 @@ pub(crate) fn emit( } } + Inst::UnaryRmR { size, op, src, dst } => { + let (prefix, rex_flags) = match size { + 2 => (LegacyPrefix::_66, RexFlags::clear_w()), + 4 => (LegacyPrefix::None, RexFlags::clear_w()), + 8 => (LegacyPrefix::None, RexFlags::set_w()), + _ => unreachable!(), + }; + + let (opcode, num_opcodes) = match op { + UnaryRmROpcode::Bsr => (0x0fbd, 2), + UnaryRmROpcode::Bsf => (0x0fbc, 2), + }; + + match src { + RegMem::Reg { reg: src } => emit_std_reg_reg( + sink, + prefix, + opcode, + num_opcodes, + dst.to_reg(), + *src, + rex_flags, + ), + RegMem::Mem { addr: src } => emit_std_reg_mem( + sink, + prefix, + opcode, + num_opcodes, + dst.to_reg(), + &src.finalize(state), + rex_flags, + ), + } + } + Inst::Div { size, signed, @@ -589,6 +624,32 @@ pub(crate) fn emit( } } + Inst::MulHi { size, signed, rhs } => { + let (prefix, rex_flags) = match size { + 2 => (LegacyPrefix::_66, RexFlags::clear_w()), + 4 => (LegacyPrefix::None, RexFlags::clear_w()), + 8 => (LegacyPrefix::None, RexFlags::set_w()), + _ => unreachable!(), + }; + + let subopcode = if *signed { 5 } else { 4 }; + match rhs { + RegMem::Reg { reg } => { + let src = int_reg_enc(*reg); + emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags) + } + RegMem::Mem { addr: src } => emit_std_enc_mem( + sink, + prefix, + 0xF7, + 1, + subopcode, + &src.finalize(state), + rex_flags, + ), + } + } + Inst::SignExtendRaxRdx { size } => { match size { 2 => sink.put1(0x66), @@ -600,11 +661,11 @@ pub(crate) fn emit( } Inst::CheckedDivOrRemSeq { - is_div, - is_signed, + kind, size, divisor, loc, + tmp, } => { // Generates the following code sequence: // @@ -642,7 +703,7 @@ pub(crate) fn emit( let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero, *loc); inst.emit(sink, flags, state); - let (do_op, done_label) = if *is_signed { + let (do_op, done_label) = if kind.is_signed() { // Now check if the divisor is -1. let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), *divisor); inst.emit(sink, flags, state); @@ -653,7 +714,7 @@ pub(crate) fn emit( one_way_jmp(sink, CC::NZ, do_op); // Here, divisor == -1. - if !*is_div { + if !kind.is_div() { // x % -1 = 0; put the result into the destination, $rdx. let done_label = sink.get_label(); @@ -666,8 +727,18 @@ pub(crate) fn emit( (Some(do_op), Some(done_label)) } else { // Check for integer overflow. 
- let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax()); - inst.emit(sink, flags, state); + if *size == 8 { + let tmp = tmp.expect("temporary for i64 sdiv"); + + let inst = Inst::imm_r(true, 0x8000000000000000, tmp); + inst.emit(sink, flags, state); + + let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax()); + inst.emit(sink, flags, state); + } else { + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax()); + inst.emit(sink, flags, state); + } // If not equal, jump over the trap. let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow, *loc); @@ -684,7 +755,7 @@ pub(crate) fn emit( } // Fill in the high parts: - if *is_signed { + if kind.is_signed() { // sign-extend the sign-bit of rax into rdx, for signed opcodes. let inst = Inst::sign_extend_rax_to_rdx(*size); inst.emit(sink, flags, state); @@ -694,7 +765,7 @@ pub(crate) fn emit( inst.emit(sink, flags, state); } - let inst = Inst::div(*size, *is_signed, RegMem::reg(*divisor), *loc); + let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(*divisor), *loc); inst.emit(sink, flags, state); // Lowering takes care of moving the result back into the right register, see comment @@ -735,7 +806,12 @@ pub(crate) fn emit( emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex); } - Inst::MovZX_RM_R { ext_mode, src, dst } => { + Inst::MovZX_RM_R { + ext_mode, + src, + dst, + srcloc, + } => { let (opcodes, num_opcodes, rex_flags) = match ext_mode { ExtMode::BL => { // MOVZBL is (REX.W==0) 0F B6 /r @@ -777,27 +853,45 @@ pub(crate) fn emit( *src, rex_flags, ), - RegMem::Mem { addr: src } => emit_std_reg_mem( - sink, - LegacyPrefix::None, - opcodes, - num_opcodes, - dst.to_reg(), - &src.finalize(state), - rex_flags, - ), + RegMem::Mem { addr: src } => { + let src = &src.finalize(state); + + if let Some(srcloc) = *srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + emit_std_reg_mem( + sink, + LegacyPrefix::None, + opcodes, + num_opcodes, + dst.to_reg(), + src, + rex_flags, + ) + } } } - Inst::Mov64_M_R { src, dst } => emit_std_reg_mem( - sink, - LegacyPrefix::None, - 0x8B, - 1, - dst.to_reg(), - &src.finalize(state), - RexFlags::set_w(), - ), + Inst::Mov64_M_R { src, dst, srcloc } => { + let src = &src.finalize(state); + + if let Some(srcloc) = *srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + emit_std_reg_mem( + sink, + LegacyPrefix::None, + 0x8B, + 1, + dst.to_reg(), + src, + RexFlags::set_w(), + ) + } Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem( sink, @@ -809,7 +903,12 @@ pub(crate) fn emit( RexFlags::set_w(), ), - Inst::MovSX_RM_R { ext_mode, src, dst } => { + Inst::MovSX_RM_R { + ext_mode, + src, + dst, + srcloc, + } => { let (opcodes, num_opcodes, rex_flags) = match ext_mode { ExtMode::BL => { // MOVSBL is (REX.W==0) 0F BE /r @@ -843,21 +942,41 @@ pub(crate) fn emit( *src, rex_flags, ), - RegMem::Mem { addr: src } => emit_std_reg_mem( - sink, - LegacyPrefix::None, - opcodes, - num_opcodes, - dst.to_reg(), - &src.finalize(state), - rex_flags, - ), + + RegMem::Mem { addr: src } => { + let src = &src.finalize(state); + + if let Some(srcloc) = *srcloc { + // Register the offset at which the actual load instruction starts. 
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + emit_std_reg_mem( + sink, + LegacyPrefix::None, + opcodes, + num_opcodes, + dst.to_reg(), + src, + rex_flags, + ) + } } } - Inst::Mov_R_M { size, src, dst } => { + Inst::Mov_R_M { + size, + src, + dst, + srcloc, + } => { let dst = &dst.finalize(state); + if let Some(srcloc) = *srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + match size { 1 => { // This is one of the few places where the presence of a @@ -925,9 +1044,11 @@ pub(crate) fn emit( } => { let enc_dst = int_reg_enc(dst.to_reg()); let subopcode = match kind { - ShiftKind::Left => 4, - ShiftKind::RightZ => 5, - ShiftKind::RightS => 7, + ShiftKind::RotateLeft => 0, + ShiftKind::RotateRight => 1, + ShiftKind::ShiftLeft => 4, + ShiftKind::ShiftRightLogical => 5, + ShiftKind::ShiftRightArithmetic => 7, }; let rex = if *is_64 { @@ -1262,7 +1383,7 @@ pub(crate) fn emit( // We generate the following sequence: // ;; generated by lowering: cmp #jmp_table_size, %idx // jnb $default_target - // mov %idx, %tmp2 + // movl %idx, %tmp2 // lea start_of_jump_table_offset(%rip), %tmp1 // movzlq [%tmp1, %tmp2], %tmp2 // addq %tmp2, %tmp1 @@ -1275,7 +1396,8 @@ pub(crate) fn emit( }; one_way_jmp(sink, CC::NB, *default_label); // idx unsigned >= jmp table size - let inst = Inst::gen_move(*tmp2, *idx, I64); + // Copy the index (and make sure to clear the high 32-bits lane of tmp2). + let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2, None); inst.emit(sink, flags, state); // Load base address of jump table. @@ -1291,6 +1413,7 @@ pub(crate) fn emit( ExtMode::LQ, RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)), *tmp2, + None, ); inst.emit(sink, flags, state); @@ -1343,6 +1466,7 @@ pub(crate) fn emit( op, src: src_e, dst: reg_g, + srcloc, } => { let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { @@ -1357,9 +1481,12 @@ pub(crate) fn emit( RegMem::Reg { reg: reg_e } => { emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); } - RegMem::Mem { addr } => { let addr = &addr.finalize(state); + if let Some(srcloc) = *srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); } } @@ -1387,14 +1514,19 @@ pub(crate) fn emit( RegMem::Reg { reg: reg_e } => { emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); } - RegMem::Mem { addr } => { let addr = &addr.finalize(state); emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); } } } - Inst::XMM_Mov_R_M { op, src, dst } => { + + Inst::XMM_Mov_R_M { + op, + src, + dst, + srcloc, + } => { let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E), @@ -1403,8 +1535,32 @@ pub(crate) fn emit( }; let dst = &dst.finalize(state); + if let Some(srcloc) = *srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, rex); } + + Inst::LoadExtName { + dst, + name, + offset, + srcloc, + } => { + // The full address can be encoded in the register, with a relocation. 
+ // Generates: movabsq $name, %dst + let enc_dst = int_reg_enc(dst.to_reg()); + sink.put1(0x48 | ((enc_dst >> 3) & 1)); + sink.put1(0xB8 | (enc_dst & 7)); + sink.add_reloc(*srcloc, Reloc::Abs8, name, *offset); + if flags.emit_all_ones_funcaddrs() { + sink.put8(u64::max_value()); + } else { + sink.put8(0); + } + } + Inst::Hlt => { sink.put1(0xcc); } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 78a13b8a555f..ee4674729bb8 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -95,82 +95,82 @@ fn test_x64_emit() { // // Addr_IR, offset zero insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rax), w_rdi, None), "488B38", "movq 0(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rbx), w_rdi, None), "488B3B", "movq 0(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rcx), w_rdi, None), "488B39", "movq 0(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rdx), w_rdi, None), "488B3A", "movq 0(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rbp), w_rdi, None), "488B7D00", "movq 0(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rsp), w_rdi, None), "488B3C24", "movq 0(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rsi), w_rdi, None), "488B3E", "movq 0(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, rdi), w_rdi, None), "488B3F", "movq 0(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r8), w_rdi, None), "498B38", "movq 0(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r9), w_rdi, None), "498B39", "movq 0(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r10), w_rdi, None), "498B3A", "movq 0(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r11), w_rdi, None), "498B3B", "movq 0(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r12), w_rdi, None), "498B3C24", "movq 0(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r13), w_rdi, None), "498B7D00", "movq 0(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r14), w_rdi, None), "498B3E", "movq 0(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0, r15), w_rdi, None), "498B3F", "movq 0(%r15), %rdi", )); @@ -178,82 +178,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset max simm8 insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rax), w_rdi, None), "488B787F", "movq 127(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, rbx), w_rdi), + 
Inst::mov64_m_r(Amode::imm_reg(127, rbx), w_rdi, None), "488B7B7F", "movq 127(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rcx), w_rdi, None), "488B797F", "movq 127(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rdx), w_rdi, None), "488B7A7F", "movq 127(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rbp), w_rdi, None), "488B7D7F", "movq 127(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rsp), w_rdi, None), "488B7C247F", "movq 127(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rsi), w_rdi, None), "488B7E7F", "movq 127(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, rdi), w_rdi, None), "488B7F7F", "movq 127(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r8), w_rdi, None), "498B787F", "movq 127(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r9), w_rdi, None), "498B797F", "movq 127(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r10), w_rdi, None), "498B7A7F", "movq 127(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r11), w_rdi, None), "498B7B7F", "movq 127(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r12), w_rdi, None), "498B7C247F", "movq 127(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r13), w_rdi, None), "498B7D7F", "movq 127(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r14), w_rdi, None), "498B7E7F", "movq 127(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(127, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(127, r15), w_rdi, None), "498B7F7F", "movq 127(%r15), %rdi", )); @@ -261,82 +261,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset min simm8 insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rax), w_rdi, None), "488B7880", "movq -128(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbx), w_rdi, None), "488B7B80", "movq -128(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rcx), w_rdi, None), "488B7980", "movq -128(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdx), w_rdi, None), "488B7A80", "movq -128(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbp), w_rdi, None), "488B7D80", "movq -128(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 
as u32, rsp), w_rdi, None), "488B7C2480", "movq -128(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsi), w_rdi, None), "488B7E80", "movq -128(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdi), w_rdi, None), "488B7F80", "movq -128(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r8), w_rdi, None), "498B7880", "movq -128(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r9), w_rdi, None), "498B7980", "movq -128(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r10), w_rdi, None), "498B7A80", "movq -128(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r11), w_rdi, None), "498B7B80", "movq -128(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r12), w_rdi, None), "498B7C2480", "movq -128(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r13), w_rdi, None), "498B7D80", "movq -128(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r14), w_rdi, None), "498B7E80", "movq -128(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r15), w_rdi, None), "498B7F80", "movq -128(%r15), %rdi", )); @@ -344,82 +344,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset smallest positive simm32 insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rax), w_rdi, None), "488BB880000000", "movq 128(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rbx), w_rdi, None), "488BBB80000000", "movq 128(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rcx), w_rdi, None), "488BB980000000", "movq 128(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rdx), w_rdi, None), "488BBA80000000", "movq 128(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rbp), w_rdi, None), "488BBD80000000", "movq 128(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rsp), w_rdi, None), "488BBC2480000000", "movq 128(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rsi), w_rdi, None), "488BBE80000000", "movq 128(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, rdi), w_rdi, None), "488BBF80000000", "movq 128(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r8), w_rdi, None), 
"498BB880000000", "movq 128(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r9), w_rdi, None), "498BB980000000", "movq 128(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r10), w_rdi, None), "498BBA80000000", "movq 128(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r11), w_rdi, None), "498BBB80000000", "movq 128(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r12), w_rdi, None), "498BBC2480000000", "movq 128(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r13), w_rdi, None), "498BBD80000000", "movq 128(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r14), w_rdi, None), "498BBE80000000", "movq 128(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(128, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(128, r15), w_rdi, None), "498BBF80000000", "movq 128(%r15), %rdi", )); @@ -427,82 +427,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset smallest negative simm32 insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rax), w_rdi, None), "488BB87FFFFFFF", "movq -129(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbx), w_rdi, None), "488BBB7FFFFFFF", "movq -129(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rcx), w_rdi, None), "488BB97FFFFFFF", "movq -129(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdx), w_rdi, None), "488BBA7FFFFFFF", "movq -129(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbp), w_rdi, None), "488BBD7FFFFFFF", "movq -129(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsp), w_rdi, None), "488BBC247FFFFFFF", "movq -129(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsi), w_rdi, None), "488BBE7FFFFFFF", "movq -129(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdi), w_rdi, None), "488BBF7FFFFFFF", "movq -129(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r8), w_rdi, None), "498BB87FFFFFFF", "movq -129(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r9), w_rdi, None), "498BB97FFFFFFF", "movq -129(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r10), w_rdi, None), "498BBA7FFFFFFF", "movq -129(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r11), 
w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r11), w_rdi, None), "498BBB7FFFFFFF", "movq -129(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r12), w_rdi, None), "498BBC247FFFFFFF", "movq -129(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r13), w_rdi, None), "498BBD7FFFFFFF", "movq -129(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r14), w_rdi, None), "498BBE7FFFFFFF", "movq -129(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r15), w_rdi, None), "498BBF7FFFFFFF", "movq -129(%r15), %rdi", )); @@ -510,82 +510,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset large positive simm32 insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rax), w_rdi, None), "488BB877207317", "movq 393420919(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbx), w_rdi, None), "488BBB77207317", "movq 393420919(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rcx), w_rdi, None), "488BB977207317", "movq 393420919(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdx), w_rdi, None), "488BBA77207317", "movq 393420919(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbp), w_rdi, None), "488BBD77207317", "movq 393420919(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsp), w_rdi, None), "488BBC2477207317", "movq 393420919(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsi), w_rdi, None), "488BBE77207317", "movq 393420919(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdi), w_rdi, None), "488BBF77207317", "movq 393420919(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r8), w_rdi, None), "498BB877207317", "movq 393420919(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r9), w_rdi, None), "498BB977207317", "movq 393420919(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r10), w_rdi, None), "498BBA77207317", "movq 393420919(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r11), w_rdi, None), "498BBB77207317", "movq 393420919(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r12), w_rdi, None), "498BBC2477207317", "movq 393420919(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, 
r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r13), w_rdi, None), "498BBD77207317", "movq 393420919(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r14), w_rdi, None), "498BBE77207317", "movq 393420919(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(0x17732077, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(0x17732077, r15), w_rdi, None), "498BBF77207317", "movq 393420919(%r15), %rdi", )); @@ -593,82 +593,82 @@ fn test_x64_emit() { // ======================================================== // Addr_IR, offset large negative simm32 insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rax), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rax), w_rdi, None), "488BB8D9A6BECE", "movq -826366247(%rax), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbx), w_rdi, None), "488BBBD9A6BECE", "movq -826366247(%rbx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rcx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rcx), w_rdi, None), "488BB9D9A6BECE", "movq -826366247(%rcx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdx), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdx), w_rdi, None), "488BBAD9A6BECE", "movq -826366247(%rdx), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbp), w_rdi, None), "488BBDD9A6BECE", "movq -826366247(%rbp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsp), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsp), w_rdi, None), "488BBC24D9A6BECE", "movq -826366247(%rsp), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsi), w_rdi, None), "488BBED9A6BECE", "movq -826366247(%rsi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdi), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdi), w_rdi, None), "488BBFD9A6BECE", "movq -826366247(%rdi), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r8), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r8), w_rdi, None), "498BB8D9A6BECE", "movq -826366247(%r8), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r9), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r9), w_rdi, None), "498BB9D9A6BECE", "movq -826366247(%r9), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r10), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r10), w_rdi, None), "498BBAD9A6BECE", "movq -826366247(%r10), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r11), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r11), w_rdi, None), "498BBBD9A6BECE", "movq -826366247(%r11), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r12), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r12), w_rdi, None), "498BBC24D9A6BECE", "movq -826366247(%r12), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r13), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r13), 
w_rdi, None), "498BBDD9A6BECE", "movq -826366247(%r13), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r14), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r14), w_rdi, None), "498BBED9A6BECE", "movq -826366247(%r14), %rdi", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r15), w_rdi), + Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r15), w_rdi, None), "498BBFD9A6BECE", "movq -826366247(%r15), %rdi", )); @@ -680,42 +680,42 @@ fn test_x64_emit() { // // Addr_IRRS, offset max simm8 insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rax, 0), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rax, 0), w_r11, None), "4C8B5C007F", "movq 127(%rax,%rax,1), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rax, 1), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rax, 1), w_r11, None), "4C8B5C477F", "movq 127(%rdi,%rax,2), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rax, 2), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rax, 2), w_r11, None), "4D8B5C807F", "movq 127(%r8,%rax,4), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rax, 3), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rax, 3), w_r11, None), "4D8B5CC77F", "movq 127(%r15,%rax,8), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rdi, 3), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rdi, 3), w_r11, None), "4C8B5CF87F", "movq 127(%rax,%rdi,8), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11, None), "4C8B5CBF7F", "movq 127(%rdi,%rdi,4), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rdi, 1), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rdi, 1), w_r11, None), "4D8B5C787F", "movq 127(%r8,%rdi,2), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rdi, 0), w_r11), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rdi, 0), w_r11, None), "4D8B5C3F7F", "movq 127(%r15,%rdi,1), %r11", )); @@ -723,42 +723,74 @@ fn test_x64_emit() { // ======================================================== // Addr_IRRS, offset min simm8 insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), + w_r11, + None, + ), "4E8B5C8080", "movq -128(%rax,%r8,4), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), + w_r11, + None, + ), "4E8B5CC780", "movq -128(%rdi,%r8,8), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), + w_r11, + None, + ), "4F8B5C0080", "movq -128(%r8,%r8,1), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), + w_r11, + None, + ), "4F8B5C4780", "movq -128(%r15,%r8,2), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), + w_r11, + None, + ), "4E8B5C7880", "movq -128(%rax,%r15,2), %r11", )); 
insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), + w_r11, + None, + ), "4E8B5C3F80", "movq -128(%rdi,%r15,1), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), + w_r11, + None, + ), "4F8B5CF880", "movq -128(%r8,%r15,8), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), + w_r11, + None, + ), "4F8B5CBF80", "movq -128(%r15,%r15,4), %r11", )); @@ -766,42 +798,74 @@ fn test_x64_emit() { // ======================================================== // Addr_IRRS, offset large positive simm32 insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), + w_r11, + None, + ), "4C8B9C00BE25664F", "movq 1332094398(%rax,%rax,1), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), + w_r11, + None, + ), "4C8B9C47BE25664F", "movq 1332094398(%rdi,%rax,2), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), + w_r11, + None, + ), "4D8B9C80BE25664F", "movq 1332094398(%r8,%rax,4), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), + w_r11, + None, + ), "4D8B9CC7BE25664F", "movq 1332094398(%r15,%rax,8), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), + w_r11, + None, + ), "4C8B9CF8BE25664F", "movq 1332094398(%rax,%rdi,8), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), + w_r11, + None, + ), "4C8B9CBFBE25664F", "movq 1332094398(%rdi,%rdi,4), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), + w_r11, + None, + ), "4D8B9C78BE25664F", "movq 1332094398(%r8,%rdi,2), %r11", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), w_r11), + Inst::mov64_m_r( + Amode::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), + w_r11, + None, + ), "4D8B9C3FBE25664F", "movq 1332094398(%r15,%rdi,1), %r11", )); @@ -812,6 +876,7 @@ fn test_x64_emit() { Inst::mov64_m_r( Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r8, 2), w_r11, + None, ), "4E8B9C8070E9B2D9", "movq -642586256(%rax,%r8,4), %r11", @@ -820,6 +885,7 @@ fn test_x64_emit() { Inst::mov64_m_r( Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r8, 3), w_r11, + None, ), "4E8B9CC770E9B2D9", "movq -642586256(%rdi,%r8,8), %r11", @@ -828,6 +894,7 @@ fn test_x64_emit() { Inst::mov64_m_r( Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r8, 0), w_r11, + None, ), "4F8B9C0070E9B2D9", "movq -642586256(%r8,%r8,1), %r11", @@ -836,6 +903,7 @@ fn test_x64_emit() { Inst::mov64_m_r( Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r8, 1), w_r11, + None, ), 
"4F8B9C4770E9B2D9", "movq -642586256(%r15,%r8,2), %r11", @@ -844,6 +912,7 @@ fn test_x64_emit() { Inst::mov64_m_r( Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r15, 1), w_r11, + None, ), "4E8B9C7870E9B2D9", "movq -642586256(%rax,%r15,2), %r11", @@ -852,6 +921,7 @@ fn test_x64_emit() { Inst::mov64_m_r( Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r15, 0), w_r11, + None, ), "4E8B9C3F70E9B2D9", "movq -642586256(%rdi,%r15,1), %r11", @@ -860,6 +930,7 @@ fn test_x64_emit() { Inst::mov64_m_r( Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r15, 3), w_r11, + None, ), "4F8B9CF870E9B2D9", "movq -642586256(%r8,%r15,8), %r11", @@ -868,6 +939,7 @@ fn test_x64_emit() { Inst::mov64_m_r( Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r15, 2), w_r11, + None, ), "4F8B9CBF70E9B2D9", "movq -642586256(%r15,%r15,4), %r11", @@ -1154,6 +1226,20 @@ fn test_x64_emit() { "imull $76543210, %esi", )); + // ======================================================== + // UnaryRmR + + insns.push(( + Inst::unary_rm_r(4, UnaryRmROpcode::Bsr, RegMem::reg(rsi), w_rdi), + "0FBDFE", + "bsrl %esi, %edi", + )); + insns.push(( + Inst::unary_rm_r(8, UnaryRmROpcode::Bsr, RegMem::reg(r15), w_rax), + "490FBDC7", + "bsrq %r15, %rax", + )); + // ======================================================== // Div insns.push(( @@ -1197,6 +1283,29 @@ fn test_x64_emit() { "div %rdi", )); + // ======================================================== + // MulHi + insns.push(( + Inst::mul_hi(4, true /*signed*/, RegMem::reg(regs::rsi())), + "F7EE", + "imul %esi", + )); + insns.push(( + Inst::mul_hi(8, true /*signed*/, RegMem::reg(regs::r15())), + "49F7EF", + "imul %r15", + )); + insns.push(( + Inst::mul_hi(4, false /*signed*/, RegMem::reg(regs::r14())), + "41F7E6", + "mul %r14d", + )); + insns.push(( + Inst::mul_hi(8, false /*signed*/, RegMem::reg(regs::rdi())), + "48F7E7", + "mul %rdi", + )); + // ======================================================== // cdq family: SignExtendRaxRdx insns.push((Inst::sign_extend_rax_to_rdx(2), "6699", "cwd")); @@ -1293,12 +1402,12 @@ fn test_x64_emit() { // ======================================================== // MovZX_RM_R insns.push(( - Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(rax), w_rsi), + Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(rax), w_rsi, None), "0FB6F0", "movzbl %al, %esi", )); insns.push(( - Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(r15), w_rsi), + Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(r15), w_rsi, None), "410FB6F7", "movzbl %r15b, %esi", )); @@ -1307,6 +1416,7 @@ fn test_x64_emit() { ExtMode::BL, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "0FB671F9", "movzbl -7(%rcx), %esi", @@ -1316,6 +1426,7 @@ fn test_x64_emit() { ExtMode::BL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "410FB658F9", "movzbl -7(%r8), %ebx", @@ -1325,6 +1436,7 @@ fn test_x64_emit() { ExtMode::BL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "450FB64AF9", "movzbl -7(%r10), %r9d", @@ -1334,17 +1446,18 @@ fn test_x64_emit() { ExtMode::BL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "410FB653F9", "movzbl -7(%r11), %edx", )); insns.push(( - Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(rax), w_rsi), + Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(rax), w_rsi, None), "480FB6F0", "movzbq %al, %rsi", )); insns.push(( - Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(r10), w_rsi), + Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(r10), w_rsi, None), "490FB6F2", "movzbq %r10b, %rsi", )); @@ -1353,6 +1466,7 @@ fn test_x64_emit() { ExtMode::BQ, 
RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "480FB671F9", "movzbq -7(%rcx), %rsi", @@ -1362,6 +1476,7 @@ fn test_x64_emit() { ExtMode::BQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "490FB658F9", "movzbq -7(%r8), %rbx", @@ -1371,6 +1486,7 @@ fn test_x64_emit() { ExtMode::BQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "4D0FB64AF9", "movzbq -7(%r10), %r9", @@ -1380,17 +1496,18 @@ fn test_x64_emit() { ExtMode::BQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "490FB653F9", "movzbq -7(%r11), %rdx", )); insns.push(( - Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi), + Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi, None), "0FB7F1", "movzwl %cx, %esi", )); insns.push(( - Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(r10), w_rsi), + Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(r10), w_rsi, None), "410FB7F2", "movzwl %r10w, %esi", )); @@ -1399,6 +1516,7 @@ fn test_x64_emit() { ExtMode::WL, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "0FB771F9", "movzwl -7(%rcx), %esi", @@ -1408,6 +1526,7 @@ fn test_x64_emit() { ExtMode::WL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "410FB758F9", "movzwl -7(%r8), %ebx", @@ -1417,6 +1536,7 @@ fn test_x64_emit() { ExtMode::WL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "450FB74AF9", "movzwl -7(%r10), %r9d", @@ -1426,17 +1546,18 @@ fn test_x64_emit() { ExtMode::WL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "410FB753F9", "movzwl -7(%r11), %edx", )); insns.push(( - Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi), + Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi, None), "480FB7F1", "movzwq %cx, %rsi", )); insns.push(( - Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(r11), w_rsi), + Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(r11), w_rsi, None), "490FB7F3", "movzwq %r11w, %rsi", )); @@ -1445,6 +1566,7 @@ fn test_x64_emit() { ExtMode::WQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "480FB771F9", "movzwq -7(%rcx), %rsi", @@ -1454,6 +1576,7 @@ fn test_x64_emit() { ExtMode::WQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "490FB758F9", "movzwq -7(%r8), %rbx", @@ -1463,6 +1586,7 @@ fn test_x64_emit() { ExtMode::WQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "4D0FB74AF9", "movzwq -7(%r10), %r9", @@ -1472,12 +1596,13 @@ fn test_x64_emit() { ExtMode::WQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "490FB753F9", "movzwq -7(%r11), %rdx", )); insns.push(( - Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi), + Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi, None), "8BF1", "movl %ecx, %esi", )); @@ -1486,6 +1611,7 @@ fn test_x64_emit() { ExtMode::LQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "8B71F9", "movl -7(%rcx), %esi", @@ -1495,6 +1621,7 @@ fn test_x64_emit() { ExtMode::LQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "418B58F9", "movl -7(%r8), %ebx", @@ -1504,6 +1631,7 @@ fn test_x64_emit() { ExtMode::LQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "458B4AF9", "movl -7(%r10), %r9d", @@ -1513,6 +1641,7 @@ fn test_x64_emit() { ExtMode::LQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "418B53F9", "movl -7(%r11), %edx", @@ -1521,42 +1650,42 @@ fn test_x64_emit() { // ======================================================== // Mov64_M_R insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), 
w_rcx), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_rcx, None), "488B8C18B3000000", "movq 179(%rax,%rbx,1), %rcx", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_r8), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_r8, None), "4C8B8418B3000000", "movq 179(%rax,%rbx,1), %r8", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_rcx), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_rcx, None), "4A8B8C08B3000000", "movq 179(%rax,%r9,1), %rcx", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_r8), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_r8, None), "4E8B8408B3000000", "movq 179(%rax,%r9,1), %r8", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx, None), "498B8C1AB3000000", "movq 179(%r10,%rbx,1), %rcx", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_r8), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_r8, None), "4D8B841AB3000000", "movq 179(%r10,%rbx,1), %r8", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_rcx), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_rcx, None), "4B8B8C0AB3000000", "movq 179(%r10,%r9,1), %rcx", )); insns.push(( - Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8), + Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8, None), "4F8B840AB3000000", "movq 179(%r10,%r9,1), %r8", )); @@ -1595,12 +1724,12 @@ fn test_x64_emit() { // ======================================================== // MovSX_RM_R insns.push(( - Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rcx), w_rsi, None), "0FBEF1", "movsbl %cl, %esi", )); insns.push(( - Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(r14), w_rsi), + Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(r14), w_rsi, None), "410FBEF6", "movsbl %r14b, %esi", )); @@ -1609,6 +1738,7 @@ fn test_x64_emit() { ExtMode::BL, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "0FBE71F9", "movsbl -7(%rcx), %esi", @@ -1618,6 +1748,7 @@ fn test_x64_emit() { ExtMode::BL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "410FBE58F9", "movsbl -7(%r8), %ebx", @@ -1627,6 +1758,7 @@ fn test_x64_emit() { ExtMode::BL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "450FBE4AF9", "movsbl -7(%r10), %r9d", @@ -1636,17 +1768,18 @@ fn test_x64_emit() { ExtMode::BL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "410FBE53F9", "movsbl -7(%r11), %edx", )); insns.push(( - Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(rcx), w_rsi, None), "480FBEF1", "movsbq %cl, %rsi", )); insns.push(( - Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(r15), w_rsi), + Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(r15), w_rsi, None), "490FBEF7", "movsbq %r15b, %rsi", )); @@ -1655,6 +1788,7 @@ fn test_x64_emit() { ExtMode::BQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "480FBE71F9", "movsbq -7(%rcx), %rsi", @@ -1664,6 +1798,7 @@ fn test_x64_emit() { ExtMode::BQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "490FBE58F9", "movsbq -7(%r8), %rbx", @@ -1673,6 +1808,7 @@ fn test_x64_emit() { ExtMode::BQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "4D0FBE4AF9", "movsbq -7(%r10), %r9", @@ -1682,17 
+1818,18 @@ fn test_x64_emit() { ExtMode::BQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "490FBE53F9", "movsbq -7(%r11), %rdx", )); insns.push(( - Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi, None), "0FBFF1", "movswl %cx, %esi", )); insns.push(( - Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(r14), w_rsi), + Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(r14), w_rsi, None), "410FBFF6", "movswl %r14w, %esi", )); @@ -1701,6 +1838,7 @@ fn test_x64_emit() { ExtMode::WL, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "0FBF71F9", "movswl -7(%rcx), %esi", @@ -1710,6 +1848,7 @@ fn test_x64_emit() { ExtMode::WL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "410FBF58F9", "movswl -7(%r8), %ebx", @@ -1719,6 +1858,7 @@ fn test_x64_emit() { ExtMode::WL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "450FBF4AF9", "movswl -7(%r10), %r9d", @@ -1728,17 +1868,18 @@ fn test_x64_emit() { ExtMode::WL, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "410FBF53F9", "movswl -7(%r11), %edx", )); insns.push(( - Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi, None), "480FBFF1", "movswq %cx, %rsi", )); insns.push(( - Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(r13), w_rsi), + Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(r13), w_rsi, None), "490FBFF5", "movswq %r13w, %rsi", )); @@ -1747,6 +1888,7 @@ fn test_x64_emit() { ExtMode::WQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "480FBF71F9", "movswq -7(%rcx), %rsi", @@ -1756,6 +1898,7 @@ fn test_x64_emit() { ExtMode::WQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "490FBF58F9", "movswq -7(%r8), %rbx", @@ -1765,6 +1908,7 @@ fn test_x64_emit() { ExtMode::WQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "4D0FBF4AF9", "movswq -7(%r10), %r9", @@ -1774,17 +1918,18 @@ fn test_x64_emit() { ExtMode::WQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "490FBF53F9", "movswq -7(%r11), %rdx", )); insns.push(( - Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi), + Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi, None), "4863F1", "movslq %ecx, %rsi", )); insns.push(( - Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(r15), w_rsi), + Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(r15), w_rsi, None), "4963F7", "movslq %r15d, %rsi", )); @@ -1793,6 +1938,7 @@ fn test_x64_emit() { ExtMode::LQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)), w_rsi, + None, ), "486371F9", "movslq -7(%rcx), %rsi", @@ -1802,6 +1948,7 @@ fn test_x64_emit() { ExtMode::LQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)), w_rbx, + None, ), "496358F9", "movslq -7(%r8), %rbx", @@ -1811,6 +1958,7 @@ fn test_x64_emit() { ExtMode::LQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)), w_r9, + None, ), "4D634AF9", "movslq -7(%r10), %r9", @@ -1820,6 +1968,7 @@ fn test_x64_emit() { ExtMode::LQ, RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)), w_rdx, + None, ), "496353F9", "movslq -7(%r11), %rdx", @@ -1828,325 +1977,325 @@ fn test_x64_emit() { // ======================================================== // Mov_R_M. Byte stores are tricky. Check everything carefully. 
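// Editor's sketch (illustration, not a line of the patch): why the header above
// says byte stores are tricky. For an 8-bit register operand, hardware encodings
// 4..=7 name %ah/%ch/%dh/%bh unless a REX prefix is present, in which case they
// name %spl/%bpl/%sil/%dil instead; %r8b..%r15b always need a REX bit anyway.
// The expectations below reflect this: "884763" (movb %al) has no REX byte,
// while "40887063" (movb %sil) starts with the bare REX prefix 0x40.
fn byte_reg_needs_rex(hw_enc: u8) -> bool {
    (4..=7).contains(&hw_enc) || hw_enc >= 8
}

fn main() {
    assert!(!byte_reg_needs_rex(0)); // %al
    assert!(byte_reg_needs_rex(6));  // %sil
    assert!(byte_reg_needs_rex(13)); // %r13b
}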
insns.push(( - Inst::mov_r_m(8, rax, Amode::imm_reg(99, rdi)), + Inst::mov_r_m(8, rax, Amode::imm_reg(99, rdi), None), "48894763", "movq %rax, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(8, rbx, Amode::imm_reg(99, r8)), + Inst::mov_r_m(8, rbx, Amode::imm_reg(99, r8), None), "49895863", "movq %rbx, 99(%r8)", )); insns.push(( - Inst::mov_r_m(8, rcx, Amode::imm_reg(99, rsi)), + Inst::mov_r_m(8, rcx, Amode::imm_reg(99, rsi), None), "48894E63", "movq %rcx, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(8, rdx, Amode::imm_reg(99, r9)), + Inst::mov_r_m(8, rdx, Amode::imm_reg(99, r9), None), "49895163", "movq %rdx, 99(%r9)", )); insns.push(( - Inst::mov_r_m(8, rsi, Amode::imm_reg(99, rax)), + Inst::mov_r_m(8, rsi, Amode::imm_reg(99, rax), None), "48897063", "movq %rsi, 99(%rax)", )); insns.push(( - Inst::mov_r_m(8, rdi, Amode::imm_reg(99, r15)), + Inst::mov_r_m(8, rdi, Amode::imm_reg(99, r15), None), "49897F63", "movq %rdi, 99(%r15)", )); insns.push(( - Inst::mov_r_m(8, rsp, Amode::imm_reg(99, rcx)), + Inst::mov_r_m(8, rsp, Amode::imm_reg(99, rcx), None), "48896163", "movq %rsp, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(8, rbp, Amode::imm_reg(99, r14)), + Inst::mov_r_m(8, rbp, Amode::imm_reg(99, r14), None), "49896E63", "movq %rbp, 99(%r14)", )); insns.push(( - Inst::mov_r_m(8, r8, Amode::imm_reg(99, rdi)), + Inst::mov_r_m(8, r8, Amode::imm_reg(99, rdi), None), "4C894763", "movq %r8, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(8, r9, Amode::imm_reg(99, r8)), + Inst::mov_r_m(8, r9, Amode::imm_reg(99, r8), None), "4D894863", "movq %r9, 99(%r8)", )); insns.push(( - Inst::mov_r_m(8, r10, Amode::imm_reg(99, rsi)), + Inst::mov_r_m(8, r10, Amode::imm_reg(99, rsi), None), "4C895663", "movq %r10, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(8, r11, Amode::imm_reg(99, r9)), + Inst::mov_r_m(8, r11, Amode::imm_reg(99, r9), None), "4D895963", "movq %r11, 99(%r9)", )); insns.push(( - Inst::mov_r_m(8, r12, Amode::imm_reg(99, rax)), + Inst::mov_r_m(8, r12, Amode::imm_reg(99, rax), None), "4C896063", "movq %r12, 99(%rax)", )); insns.push(( - Inst::mov_r_m(8, r13, Amode::imm_reg(99, r15)), + Inst::mov_r_m(8, r13, Amode::imm_reg(99, r15), None), "4D896F63", "movq %r13, 99(%r15)", )); insns.push(( - Inst::mov_r_m(8, r14, Amode::imm_reg(99, rcx)), + Inst::mov_r_m(8, r14, Amode::imm_reg(99, rcx), None), "4C897163", "movq %r14, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(8, r15, Amode::imm_reg(99, r14)), + Inst::mov_r_m(8, r15, Amode::imm_reg(99, r14), None), "4D897E63", "movq %r15, 99(%r14)", )); // insns.push(( - Inst::mov_r_m(4, rax, Amode::imm_reg(99, rdi)), + Inst::mov_r_m(4, rax, Amode::imm_reg(99, rdi), None), "894763", "movl %eax, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(4, rbx, Amode::imm_reg(99, r8)), + Inst::mov_r_m(4, rbx, Amode::imm_reg(99, r8), None), "41895863", "movl %ebx, 99(%r8)", )); insns.push(( - Inst::mov_r_m(4, rcx, Amode::imm_reg(99, rsi)), + Inst::mov_r_m(4, rcx, Amode::imm_reg(99, rsi), None), "894E63", "movl %ecx, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(4, rdx, Amode::imm_reg(99, r9)), + Inst::mov_r_m(4, rdx, Amode::imm_reg(99, r9), None), "41895163", "movl %edx, 99(%r9)", )); insns.push(( - Inst::mov_r_m(4, rsi, Amode::imm_reg(99, rax)), + Inst::mov_r_m(4, rsi, Amode::imm_reg(99, rax), None), "897063", "movl %esi, 99(%rax)", )); insns.push(( - Inst::mov_r_m(4, rdi, Amode::imm_reg(99, r15)), + Inst::mov_r_m(4, rdi, Amode::imm_reg(99, r15), None), "41897F63", "movl %edi, 99(%r15)", )); insns.push(( - Inst::mov_r_m(4, rsp, Amode::imm_reg(99, rcx)), + Inst::mov_r_m(4, rsp, Amode::imm_reg(99, 
rcx), None), "896163", "movl %esp, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(4, rbp, Amode::imm_reg(99, r14)), + Inst::mov_r_m(4, rbp, Amode::imm_reg(99, r14), None), "41896E63", "movl %ebp, 99(%r14)", )); insns.push(( - Inst::mov_r_m(4, r8, Amode::imm_reg(99, rdi)), + Inst::mov_r_m(4, r8, Amode::imm_reg(99, rdi), None), "44894763", "movl %r8d, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(4, r9, Amode::imm_reg(99, r8)), + Inst::mov_r_m(4, r9, Amode::imm_reg(99, r8), None), "45894863", "movl %r9d, 99(%r8)", )); insns.push(( - Inst::mov_r_m(4, r10, Amode::imm_reg(99, rsi)), + Inst::mov_r_m(4, r10, Amode::imm_reg(99, rsi), None), "44895663", "movl %r10d, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(4, r11, Amode::imm_reg(99, r9)), + Inst::mov_r_m(4, r11, Amode::imm_reg(99, r9), None), "45895963", "movl %r11d, 99(%r9)", )); insns.push(( - Inst::mov_r_m(4, r12, Amode::imm_reg(99, rax)), + Inst::mov_r_m(4, r12, Amode::imm_reg(99, rax), None), "44896063", "movl %r12d, 99(%rax)", )); insns.push(( - Inst::mov_r_m(4, r13, Amode::imm_reg(99, r15)), + Inst::mov_r_m(4, r13, Amode::imm_reg(99, r15), None), "45896F63", "movl %r13d, 99(%r15)", )); insns.push(( - Inst::mov_r_m(4, r14, Amode::imm_reg(99, rcx)), + Inst::mov_r_m(4, r14, Amode::imm_reg(99, rcx), None), "44897163", "movl %r14d, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(4, r15, Amode::imm_reg(99, r14)), + Inst::mov_r_m(4, r15, Amode::imm_reg(99, r14), None), "45897E63", "movl %r15d, 99(%r14)", )); // insns.push(( - Inst::mov_r_m(2, rax, Amode::imm_reg(99, rdi)), + Inst::mov_r_m(2, rax, Amode::imm_reg(99, rdi), None), "66894763", "movw %ax, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(2, rbx, Amode::imm_reg(99, r8)), + Inst::mov_r_m(2, rbx, Amode::imm_reg(99, r8), None), "6641895863", "movw %bx, 99(%r8)", )); insns.push(( - Inst::mov_r_m(2, rcx, Amode::imm_reg(99, rsi)), + Inst::mov_r_m(2, rcx, Amode::imm_reg(99, rsi), None), "66894E63", "movw %cx, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(2, rdx, Amode::imm_reg(99, r9)), + Inst::mov_r_m(2, rdx, Amode::imm_reg(99, r9), None), "6641895163", "movw %dx, 99(%r9)", )); insns.push(( - Inst::mov_r_m(2, rsi, Amode::imm_reg(99, rax)), + Inst::mov_r_m(2, rsi, Amode::imm_reg(99, rax), None), "66897063", "movw %si, 99(%rax)", )); insns.push(( - Inst::mov_r_m(2, rdi, Amode::imm_reg(99, r15)), + Inst::mov_r_m(2, rdi, Amode::imm_reg(99, r15), None), "6641897F63", "movw %di, 99(%r15)", )); insns.push(( - Inst::mov_r_m(2, rsp, Amode::imm_reg(99, rcx)), + Inst::mov_r_m(2, rsp, Amode::imm_reg(99, rcx), None), "66896163", "movw %sp, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(2, rbp, Amode::imm_reg(99, r14)), + Inst::mov_r_m(2, rbp, Amode::imm_reg(99, r14), None), "6641896E63", "movw %bp, 99(%r14)", )); insns.push(( - Inst::mov_r_m(2, r8, Amode::imm_reg(99, rdi)), + Inst::mov_r_m(2, r8, Amode::imm_reg(99, rdi), None), "6644894763", "movw %r8w, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(2, r9, Amode::imm_reg(99, r8)), + Inst::mov_r_m(2, r9, Amode::imm_reg(99, r8), None), "6645894863", "movw %r9w, 99(%r8)", )); insns.push(( - Inst::mov_r_m(2, r10, Amode::imm_reg(99, rsi)), + Inst::mov_r_m(2, r10, Amode::imm_reg(99, rsi), None), "6644895663", "movw %r10w, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(2, r11, Amode::imm_reg(99, r9)), + Inst::mov_r_m(2, r11, Amode::imm_reg(99, r9), None), "6645895963", "movw %r11w, 99(%r9)", )); insns.push(( - Inst::mov_r_m(2, r12, Amode::imm_reg(99, rax)), + Inst::mov_r_m(2, r12, Amode::imm_reg(99, rax), None), "6644896063", "movw %r12w, 99(%rax)", )); insns.push(( - Inst::mov_r_m(2, 
r13, Amode::imm_reg(99, r15)), + Inst::mov_r_m(2, r13, Amode::imm_reg(99, r15), None), "6645896F63", "movw %r13w, 99(%r15)", )); insns.push(( - Inst::mov_r_m(2, r14, Amode::imm_reg(99, rcx)), + Inst::mov_r_m(2, r14, Amode::imm_reg(99, rcx), None), "6644897163", "movw %r14w, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(2, r15, Amode::imm_reg(99, r14)), + Inst::mov_r_m(2, r15, Amode::imm_reg(99, r14), None), "6645897E63", "movw %r15w, 99(%r14)", )); // insns.push(( - Inst::mov_r_m(1, rax, Amode::imm_reg(99, rdi)), + Inst::mov_r_m(1, rax, Amode::imm_reg(99, rdi), None), "884763", "movb %al, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(1, rbx, Amode::imm_reg(99, r8)), + Inst::mov_r_m(1, rbx, Amode::imm_reg(99, r8), None), "41885863", "movb %bl, 99(%r8)", )); insns.push(( - Inst::mov_r_m(1, rcx, Amode::imm_reg(99, rsi)), + Inst::mov_r_m(1, rcx, Amode::imm_reg(99, rsi), None), "884E63", "movb %cl, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(1, rdx, Amode::imm_reg(99, r9)), + Inst::mov_r_m(1, rdx, Amode::imm_reg(99, r9), None), "41885163", "movb %dl, 99(%r9)", )); insns.push(( - Inst::mov_r_m(1, rsi, Amode::imm_reg(99, rax)), + Inst::mov_r_m(1, rsi, Amode::imm_reg(99, rax), None), "40887063", "movb %sil, 99(%rax)", )); insns.push(( - Inst::mov_r_m(1, rdi, Amode::imm_reg(99, r15)), + Inst::mov_r_m(1, rdi, Amode::imm_reg(99, r15), None), "41887F63", "movb %dil, 99(%r15)", )); insns.push(( - Inst::mov_r_m(1, rsp, Amode::imm_reg(99, rcx)), + Inst::mov_r_m(1, rsp, Amode::imm_reg(99, rcx), None), "40886163", "movb %spl, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(1, rbp, Amode::imm_reg(99, r14)), + Inst::mov_r_m(1, rbp, Amode::imm_reg(99, r14), None), "41886E63", "movb %bpl, 99(%r14)", )); insns.push(( - Inst::mov_r_m(1, r8, Amode::imm_reg(99, rdi)), + Inst::mov_r_m(1, r8, Amode::imm_reg(99, rdi), None), "44884763", "movb %r8b, 99(%rdi)", )); insns.push(( - Inst::mov_r_m(1, r9, Amode::imm_reg(99, r8)), + Inst::mov_r_m(1, r9, Amode::imm_reg(99, r8), None), "45884863", "movb %r9b, 99(%r8)", )); insns.push(( - Inst::mov_r_m(1, r10, Amode::imm_reg(99, rsi)), + Inst::mov_r_m(1, r10, Amode::imm_reg(99, rsi), None), "44885663", "movb %r10b, 99(%rsi)", )); insns.push(( - Inst::mov_r_m(1, r11, Amode::imm_reg(99, r9)), + Inst::mov_r_m(1, r11, Amode::imm_reg(99, r9), None), "45885963", "movb %r11b, 99(%r9)", )); insns.push(( - Inst::mov_r_m(1, r12, Amode::imm_reg(99, rax)), + Inst::mov_r_m(1, r12, Amode::imm_reg(99, rax), None), "44886063", "movb %r12b, 99(%rax)", )); insns.push(( - Inst::mov_r_m(1, r13, Amode::imm_reg(99, r15)), + Inst::mov_r_m(1, r13, Amode::imm_reg(99, r15), None), "45886F63", "movb %r13b, 99(%r15)", )); insns.push(( - Inst::mov_r_m(1, r14, Amode::imm_reg(99, rcx)), + Inst::mov_r_m(1, r14, Amode::imm_reg(99, rcx), None), "44887163", "movb %r14b, 99(%rcx)", )); insns.push(( - Inst::mov_r_m(1, r15, Amode::imm_reg(99, r14)), + Inst::mov_r_m(1, r15, Amode::imm_reg(99, r14), None), "45887E63", "movb %r15b, 99(%r14)", )); @@ -2154,110 +2303,130 @@ fn test_x64_emit() { // ======================================================== // Shift_R insns.push(( - Inst::shift_r(false, ShiftKind::Left, None, w_rdi), + Inst::shift_r(false, ShiftKind::ShiftLeft, None, w_rdi), "D3E7", "shll %cl, %edi", )); insns.push(( - Inst::shift_r(false, ShiftKind::Left, None, w_r12), + Inst::shift_r(false, ShiftKind::ShiftLeft, None, w_r12), "41D3E4", "shll %cl, %r12d", )); insns.push(( - Inst::shift_r(false, ShiftKind::Left, Some(2), w_r8), + Inst::shift_r(false, ShiftKind::ShiftLeft, Some(2), w_r8), "41C1E002", "shll $2, 
%r8d", )); insns.push(( - Inst::shift_r(false, ShiftKind::Left, Some(31), w_r13), + Inst::shift_r(false, ShiftKind::ShiftLeft, Some(31), w_r13), "41C1E51F", "shll $31, %r13d", )); insns.push(( - Inst::shift_r(true, ShiftKind::Left, None, w_r13), + Inst::shift_r(true, ShiftKind::ShiftLeft, None, w_r13), "49D3E5", "shlq %cl, %r13", )); insns.push(( - Inst::shift_r(true, ShiftKind::Left, None, w_rdi), + Inst::shift_r(true, ShiftKind::ShiftLeft, None, w_rdi), "48D3E7", "shlq %cl, %rdi", )); insns.push(( - Inst::shift_r(true, ShiftKind::Left, Some(2), w_r8), + Inst::shift_r(true, ShiftKind::ShiftLeft, Some(2), w_r8), "49C1E002", "shlq $2, %r8", )); insns.push(( - Inst::shift_r(true, ShiftKind::Left, Some(3), w_rbx), + Inst::shift_r(true, ShiftKind::ShiftLeft, Some(3), w_rbx), "48C1E303", "shlq $3, %rbx", )); insns.push(( - Inst::shift_r(true, ShiftKind::Left, Some(63), w_r13), + Inst::shift_r(true, ShiftKind::ShiftLeft, Some(63), w_r13), "49C1E53F", "shlq $63, %r13", )); insns.push(( - Inst::shift_r(false, ShiftKind::RightZ, None, w_rdi), + Inst::shift_r(false, ShiftKind::ShiftRightLogical, None, w_rdi), "D3EF", "shrl %cl, %edi", )); insns.push(( - Inst::shift_r(false, ShiftKind::RightZ, Some(2), w_r8), + Inst::shift_r(false, ShiftKind::ShiftRightLogical, Some(2), w_r8), "41C1E802", "shrl $2, %r8d", )); insns.push(( - Inst::shift_r(false, ShiftKind::RightZ, Some(31), w_r13), + Inst::shift_r(false, ShiftKind::ShiftRightLogical, Some(31), w_r13), "41C1ED1F", "shrl $31, %r13d", )); insns.push(( - Inst::shift_r(true, ShiftKind::RightZ, None, w_rdi), + Inst::shift_r(true, ShiftKind::ShiftRightLogical, None, w_rdi), "48D3EF", "shrq %cl, %rdi", )); insns.push(( - Inst::shift_r(true, ShiftKind::RightZ, Some(2), w_r8), + Inst::shift_r(true, ShiftKind::ShiftRightLogical, Some(2), w_r8), "49C1E802", "shrq $2, %r8", )); insns.push(( - Inst::shift_r(true, ShiftKind::RightZ, Some(63), w_r13), + Inst::shift_r(true, ShiftKind::ShiftRightLogical, Some(63), w_r13), "49C1ED3F", "shrq $63, %r13", )); insns.push(( - Inst::shift_r(false, ShiftKind::RightS, None, w_rdi), + Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, None, w_rdi), "D3FF", "sarl %cl, %edi", )); insns.push(( - Inst::shift_r(false, ShiftKind::RightS, Some(2), w_r8), + Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, Some(2), w_r8), "41C1F802", "sarl $2, %r8d", )); insns.push(( - Inst::shift_r(false, ShiftKind::RightS, Some(31), w_r13), + Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, Some(31), w_r13), "41C1FD1F", "sarl $31, %r13d", )); insns.push(( - Inst::shift_r(true, ShiftKind::RightS, None, w_rdi), + Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, None, w_rdi), "48D3FF", "sarq %cl, %rdi", )); insns.push(( - Inst::shift_r(true, ShiftKind::RightS, Some(2), w_r8), + Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, Some(2), w_r8), "49C1F802", "sarq $2, %r8", )); insns.push(( - Inst::shift_r(true, ShiftKind::RightS, Some(63), w_r13), + Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, Some(63), w_r13), "49C1FD3F", "sarq $63, %r13", )); + insns.push(( + Inst::shift_r(true, ShiftKind::RotateLeft, None, w_r8), + "49D3C0", + "rolq %cl, %r8", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::RotateLeft, Some(3), w_r9), + "41C1C103", + "roll $3, %r9d", + )); + insns.push(( + Inst::shift_r(false, ShiftKind::RotateRight, None, w_rsi), + "D3CE", + "rorl %cl, %esi", + )); + insns.push(( + Inst::shift_r(true, ShiftKind::RotateRight, Some(5), w_r15), + "49C1CF05", + "rorq $5, %r15", + )); // 
======================================================== // CmpRMIR @@ -2774,12 +2943,12 @@ fn test_x64_emit() { "andnps %xmm4, %xmm11", )); insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movaps, RegMem::reg(xmm5), w_xmm14), + Inst::xmm_mov_rm_r(SseOpcode::Movaps, RegMem::reg(xmm5), w_xmm14, None), "440F28F5", "movaps %xmm5, %xmm14", )); insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movd, RegMem::reg(rax), w_xmm15), + Inst::xmm_mov_rm_r(SseOpcode::Movd, RegMem::reg(rax), w_xmm15, None), "66440F6EF8", "movd %eax, %xmm15", )); @@ -2790,19 +2959,24 @@ fn test_x64_emit() { )); insns.push(( - Inst::xmm_mov_r_m(SseOpcode::Movd, xmm0, Amode::imm_reg(321, rbx)), + Inst::xmm_mov_r_m(SseOpcode::Movd, xmm0, Amode::imm_reg(321, rbx), None), "660F7E8341010000", "movd %xmm0, 321(%rbx)", )); insns.push(( - Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)), + Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), "F3450F11BC2480000000", "movss %xmm15, 128(%r12)", )); insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movd, RegMem::mem(Amode::imm_reg(2, r10)), w_xmm9), + Inst::xmm_mov_rm_r( + SseOpcode::Movd, + RegMem::mem(Amode::imm_reg(2, r10)), + w_xmm9, + None, + ), "66450F6E4A02", "movd 2(%r10), %xmm9", )); @@ -2813,12 +2987,12 @@ fn test_x64_emit() { "orps %xmm5, %xmm4", )); insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2), + Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2, None), "F3410F10D5", "movss %xmm13, %xmm2", )); insns.push(( - Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(xmm14), w_xmm3), + Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(xmm14), w_xmm3, None), "F2410F10DE", "movsd %xmm14, %xmm3", )); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 20b7037ba458..d0c9549892b3 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -4,16 +4,17 @@ #![allow(non_snake_case)] #![allow(non_camel_case_types)] +use alloc::boxed::Box; use alloc::vec::Vec; -use smallvec::SmallVec; use std::fmt; use std::string::{String, ToString}; use regalloc::RegUsageCollector; use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable}; +use smallvec::SmallVec; use crate::binemit::CodeOffset; -use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8}; +use crate::ir::types::*; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::machinst::*; use crate::settings::Flags; @@ -49,6 +50,14 @@ pub enum Inst { dst: Writable, }, + /// Instructions on GPR that only read src and defines dst (dst is not modified): bsr, etc. + UnaryRmR { + size: u8, // 2, 4 or 8 + op: UnaryRmROpcode, + src: RegMem, + dst: Writable, + }, + /// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr) Div { size: u8, // 1, 2, 4 or 8 @@ -57,17 +66,25 @@ pub enum Inst { loc: SourceLoc, }, + /// The high bits (RDX) of a (un)signed multiply: RDX:RAX := RAX * rhs. + MulHi { size: u8, signed: bool, rhs: RegMem }, + /// A synthetic sequence to implement the right inline checks for remainder and division, - /// assuming the dividend is in $rax. - /// Puts the result back into $rax if is_div, $rdx if !is_div, to mimic what the div + /// assuming the dividend is in %rax. + /// Puts the result back into %rax if is_div, %rdx if !is_div, to mimic what the div /// instruction does. 
/// The generated code sequence is described in the emit's function match arm for this /// instruction. + /// + /// Note: %rdx is marked as modified by this instruction, to avoid an early clobber problem + /// with the temporary and divisor registers. Make sure to zero %rdx right before this + /// instruction, or you might run into regalloc failures where %rdx is live before its first + /// def! CheckedDivOrRemSeq { - is_div: bool, - is_signed: bool, + kind: DivOrRemKind, size: u8, divisor: Reg, + tmp: Option>, loc: SourceLoc, }, @@ -98,12 +115,16 @@ pub enum Inst { ext_mode: ExtMode, src: RegMem, dst: Writable, + /// Source location, if the memory access can be out-of-bounds. + srcloc: Option, }, /// A plain 64-bit integer load, since MovZX_RM_R can't represent that. Mov64_M_R { src: SyntheticAmode, dst: Writable, + /// Source location, if the memory access can be out-of-bounds. + srcloc: Option, }, /// Loads the memory address of addr into dst. @@ -117,6 +138,8 @@ pub enum Inst { ext_mode: ExtMode, src: RegMem, dst: Writable, + /// Source location, if the memory access can be out-of-bounds. + srcloc: Option, }, /// Integer stores: mov (b w l q) reg addr. @@ -124,6 +147,8 @@ pub enum Inst { size: u8, // 1, 2, 4 or 8. src: Reg, dst: SyntheticAmode, + /// Source location, if the memory access can be out-of-bounds. + srcloc: Option, }, /// Arithmetic shifts: (shl shr sar) (l q) imm reg. @@ -180,6 +205,8 @@ pub enum Inst { op: SseOpcode, src: RegMem, dst: Writable, + /// Source location, if the memory access can be out-of-bounds. + srcloc: Option, }, /// mov reg addr (good for all memory stores from xmm registers) @@ -187,6 +214,8 @@ pub enum Inst { op: SseOpcode, src: Reg, dst: SyntheticAmode, + /// Source location, if the memory access can be out-of-bounds. + srcloc: Option, }, // ===================================== @@ -255,6 +284,14 @@ pub enum Inst { /// An instruction that will always trigger the illegal instruction exception. Ud2 { trap_info: (SourceLoc, TrapCode) }, + /// Loads an external symbol in a register, with a relocation: movabsq $name, dst + LoadExtName { + dst: Writable, + name: Box, + srcloc: SourceLoc, + offset: i64, + }, + // ===================================== // Meta-instructions generating no code. /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This @@ -262,15 +299,13 @@ pub enum Inst { VirtualSPOffsetAdj { offset: i64 }, } -// Handy constructors for Insts. - -// For various sizes, will some number of lowest bits sign extend to be the -// same as the whole value? -pub(crate) fn low32willSXto64(x: u64) -> bool { +pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool { let xs = x as i64; xs == ((xs << 32) >> 32) } +// Handy constructors for Insts. 
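// Editor's sketch (illustration, not a line of the patch): what the renamed
// low32_will_sign_extend_to_64 above tests. Under REX.W, a 32-bit immediate is
// sign-extended to 64 bits, so a u64 constant can only be encoded as an imm32
// if sign-extending its low 32 bits reproduces the original value. The same
// predicate is why, further down in the lowering changes, 64-bit constants
// above 0x7fffffff stop being treated as 32-bit immediates (Iconst,
// gen_constant, input_to_reg_mem_imm).
fn low32_sign_extends_to_64(x: u64) -> bool {
    let xs = x as i64;
    xs == ((xs << 32) >> 32)
}

fn main() {
    assert!(low32_sign_extends_to_64(0x7fff_ffff));           // largest positive imm32
    assert!(low32_sign_extends_to_64(0xffff_ffff_ffff_fff6)); // -10, sign-extends cleanly
    assert!(!low32_sign_extends_to_64(0x8000_0000));          // would turn negative if truncated
    assert!(!low32_sign_extends_to_64(0x1_0000_0000));        // high bit simply lost
}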
+ impl Inst { pub(crate) fn nop(len: u8) -> Self { debug_assert!(len <= 16); @@ -292,6 +327,17 @@ impl Inst { } } + pub(crate) fn unary_rm_r( + size: u8, + op: UnaryRmROpcode, + src: RegMem, + dst: Writable, + ) -> Self { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(size == 8 || size == 4 || size == 2); + Self::UnaryRmR { size, op, src, dst } + } + pub(crate) fn div(size: u8, signed: bool, divisor: RegMem, loc: SourceLoc) -> Inst { debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); Inst::Div { @@ -301,6 +347,12 @@ impl Inst { loc, } } + + pub(crate) fn mul_hi(size: u8, signed: bool, rhs: RegMem) -> Inst { + debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); + Inst::MulHi { size, signed, rhs } + } + pub(crate) fn sign_extend_rax_to_rdx(size: u8) -> Inst { debug_assert!(size == 8 || size == 4 || size == 2); Inst::SignExtendRaxRdx { size } @@ -309,7 +361,11 @@ impl Inst { pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); if !dst_is_64 { - debug_assert!(low32willSXto64(simm64)); + debug_assert!( + low32_will_sign_extend_to_64(simm64), + "{} won't sign-extend to 64 bits!", + simm64 + ); } Inst::Imm_R { dst_is_64, @@ -324,9 +380,19 @@ impl Inst { Inst::Mov_R_R { is_64, src, dst } } - pub(crate) fn xmm_mov_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { + pub(crate) fn xmm_mov_rm_r( + op: SseOpcode, + src: RegMem, + dst: Writable, + srcloc: Option, + ) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::V128); - Inst::XMM_Mov_RM_R { op, src, dst } + Inst::XMM_Mov_RM_R { + op, + src, + dst, + srcloc, + } } pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Self { @@ -334,37 +400,77 @@ impl Inst { Inst::XMM_RM_R { op, src, dst } } - pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into) -> Inst { + pub(crate) fn xmm_mov_r_m( + op: SseOpcode, + src: Reg, + dst: impl Into, + srcloc: Option, + ) -> Inst { debug_assert!(src.get_class() == RegClass::V128); Inst::XMM_Mov_R_M { op, src, dst: dst.into(), + srcloc, } } - pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { + pub(crate) fn movzx_rm_r( + ext_mode: ExtMode, + src: RegMem, + dst: Writable, + srcloc: Option, + ) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); - Inst::MovZX_RM_R { ext_mode, src, dst } + Inst::MovZX_RM_R { + ext_mode, + src, + dst, + srcloc, + } + } + + pub(crate) fn movsx_rm_r( + ext_mode: ExtMode, + src: RegMem, + dst: Writable, + srcloc: Option, + ) -> Inst { + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::MovSX_RM_R { + ext_mode, + src, + dst, + srcloc, + } } - pub(crate) fn mov64_m_r(src: impl Into, dst: Writable) -> Inst { + pub(crate) fn mov64_m_r( + src: impl Into, + dst: Writable, + srcloc: Option, + ) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::Mov64_M_R { src: src.into(), dst, + srcloc, } } - pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); - Inst::MovSX_RM_R { ext_mode, src, dst } + /// A convenience function to be able to use a RegMem as the source of a move. 
+ pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable, srcloc: Option) -> Inst { + match src { + RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst), + RegMem::Mem { addr } => Self::mov64_m_r(addr, dst, srcloc), + } } pub(crate) fn mov_r_m( size: u8, // 1, 2, 4 or 8 src: Reg, dst: impl Into, + srcloc: Option, ) -> Inst { debug_assert!(size == 8 || size == 4 || size == 2 || size == 1); debug_assert!(src.get_class() == RegClass::I64); @@ -372,6 +478,7 @@ impl Inst { size, src, dst: dst.into(), + srcloc, } } @@ -548,6 +655,7 @@ impl ShowWithRRU for Inst { match self { Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len), + Inst::Alu_RMI_R { is_64, op, @@ -559,6 +667,14 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)), ), + + Inst::UnaryRmR { src, dst, op, size } => format!( + "{} {}, {}", + ljustify2(op.to_string(), suffixBWLQ(*size)), + src.show_rru_sized(mb_rru, *size), + show_ireg_sized(dst.to_reg(), mb_rru, *size), + ), + Inst::Div { size, signed, @@ -573,16 +689,30 @@ impl ShowWithRRU for Inst { }), divisor.show_rru_sized(mb_rru, *size) ), + Inst::MulHi { + size, signed, rhs, .. + } => format!( + "{} {}", + ljustify(if *signed { + "imul".to_string() + } else { + "mul".to_string() + }), + rhs.show_rru_sized(mb_rru, *size) + ), Inst::CheckedDivOrRemSeq { - is_div, - is_signed, + kind, size, divisor, .. } => format!( - "{}{} $rax:$rdx, {}", - if *is_signed { "s" } else { "u" }, - if *is_div { "div " } else { "rem " }, + "{} $rax:$rdx, {}", + match kind { + DivOrRemKind::SignedDiv => "sdiv", + DivOrRemKind::UnsignedDiv => "udiv", + DivOrRemKind::SignedRem => "srem", + DivOrRemKind::UnsignedRem => "urem", + }, show_ireg_sized(*divisor, mb_rru, *size), ), Inst::SignExtendRaxRdx { size } => match size { @@ -592,13 +722,13 @@ impl ShowWithRRU for Inst { _ => unreachable!(), } .into(), - Inst::XMM_Mov_RM_R { op, src, dst } => format!( + Inst::XMM_Mov_RM_R { op, src, dst, .. } => format!( "{} {}, {}", ljustify(op.to_string()), src.show_rru_sized(mb_rru, op.src_size()), show_ireg_sized(dst.to_reg(), mb_rru, 8), ), - Inst::XMM_Mov_R_M { op, src, dst } => format!( + Inst::XMM_Mov_R_M { op, src, dst, .. } => format!( "{} {}, {}", ljustify(op.to_string()), show_ireg_sized(*src, mb_rru, 8), @@ -637,7 +767,9 @@ impl ShowWithRRU for Inst { show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) ), - Inst::MovZX_RM_R { ext_mode, src, dst } => { + Inst::MovZX_RM_R { + ext_mode, src, dst, .. + } => { if *ext_mode == ExtMode::LQ { format!( "{} {}, {}", @@ -654,7 +786,7 @@ impl ShowWithRRU for Inst { ) } } - Inst::Mov64_M_R { src, dst } => format!( + Inst::Mov64_M_R { src, dst, .. } => format!( "{} {}, {}", ljustify("movq".to_string()), src.show_rru(mb_rru), @@ -666,13 +798,15 @@ impl ShowWithRRU for Inst { addr.show_rru(mb_rru), dst.show_rru(mb_rru) ), - Inst::MovSX_RM_R { ext_mode, src, dst } => format!( + Inst::MovSX_RM_R { + ext_mode, src, dst, .. + } => format!( "{} {}, {}", ljustify2("movs".to_string(), ext_mode.to_string()), src.show_rru_sized(mb_rru, ext_mode.src_size()), show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size()) ), - Inst::Mov_R_M { size, src, dst } => format!( + Inst::Mov_R_M { size, src, dst, .. } => format!( "{} {}, {}", ljustify2("mov".to_string(), suffixBWLQ(*size)), show_ireg_sized(*src, mb_rru, *size), @@ -753,6 +887,15 @@ impl ShowWithRRU for Inst { Inst::TrapIf { cc, trap_code, .. 
} => { format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code) } + Inst::LoadExtName { + dst, name, offset, .. + } => format!( + "{} {}+{}, {}", + ljustify("movaps".into()), + name, + offset, + show_ireg_sized(dst.to_reg(), mb_rru, 8), + ), Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), Inst::Hlt => "hlt".into(), Inst::Ud2 { trap_info } => format!("ud2 {}", trap_info.1), @@ -774,12 +917,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { // regalloc.rs will "fix" this for us by removing the the modified set from the use and def // sets. match inst { - Inst::Alu_RMI_R { - is_64: _, - op: _, - src, - dst, - } => { + Inst::Alu_RMI_R { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_mod(*dst); } @@ -788,16 +926,27 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_mod(Writable::from_reg(regs::rdx())); divisor.get_regs_as_uses(collector); } - Inst::CheckedDivOrRemSeq { divisor, .. } => { + Inst::MulHi { rhs, .. } => { + collector.add_mod(Writable::from_reg(regs::rax())); + collector.add_def(Writable::from_reg(regs::rdx())); + rhs.get_regs_as_uses(collector); + } + Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => { + // Mark both fixed registers as mods, to avoid an early clobber problem in codegen + // (i.e. the temporary is allocated one of the fixed registers). This requires writing + // the rdx register *before* the instruction, which is not too bad. collector.add_mod(Writable::from_reg(regs::rax())); collector.add_mod(Writable::from_reg(regs::rdx())); collector.add_use(*divisor); + if let Some(tmp) = tmp { + collector.add_def(*tmp); + } } Inst::SignExtendRaxRdx { .. } => { collector.add_use(regs::rax()); collector.add_mod(Writable::from_reg(regs::rdx())); } - Inst::XMM_Mov_RM_R { src, dst, .. } => { + Inst::UnaryRmR { src, dst, .. } | Inst::XMM_Mov_RM_R { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_def(*dst); } @@ -820,7 +969,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_def(*dst); } - Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => { + Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => { src.get_regs_as_uses(collector); collector.add_def(*dst) } @@ -832,18 +981,13 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(*src); dst.get_regs_as_uses(collector); } - Inst::Shift_R { - is_64: _, - kind: _, - num_bits, - dst, - } => { + Inst::Shift_R { num_bits, dst, .. } => { if num_bits.is_none() { collector.add_use(regs::rcx()); } collector.add_mod(*dst); } - Inst::Cmp_RMI_R { size: _, src, dst } => { + Inst::Cmp_RMI_R { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_use(*dst); // yes, really `add_use` } @@ -852,7 +996,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { } Inst::Cmove { src, dst, .. } => { src.get_regs_as_uses(collector); - collector.add_def(*dst); + collector.add_mod(*dst); } Inst::Push64 { src } => { src.get_regs_as_uses(collector); @@ -891,12 +1035,19 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(*tmp2); } + Inst::JmpUnknown { target } => { + target.get_regs_as_uses(collector); + } + + Inst::LoadExtName { dst, .. } => { + collector.add_def(*dst); + } + Inst::Ret | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } | Inst::JmpCond { .. } | Inst::Nop { .. } - | Inst::JmpUnknown { .. 
} | Inst::TrapIf { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::Hlt @@ -933,15 +1084,11 @@ fn map_mod(m: &RUM, r: &mut Writable) { impl Amode { fn map_uses(&mut self, map: &RUM) { match self { - Amode::ImmReg { - simm32: _, - ref mut base, - } => map_use(map, base), + Amode::ImmReg { ref mut base, .. } => map_use(map, base), Amode::ImmRegRegShift { - simm32: _, ref mut base, ref mut index, - shift: _, + .. } => { map_use(map, base); map_use(map, index); @@ -958,7 +1105,7 @@ impl RegMemImm { match self { RegMemImm::Reg { ref mut reg } => map_use(map, reg), RegMemImm::Mem { ref mut addr } => addr.map_uses(map), - RegMemImm::Imm { simm32: _ } => {} + RegMemImm::Imm { .. } => {} } } } @@ -967,7 +1114,7 @@ impl RegMem { fn map_uses(&mut self, map: &RUM) { match self { RegMem::Reg { ref mut reg } => map_use(map, reg), - RegMem::Mem { ref mut addr } => addr.map_uses(map), + RegMem::Mem { ref mut addr, .. } => addr.map_uses(map), } } } @@ -977,23 +1124,31 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { match inst { // ** Nop Inst::Alu_RMI_R { - is_64: _, - op: _, ref mut src, ref mut dst, + .. } => { src.map_uses(mapper); map_mod(mapper, dst); } Inst::Div { divisor, .. } => divisor.map_uses(mapper), - Inst::CheckedDivOrRemSeq { divisor, .. } => { + Inst::MulHi { rhs, .. } => rhs.map_uses(mapper), + Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => { map_use(mapper, divisor); + if let Some(tmp) = tmp { + map_def(mapper, tmp) + } } Inst::SignExtendRaxRdx { .. } => {} Inst::XMM_Mov_RM_R { ref mut src, ref mut dst, .. + } + | Inst::UnaryRmR { + ref mut src, + ref mut dst, + .. } => { src.map_uses(mapper); map_def(mapper, dst); @@ -1014,15 +1169,11 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_use(mapper, src); dst.map_uses(mapper); } - Inst::Imm_R { - dst_is_64: _, - simm64: _, - ref mut dst, - } => map_def(mapper, dst), + Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst), Inst::Mov_R_R { - is_64: _, ref mut src, ref mut dst, + .. } => { map_use(mapper, src); map_def(mapper, dst); @@ -1035,7 +1186,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_def(mapper, dst); } - Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => { + Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => { src.map_uses(mapper); map_def(mapper, dst); } @@ -1055,18 +1206,13 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_use(mapper, src); dst.map_uses(mapper); } - Inst::Shift_R { - is_64: _, - kind: _, - num_bits: _, - ref mut dst, - } => { + Inst::Shift_R { ref mut dst, .. } => { map_mod(mapper, dst); } Inst::Cmp_RMI_R { - size: _, ref mut src, ref mut dst, + .. } => { src.map_uses(mapper); map_use(mapper, dst); @@ -1078,7 +1224,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { .. } => { src.map_uses(mapper); - map_def(mapper, dst) + map_mod(mapper, dst) } Inst::Push64 { ref mut src } => src.map_uses(mapper), Inst::Pop64 { ref mut dst } => { @@ -1124,12 +1270,15 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_def(mapper, tmp2); } + Inst::JmpUnknown { ref mut target } => target.map_uses(mapper), + + Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst), + Inst::Ret | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } | Inst::JmpCond { .. } | Inst::Nop { .. } - | Inst::JmpUnknown { .. } | Inst::TrapIf { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::Ud2 { .. } @@ -1157,8 +1306,10 @@ impl MachInst for Inst { // conceivably use `movl %reg, %reg` to zero out the top 32 bits of // %reg. 
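// Editor's sketch (not a line of the patch): a minimal model of the semantics
// behind the Cmove change above (add_def/map_def becoming add_mod/map_mod).
// cmovcc writes its destination only when the condition holds, so on the false
// path the old value of dst is observable; for the register allocator that is
// a read-modify-write, not a fresh definition.
fn cmove_model(cond: bool, src: u64, dst: u64) -> u64 {
    if cond {
        src
    } else {
        dst // the incoming dst value flows through, hence "mod" rather than "def"
    }
}

fn main() {
    assert_eq!(cmove_model(true, 1, 2), 1);
    assert_eq!(cmove_model(false, 1, 2), 2);
}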
match self { - Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)), - Self::XMM_Mov_RM_R { op, src, dst } + Self::Mov_R_R { + is_64, src, dst, .. + } if *is_64 => Some((*dst, *src)), + Self::XMM_Mov_RM_R { op, src, dst, .. } if *op == SseOpcode::Movss || *op == SseOpcode::Movsd || *op == SseOpcode::Movaps => @@ -1187,9 +1338,7 @@ impl MachInst for Inst { &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret, &Self::JmpKnown { dst } => MachTerminator::Uncond(dst.as_label().unwrap()), &Self::JmpCond { - cc: _, - taken, - not_taken, + taken, not_taken, .. } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()), &Self::JmpTableSeq { ref targets_for_term, @@ -1208,8 +1357,8 @@ impl MachInst for Inst { match rc_dst { RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg), RegClass::V128 => match ty { - F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg), - F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg), + F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None), + F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None), _ => panic!("unexpected V128 type in gen_move"), }, _ => panic!("gen_move(x64): unhandled regclass"), @@ -1232,6 +1381,7 @@ impl MachInst for Inst { match ty { I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64), F32 | F64 | I128 | B128 => Ok(RegClass::V128), + IFLAGS | FFLAGS => Ok(RegClass::I64), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", ty @@ -1243,10 +1393,11 @@ impl MachInst for Inst { Inst::jmp_known(BranchTarget::Label(label)) } - fn gen_constant(to_reg: Writable, value: u64, _: Type) -> SmallVec<[Self; 4]> { + fn gen_constant(to_reg: Writable, value: u64, ty: Type) -> SmallVec<[Self; 4]> { let mut ret = SmallVec::new(); - let is64 = value > 0xffff_ffff; - ret.push(Inst::imm_r(is64, value, to_reg)); + debug_assert!(ty.is_int(), "float constants NYI"); + let is_64 = ty == I64 && value > 0x7fffffff; + ret.push(Inst::imm_r(is_64, value, to_reg)); ret } diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs index 087103b4ac11..4f23ab11b04b 100644 --- a/cranelift/codegen/src/isa/x64/inst/regs.rs +++ b/cranelift/codegen/src/isa/x64/inst/regs.rs @@ -33,46 +33,55 @@ fn gpr(enc: u8, index: u8) -> Reg { } pub(crate) fn r12() -> Reg { - gpr(ENC_R12, 0) + gpr(ENC_R12, 16) } pub(crate) fn r13() -> Reg { - gpr(ENC_R13, 1) + gpr(ENC_R13, 17) } pub(crate) fn r14() -> Reg { - gpr(ENC_R14, 2) -} -pub(crate) fn r15() -> Reg { - gpr(ENC_R15, 3) + gpr(ENC_R14, 18) } pub(crate) fn rbx() -> Reg { - gpr(ENC_RBX, 4) + gpr(ENC_RBX, 19) } pub(crate) fn rsi() -> Reg { - gpr(6, 5) + gpr(6, 20) } pub(crate) fn rdi() -> Reg { - gpr(7, 6) + gpr(7, 21) } pub(crate) fn rax() -> Reg { - gpr(0, 7) + gpr(0, 22) } pub(crate) fn rcx() -> Reg { - gpr(1, 8) + gpr(1, 23) } pub(crate) fn rdx() -> Reg { - gpr(2, 9) + gpr(2, 24) } pub(crate) fn r8() -> Reg { - gpr(8, 10) + gpr(8, 25) } pub(crate) fn r9() -> Reg { - gpr(9, 11) + gpr(9, 26) } pub(crate) fn r10() -> Reg { - gpr(10, 12) + gpr(10, 27) } pub(crate) fn r11() -> Reg { - gpr(11, 13) + gpr(11, 28) +} + +pub(crate) fn r15() -> Reg { + // r15 is put aside since this is the pinned register. + gpr(ENC_R15, 29) +} + +/// The pinned register on this architecture. +/// It must be the same as Spidermonkey's HeapReg, as found in this file. 
+/// https://searchfox.org/mozilla-central/source/js/src/jit/x64/Assembler-x64.h#99 +pub(crate) fn pinned_reg() -> Reg { + r15() } fn fpr(enc: u8, index: u8) -> Reg { @@ -80,52 +89,52 @@ fn fpr(enc: u8, index: u8) -> Reg { } pub(crate) fn xmm0() -> Reg { - fpr(0, 14) + fpr(0, 0) } pub(crate) fn xmm1() -> Reg { - fpr(1, 15) + fpr(1, 1) } pub(crate) fn xmm2() -> Reg { - fpr(2, 16) + fpr(2, 2) } pub(crate) fn xmm3() -> Reg { - fpr(3, 17) + fpr(3, 3) } pub(crate) fn xmm4() -> Reg { - fpr(4, 18) + fpr(4, 4) } pub(crate) fn xmm5() -> Reg { - fpr(5, 19) + fpr(5, 5) } pub(crate) fn xmm6() -> Reg { - fpr(6, 20) + fpr(6, 6) } pub(crate) fn xmm7() -> Reg { - fpr(7, 21) + fpr(7, 7) } pub(crate) fn xmm8() -> Reg { - fpr(8, 22) + fpr(8, 8) } pub(crate) fn xmm9() -> Reg { - fpr(9, 23) + fpr(9, 9) } pub(crate) fn xmm10() -> Reg { - fpr(10, 24) + fpr(10, 10) } pub(crate) fn xmm11() -> Reg { - fpr(11, 25) + fpr(11, 11) } pub(crate) fn xmm12() -> Reg { - fpr(12, 26) + fpr(12, 12) } pub(crate) fn xmm13() -> Reg { - fpr(13, 27) + fpr(13, 13) } pub(crate) fn xmm14() -> Reg { - fpr(14, 28) + fpr(14, 14) } pub(crate) fn xmm15() -> Reg { - fpr(15, 29) + fpr(15, 15) } pub(crate) fn rsp() -> Reg { @@ -139,18 +148,40 @@ pub(crate) fn rbp() -> Reg { /// /// The ordering of registers matters, as commented in the file doc comment: assumes the /// calling-convention is SystemV, at the moment. -pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse { +pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse { let mut regs = Vec::<(RealReg, String)>::new(); let mut allocable_by_class = [None; NUM_REG_CLASSES]; + let use_pinned_reg = flags.enable_pinned_reg(); + + // XMM registers + let first_fpr = regs.len(); + regs.push((xmm0().to_real_reg(), "%xmm0".into())); + regs.push((xmm1().to_real_reg(), "%xmm1".into())); + regs.push((xmm2().to_real_reg(), "%xmm2".into())); + regs.push((xmm3().to_real_reg(), "%xmm3".into())); + regs.push((xmm4().to_real_reg(), "%xmm4".into())); + regs.push((xmm5().to_real_reg(), "%xmm5".into())); + regs.push((xmm6().to_real_reg(), "%xmm6".into())); + regs.push((xmm7().to_real_reg(), "%xmm7".into())); + regs.push((xmm8().to_real_reg(), "%xmm8".into())); + regs.push((xmm9().to_real_reg(), "%xmm9".into())); + regs.push((xmm10().to_real_reg(), "%xmm10".into())); + regs.push((xmm11().to_real_reg(), "%xmm11".into())); + regs.push((xmm12().to_real_reg(), "%xmm12".into())); + regs.push((xmm13().to_real_reg(), "%xmm13".into())); + regs.push((xmm14().to_real_reg(), "%xmm14".into())); + regs.push((xmm15().to_real_reg(), "%xmm15".into())); + let last_fpr = regs.len() - 1; + // Integer regs. - let mut base = regs.len(); + let first_gpr = regs.len(); // Callee-saved, in the SystemV x86_64 ABI. regs.push((r12().to_real_reg(), "%r12".into())); regs.push((r13().to_real_reg(), "%r13".into())); regs.push((r14().to_real_reg(), "%r14".into())); - regs.push((r15().to_real_reg(), "%r15".into())); + regs.push((rbx().to_real_reg(), "%rbx".into())); // Caller-saved, in the SystemV x86_64 ABI. @@ -164,41 +195,38 @@ pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUn regs.push((r10().to_real_reg(), "%r10".into())); regs.push((r11().to_real_reg(), "%r11".into())); + // Other regs, not available to the allocator. + debug_assert_eq!(r15(), pinned_reg()); + let allocable = if use_pinned_reg { + // The pinned register is not allocatable in this case, so record the length before adding + // it. 
+ let len = regs.len(); + regs.push((r15().to_real_reg(), "%r15/pinned".into())); + len + } else { + regs.push((r15().to_real_reg(), "%r15".into())); + regs.len() + }; + let last_gpr = allocable - 1; + + regs.push((rsp().to_real_reg(), "%rsp".into())); + regs.push((rbp().to_real_reg(), "%rbp".into())); + allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo { - first: base, - last: regs.len() - 1, + first: first_gpr, + last: last_gpr, suggested_scratch: Some(r12().get_index()), }); - - // XMM registers - base = regs.len(); - regs.push((xmm0().to_real_reg(), "%xmm0".into())); - regs.push((xmm1().to_real_reg(), "%xmm1".into())); - regs.push((xmm2().to_real_reg(), "%xmm2".into())); - regs.push((xmm3().to_real_reg(), "%xmm3".into())); - regs.push((xmm4().to_real_reg(), "%xmm4".into())); - regs.push((xmm5().to_real_reg(), "%xmm5".into())); - regs.push((xmm6().to_real_reg(), "%xmm6".into())); - regs.push((xmm7().to_real_reg(), "%xmm7".into())); - regs.push((xmm8().to_real_reg(), "%xmm8".into())); - regs.push((xmm9().to_real_reg(), "%xmm9".into())); - regs.push((xmm10().to_real_reg(), "%xmm10".into())); - regs.push((xmm11().to_real_reg(), "%xmm11".into())); - regs.push((xmm12().to_real_reg(), "%xmm12".into())); - regs.push((xmm13().to_real_reg(), "%xmm13".into())); - regs.push((xmm14().to_real_reg(), "%xmm14".into())); - regs.push((xmm15().to_real_reg(), "%xmm15".into())); - allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo { - first: base, - last: regs.len() - 1, + first: first_fpr, + last: last_fpr, suggested_scratch: Some(xmm15().get_index()), }); - // Other regs, not available to the allocator. - let allocable = regs.len(); - regs.push((rsp().to_real_reg(), "%rsp".into())); - regs.push((rbp().to_real_reg(), "%rbp".into())); + // Sanity-check: the index passed to the Reg ctor must match the order in the register list. 
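// Editor's sketch (observation, not a line of the patch): the layout the
// renumbering above establishes and that the sanity-check loop just below
// enforces, namely that each entry's get_index() equals its position in the
// list. XMM registers now sit at universe indices 0..=15 (index equals the
// hardware encoding), the GPR block follows with %r12 at 16 and %r15, the
// pinned register, at 29, and %rsp/%rbp are pushed after the allocatable set.
fn check_universe_indices(indices: &[usize]) {
    // Same invariant as the loop below, over plain indices instead of RealRegs.
    for (pos, &idx) in indices.iter().enumerate() {
        assert_eq!(pos, idx, "Reg ctor index must match the push order");
    }
}

fn main() {
    check_universe_indices(&[0, 1, 2]); // %xmm0..%xmm2 as pushed above
    // A full universe would continue through 15, then 16 (%r12) .. 29 (%r15).
}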
+ for (i, reg) in regs.iter().enumerate() { + assert_eq!(i, reg.0.get_index()); + } RealRegUniverse { regs, diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 3228c8d5ab70..4e4074e6f573 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -6,6 +6,7 @@ use log::trace; use regalloc::{Reg, RegClass, Writable}; use smallvec::SmallVec; +use alloc::boxed::Box; use alloc::vec::Vec; use std::convert::TryFrom; @@ -120,12 +121,55 @@ struct InsnOutput { output: usize, } -fn input_to_reg<'a>(ctx: Ctx<'a>, spec: InsnInput) -> Reg { +fn input_to_reg(ctx: Ctx, spec: InsnInput) -> Reg { let inputs = ctx.get_input(spec.insn, spec.input); ctx.use_input_reg(inputs); inputs.reg } +enum ExtSpec { + ZeroExtendTo32, + ZeroExtendTo64, + SignExtendTo32, + SignExtendTo64, +} + +fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg { + let requested_size = match ext_spec { + ExtSpec::ZeroExtendTo32 | ExtSpec::SignExtendTo32 => 32, + ExtSpec::ZeroExtendTo64 | ExtSpec::SignExtendTo64 => 64, + }; + let input_size = ctx.input_ty(spec.insn, spec.input).bits(); + + let ext_mode = match (input_size, requested_size) { + (a, b) if a == b => return input_to_reg(ctx, spec), + (a, 32) if a == 1 || a == 8 => ExtMode::BL, + (a, 64) if a == 1 || a == 8 => ExtMode::BQ, + (16, 32) => ExtMode::WL, + (16, 64) => ExtMode::WQ, + (32, 64) => ExtMode::LQ, + _ => unreachable!(), + }; + + let requested_ty = if requested_size == 32 { I32 } else { I64 }; + + let src = input_to_reg_mem(ctx, spec); + let dst = ctx.alloc_tmp(RegClass::I64, requested_ty); + match ext_spec { + ExtSpec::ZeroExtendTo32 | ExtSpec::ZeroExtendTo64 => { + ctx.emit(Inst::movzx_rm_r( + ext_mode, src, dst, /* infallible */ None, + )) + } + ExtSpec::SignExtendTo32 | ExtSpec::SignExtendTo64 => { + ctx.emit(Inst::movsx_rm_r( + ext_mode, src, dst, /* infallible */ None, + )) + } + } + dst.to_reg() +} + fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem { // TODO handle memory. RegMem::reg(input_to_reg(ctx, spec)) @@ -135,11 +179,11 @@ fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem { /// TODO: handle memory as well! fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { let imm = ctx.get_input(spec.insn, spec.input).constant.and_then(|x| { - let as_u32 = x as u32; - let extended = as_u32 as u64; - // If the truncation and sign-extension don't change the value, use it. - if extended == x { - Some(as_u32) + // For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend + // to 64 bits. For other sizes, it doesn't matter and we can just use the plain + // constant. + if ctx.input_ty(spec.insn, spec.input).bytes() != 8 || low32_will_sign_extend_to_64(x) { + Some(x as u32) } else { None } @@ -150,7 +194,7 @@ fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { } } -fn output_to_reg<'a>(ctx: Ctx<'a>, spec: InsnOutput) -> Writable { +fn output_to_reg(ctx: Ctx, spec: InsnOutput) -> Writable { ctx.get_output(spec.insn, spec.output) } @@ -195,9 +239,7 @@ fn lower_insn_to_regs>( match op { Opcode::Iconst => { if let Some(w64) = iri_to_u64_imm(ctx, insn) { - // Get exactly the bit pattern in 'w64' into the dest. No - // monkeying with sign extension etc. 
- let dst_is_64 = w64 > 0xFFFF_FFFF; + let dst_is_64 = w64 > 0x7fffffff; let dst = output_to_reg(ctx, outputs[0]); ctx.emit(Inst::imm_r(dst_is_64, w64, dst)); } else { @@ -228,28 +270,407 @@ fn lower_insn_to_regs>( ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst)); } - Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { - // TODO: implement imm shift value into insn + Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => { let dst_ty = ctx.output_ty(insn, 0); - assert_eq!(ctx.input_ty(insn, 0), dst_ty); - assert!(dst_ty == types::I32 || dst_ty == types::I64); + debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty); + debug_assert!(dst_ty == types::I32 || dst_ty == types::I64); let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg(ctx, inputs[1]); + + let (count, rhs) = if let Some(cst) = ctx.get_constant(inputs[1].insn) { + let cst = if op == Opcode::Rotl || op == Opcode::Rotr { + // Mask rotation count, according to Cranelift's semantics. + (cst as u8) & (dst_ty.bits() as u8 - 1) + } else { + cst as u8 + }; + (Some(cst), None) + } else { + (None, Some(input_to_reg(ctx, inputs[1]))) + }; + let dst = output_to_reg(ctx, outputs[0]); let shift_kind = match op { - Opcode::Ishl => ShiftKind::Left, - Opcode::Ushr => ShiftKind::RightZ, - Opcode::Sshr => ShiftKind::RightS, + Opcode::Ishl => ShiftKind::ShiftLeft, + Opcode::Ushr => ShiftKind::ShiftRightLogical, + Opcode::Sshr => ShiftKind::ShiftRightArithmetic, + Opcode::Rotl => ShiftKind::RotateLeft, + Opcode::Rotr => ShiftKind::RotateRight, _ => unreachable!(), }; let is_64 = dst_ty == types::I64; let w_rcx = Writable::from_reg(regs::rcx()); ctx.emit(Inst::mov_r_r(true, lhs, dst)); - ctx.emit(Inst::mov_r_r(true, rhs, w_rcx)); - ctx.emit(Inst::shift_r(is_64, shift_kind, None /*%cl*/, dst)); + if count.is_none() { + ctx.emit(Inst::mov_r_r(true, rhs.unwrap(), w_rcx)); + } + ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst)); + } + + Opcode::Clz => { + // TODO when the x86 flags have use_lzcnt, we can use LZCNT. + + // General formula using bit-scan reverse (BSR): + // mov -1, %dst + // bsr %src, %tmp + // cmovz %dst, %tmp + // mov $(size_bits - 1), %dst + // sub %tmp, %dst + + let (ext_spec, ty) = match ctx.input_ty(insn, 0) { + I8 | I16 => (Some(ExtSpec::ZeroExtendTo32), I32), + a if a == I32 || a == I64 => (None, a), + _ => unreachable!(), + }; + + let src = if let Some(ext_spec) = ext_spec { + RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)) + } else { + input_to_reg_mem(ctx, inputs[0]) + }; + let dst = output_to_reg(ctx, outputs[0]); + + let tmp = ctx.alloc_tmp(RegClass::I64, ty); + ctx.emit(Inst::imm_r(ty == I64, u64::max_value(), dst)); + + ctx.emit(Inst::unary_rm_r( + ty.bytes() as u8, + UnaryRmROpcode::Bsr, + src, + tmp, + )); + + ctx.emit(Inst::cmove( + ty.bytes() as u8, + CC::Z, + RegMem::reg(dst.to_reg()), + tmp, + )); + + ctx.emit(Inst::imm_r(ty == I64, ty.bits() as u64 - 1, dst)); + + ctx.emit(Inst::alu_rmi_r( + ty == I64, + AluRmiROpcode::Sub, + RegMemImm::reg(tmp.to_reg()), + dst, + )); + } + + Opcode::Ctz => { + // TODO when the x86 flags have use_bmi1, we can use TZCNT. 
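// Editor's sketch (not a line of the patch): a plain-Rust model of the
// BSR-based Clz lowering above, for a 64-bit input. bsr yields the index of
// the highest set bit and leaves its destination unchanged (setting ZF) on a
// zero input; the cmovz then substitutes the preloaded -1, so that
// (bits - 1) - tmp yields `bits` for zero and the leading-zero count otherwise.
fn clz_via_bsr(x: u64) -> u64 {
    let bits = 64u64;
    let tmp = if x == 0 {
        u64::MAX // cmovz picks the -1 preloaded into dst
    } else {
        63 - x.leading_zeros() as u64 // what bsr computes: index of the highest set bit
    };
    (bits - 1).wrapping_sub(tmp) // the final sub; wraps exactly like the hardware sub
}

fn main() {
    assert_eq!(clz_via_bsr(0), 64);
    assert_eq!(clz_via_bsr(1), 63);
    assert_eq!(clz_via_bsr(u64::MAX), 0);
}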
+ + // General formula using bit-scan forward (BSF): + // bsf %src, %dst + // mov $(size_bits), %tmp + // cmovz %tmp, %dst + let ty = ctx.input_ty(insn, 0); + let ty = if ty.bits() < 32 { I32 } else { ty }; + debug_assert!(ty == I32 || ty == I64); + + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + + let tmp = ctx.alloc_tmp(RegClass::I64, ty); + ctx.emit(Inst::imm_r(false /* 64 bits */, ty.bits() as u64, tmp)); + + ctx.emit(Inst::unary_rm_r( + ty.bytes() as u8, + UnaryRmROpcode::Bsf, + src, + dst, + )); + + ctx.emit(Inst::cmove( + ty.bytes() as u8, + CC::Z, + RegMem::reg(tmp.to_reg()), + dst, + )); + } + + Opcode::Popcnt => { + // TODO when the x86 flags have use_popcnt, we can use the popcnt instruction. + + let (ext_spec, ty) = match ctx.input_ty(insn, 0) { + I8 | I16 => (Some(ExtSpec::ZeroExtendTo32), I32), + a if a == I32 || a == I64 => (None, a), + _ => unreachable!(), + }; + + let src = if let Some(ext_spec) = ext_spec { + RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)) + } else { + input_to_reg_mem(ctx, inputs[0]) + }; + let dst = output_to_reg(ctx, outputs[0]); + + if ty == I64 { + let is_64 = true; + + let tmp1 = ctx.alloc_tmp(RegClass::I64, I64); + let tmp2 = ctx.alloc_tmp(RegClass::I64, I64); + let cst = ctx.alloc_tmp(RegClass::I64, I64); + + // mov src, tmp1 + ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1, None)); + + // shr $1, tmp1 + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(1), + tmp1, + )); + + // mov 0x7777_7777_7777_7777, cst + ctx.emit(Inst::imm_r(is_64, 0x7777777777777777, cst)); + + // andq cst, tmp1 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::And, + RegMemImm::reg(cst.to_reg()), + tmp1, + )); + + // mov src, tmp2 + ctx.emit(Inst::mov64_rm_r(src, tmp2, None)); + + // sub tmp1, tmp2 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Sub, + RegMemImm::reg(tmp1.to_reg()), + tmp2, + )); + + // shr $1, tmp1 + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(1), + tmp1, + )); + + // and cst, tmp1 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::And, + RegMemImm::reg(cst.to_reg()), + tmp1, + )); + + // sub tmp1, tmp2 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Sub, + RegMemImm::reg(tmp1.to_reg()), + tmp2, + )); + + // shr $1, tmp1 + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(1), + tmp1, + )); + + // and cst, tmp1 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::And, + RegMemImm::reg(cst.to_reg()), + tmp1, + )); + + // sub tmp1, tmp2 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Sub, + RegMemImm::reg(tmp1.to_reg()), + tmp2, + )); + + // mov tmp2, dst + ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None)); + + // shr $4, dst + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(4), + dst, + )); + + // add tmp2, dst + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Add, + RegMemImm::reg(tmp2.to_reg()), + dst, + )); + + // mov $0x0F0F_0F0F_0F0F_0F0F, cst + ctx.emit(Inst::imm_r(is_64, 0x0F0F0F0F0F0F0F0F, cst)); + + // and cst, dst + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::And, + RegMemImm::reg(cst.to_reg()), + dst, + )); + + // mov $0x0101_0101_0101_0101, cst + ctx.emit(Inst::imm_r(is_64, 0x0101010101010101, cst)); + + // mul cst, dst + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Mul, + RegMemImm::reg(cst.to_reg()), + dst, + )); + + // shr $56, dst + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(56), + dst, + )); + } else { + 
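// Illustrative reference model (not backend code) of the branch-free
// popcount sequence emitted above for 64-bit inputs when the POPCNT
// instruction is not assumed to be available; the 32-bit branch below is the
// same idea with 32-bit masks and a final shift of 24. The three successive
// shift/mask/subtract steps reduce each nibble to its own bit count, the
// nibble pairs are folded into bytes, and the 0x01...01 multiply sums all
// bytes into the top byte.
fn popcnt64_model(x: u64) -> u64 {
    const M: u64 = 0x7777_7777_7777_7777;
    let mut t1 = (x >> 1) & M;
    let mut t2 = x.wrapping_sub(t1);
    t1 = (t1 >> 1) & M;
    t2 = t2.wrapping_sub(t1);
    t1 = (t1 >> 1) & M;
    t2 = t2.wrapping_sub(t1);
    // Every nibble of t2 now holds the popcount of the matching nibble of x.
    let bytes = t2.wrapping_add(t2 >> 4) & 0x0F0F_0F0F_0F0F_0F0F;
    bytes.wrapping_mul(0x0101_0101_0101_0101) >> 56
}

#[test]
fn popcnt_model_matches_count_ones() {
    for &v in &[0u64, 1, u64::MAX, 0xDEAD_BEEF_F00D_CAFE] {
        assert_eq!(popcnt64_model(v), u64::from(v.count_ones()));
    }
}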
assert_eq!(ty, I32); + let is_64 = false; + + let tmp1 = ctx.alloc_tmp(RegClass::I64, I64); + let tmp2 = ctx.alloc_tmp(RegClass::I64, I64); + + // mov src, tmp1 + ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1, None)); + + // shr $1, tmp1 + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(1), + tmp1, + )); + + // andq $0x7777_7777, tmp1 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::And, + RegMemImm::imm(0x77777777), + tmp1, + )); + + // mov src, tmp2 + ctx.emit(Inst::mov64_rm_r(src, tmp2, None)); + + // sub tmp1, tmp2 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Sub, + RegMemImm::reg(tmp1.to_reg()), + tmp2, + )); + + // shr $1, tmp1 + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(1), + tmp1, + )); + + // and 0x7777_7777, tmp1 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::And, + RegMemImm::imm(0x77777777), + tmp1, + )); + + // sub tmp1, tmp2 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Sub, + RegMemImm::reg(tmp1.to_reg()), + tmp2, + )); + + // shr $1, tmp1 + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(1), + tmp1, + )); + + // and $0x7777_7777, tmp1 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::And, + RegMemImm::imm(0x77777777), + tmp1, + )); + + // sub tmp1, tmp2 + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Sub, + RegMemImm::reg(tmp1.to_reg()), + tmp2, + )); + + // mov tmp2, dst + ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None)); + + // shr $4, dst + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(4), + dst, + )); + + // add tmp2, dst + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Add, + RegMemImm::reg(tmp2.to_reg()), + dst, + )); + + // and $0x0F0F_0F0F, dst + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::And, + RegMemImm::imm(0x0F0F0F0F), + dst, + )); + + // mul $0x0101_0101, dst + ctx.emit(Inst::alu_rmi_r( + is_64, + AluRmiROpcode::Mul, + RegMemImm::imm(0x01010101), + dst, + )); + + // shr $24, dst + ctx.emit(Inst::shift_r( + is_64, + ShiftKind::ShiftRightLogical, + Some(24), + dst, + )); + } } Opcode::Uextend @@ -261,37 +682,46 @@ fn lower_insn_to_regs>( let src_ty = ctx.input_ty(insn, 0); let dst_ty = ctx.output_ty(insn, 0); - // TODO: if the source operand is a load, incorporate that. - let src = input_to_reg(ctx, inputs[0]); + let src = input_to_reg_mem(ctx, inputs[0]); let dst = output_to_reg(ctx, outputs[0]); let ext_mode = match (src_ty.bits(), dst_ty.bits()) { - (1, 32) | (8, 32) => ExtMode::BL, - (1, 64) | (8, 64) => ExtMode::BQ, - (16, 32) => ExtMode::WL, - (16, 64) => ExtMode::WQ, - (32, 64) => ExtMode::LQ, + (1, 32) | (8, 32) => Some(ExtMode::BL), + (1, 64) | (8, 64) => Some(ExtMode::BQ), + (16, 32) => Some(ExtMode::WL), + (16, 64) => Some(ExtMode::WQ), + (32, 64) => Some(ExtMode::LQ), + (x, y) if x >= y => None, _ => unreachable!( "unexpected extension kind from {:?} to {:?}", src_ty, dst_ty ), }; - if op == Opcode::Sextend { - ctx.emit(Inst::movsx_rm_r(ext_mode, RegMem::reg(src), dst)); + // All of these other opcodes are simply a move from a zero-extended source. Here + // is why this works, in each case: + // + // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we + // merely need to zero-extend here. + // + // - Breduce, Bextend: changing width of a boolean. We represent a + // bool as a 0 or 1, so again, this is a zero-extend / no-op. + // + // - Ireduce: changing width of an integer. Smaller ints are stored + // with undefined high-order bits, so we can simply do a copy. 
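// Illustrative model, in plain integer arithmetic rather than backend code,
// of the width-change semantics described in the comment above: Sextend
// replicates the source sign bit, Uextend/Bint/Breduce/Bextend zero-extend,
// and any op whose destination is not wider than its source reduces to a
// copy because the high bits of narrow values are undefined.
fn width_change_model(is_sextend: bool, src: u64, src_bits: u32, dst_bits: u32) -> u64 {
    let mask = |bits: u32| if bits == 64 { u64::MAX } else { (1u64 << bits) - 1 };
    let low = src & mask(src_bits);
    if dst_bits <= src_bits {
        // Ireduce / Breduce / same-width: the low dst_bits are already in place.
        low & mask(dst_bits)
    } else if is_sextend {
        // movsx: toggling the sign bit and subtracting it sign-extends `low`.
        let sign = 1u64 << (src_bits - 1);
        (low ^ sign).wrapping_sub(sign) & mask(dst_bits)
    } else {
        // movzx: the newly created high bits are zero.
        low
    }
}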
+ + if let Some(ext_mode) = ext_mode { + if op == Opcode::Sextend { + ctx.emit(Inst::movsx_rm_r( + ext_mode, src, dst, /* infallible */ None, + )); + } else { + ctx.emit(Inst::movzx_rm_r( + ext_mode, src, dst, /* infallible */ None, + )); + } } else { - // All of these other opcodes are simply a move from a zero-extended source. Here - // is why this works, in each case: - // - // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we - // merely need to zero-extend here. - // - // - Breduce, Bextend: changing width of a boolean. We represent a - // bool as a 0 or 1, so again, this is a zero-extend / no-op. - // - // - Ireduce: changing width of an integer. Smaller ints are stored - // with undefined high-order bits, so we can simply do a copy. - ctx.emit(Inst::movzx_rm_r(ext_mode, RegMem::reg(src), dst)); + ctx.emit(Inst::mov64_rm_r(src, dst, /* infallible */ None)); } } @@ -308,15 +738,8 @@ fn lower_insn_to_regs>( for i in 0..ctx.num_inputs(insn) { let src_reg = input_to_reg(ctx, inputs[i]); let retval_reg = ctx.retval(i); - if src_reg.get_class() == RegClass::I64 { - ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg)); - } else if src_reg.get_class() == RegClass::V128 { - ctx.emit(Inst::xmm_mov_rm_r( - SseOpcode::Movsd, - RegMem::reg(src_reg), - retval_reg, - )); - } + let ty = ctx.input_ty(insn, i); + ctx.emit(Inst::gen_move(retval_reg, src_reg, ty)); } // N.B.: the Ret itself is generated by the ABI. } @@ -364,7 +787,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::Hlt); } - Opcode::Trap => { + Opcode::Trap | Opcode::ResumableTrap => { let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap()); ctx.emit(Inst::Ud2 { trap_info }) } @@ -383,7 +806,12 @@ fn lower_insn_to_regs>( // TODO Fmax, Fmin. _ => unimplemented!(), }; - ctx.emit(Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(lhs), dst)); + ctx.emit(Inst::xmm_mov_rm_r( + SseOpcode::Movss, + RegMem::reg(lhs), + dst, + None, + )); ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst)); } else { unimplemented!("unimplemented lowering for opcode {:?}", op); @@ -410,17 +838,20 @@ fn lower_insn_to_regs>( SseOpcode::Movd, RegMem::reg(tmp_gpr1.to_reg()), tmp_xmm1, + None, )); ctx.emit(Inst::xmm_mov_rm_r( SseOpcode::Movaps, RegMem::reg(tmp_xmm1.to_reg()), dst, + None, )); ctx.emit(Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(lhs), dst)); ctx.emit(Inst::xmm_mov_rm_r( SseOpcode::Movss, RegMem::reg(rhs), tmp_xmm2, + None, )); ctx.emit(Inst::xmm_rm_r( SseOpcode::Andps, @@ -521,25 +952,37 @@ fn lower_insn_to_regs>( _ => unreachable!(), }; + let srcloc = Some(ctx.srcloc(insn)); + let dst = output_to_reg(ctx, outputs[0]); match (sign_extend, is_float) { (true, false) => { // The load is sign-extended only when the output size is lower than 64 bits, // so ext-mode is defined in this case. - ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst)); + ctx.emit(Inst::movsx_rm_r( + ext_mode.unwrap(), + RegMem::mem(addr), + dst, + srcloc, + )); } (false, false) => { if elem_ty.bytes() == 8 { // Use a plain load. - ctx.emit(Inst::mov64_m_r(addr, dst)) + ctx.emit(Inst::mov64_m_r(addr, dst, srcloc)) } else { // Use a zero-extended load. 
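// Illustrative sketch of the decision this load lowering makes (the enum and
// function names are placeholders, not the backend's types): 64-bit integer
// loads are plain moves, narrower integer loads are widened with movsx or
// movzx, and the memory-touching forms now carry Some(srcloc), presumably so
// a faulting access can be attributed to the right source location, while
// register-only forms pass None ("infallible").
enum IntLoadKind {
    Plain64,
    SignExtend,
    ZeroExtend,
}

fn classify_int_load(load_bytes: u32, sign_extend: bool) -> IntLoadKind {
    if sign_extend {
        // Sign-extending loads are always narrower than 64 bits, so an
        // ExtMode exists for them.
        IntLoadKind::SignExtend
    } else if load_bytes == 8 {
        IntLoadKind::Plain64
    } else {
        IntLoadKind::ZeroExtend
    }
}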
- ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst)) + ctx.emit(Inst::movzx_rm_r( + ext_mode.unwrap(), + RegMem::mem(addr), + dst, + srcloc, + )) } } (_, true) => { ctx.emit(match elem_ty { - F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::mem(addr), dst), + F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc), _ => unimplemented!("FP load not 32-bit"), }); } @@ -595,16 +1038,44 @@ fn lower_insn_to_regs>( let src = input_to_reg(ctx, inputs[0]); + let srcloc = Some(ctx.srcloc(insn)); + if is_float { ctx.emit(match elem_ty { - F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr), + F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc), _ => unimplemented!("FP store not 32-bit"), }); } else { - ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr)); + ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc)); } } + Opcode::FuncAddr => { + let dst = output_to_reg(ctx, outputs[0]); + let (extname, _) = ctx.call_target(insn).unwrap(); + let extname = extname.clone(); + let loc = ctx.srcloc(insn); + ctx.emit(Inst::LoadExtName { + dst, + name: Box::new(extname), + srcloc: loc, + offset: 0, + }); + } + + Opcode::SymbolValue => { + let dst = output_to_reg(ctx, outputs[0]); + let (extname, _, offset) = ctx.symbol_value(insn).unwrap(); + let extname = extname.clone(); + let loc = ctx.srcloc(insn); + ctx.emit(Inst::LoadExtName { + dst, + name: Box::new(extname), + srcloc: loc, + offset, + }); + } + Opcode::StackAddr => { let (stack_slot, offset) = match *ctx.data(insn) { InstructionData::StackLoad { @@ -616,7 +1087,6 @@ fn lower_insn_to_regs>( }; let dst = output_to_reg(ctx, outputs[0]); let offset: i32 = offset.into(); - println!("stackslot_addr: {:?} @ off{}", stack_slot, offset); let inst = ctx .abi() .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst); @@ -655,8 +1125,8 @@ fn lower_insn_to_regs>( if size == 1 { // Sign-extend operands to 32, then do a cmove of size 4. let lhs_se = ctx.alloc_tmp(RegClass::I64, I32); - ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se)); - ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst)); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None)); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None)); ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst)); } else { ctx.emit(Inst::gen_move(dst, rhs, ty)); @@ -665,8 +1135,14 @@ fn lower_insn_to_regs>( } Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => { - let is_div = op == Opcode::Udiv || op == Opcode::Sdiv; - let is_signed = op == Opcode::Sdiv || op == Opcode::Srem; + let kind = match op { + Opcode::Udiv => DivOrRemKind::UnsignedDiv, + Opcode::Sdiv => DivOrRemKind::SignedDiv, + Opcode::Urem => DivOrRemKind::UnsignedRem, + Opcode::Srem => DivOrRemKind::SignedRem, + _ => unreachable!(), + }; + let is_div = kind.is_div(); let input_ty = ctx.input_ty(insn, 0); let size = input_ty.bytes() as u8; @@ -686,22 +1162,28 @@ fn lower_insn_to_regs>( // pc-relative offsets that must not change, thus requiring regalloc to not // interfere by introducing spills and reloads. // - // Note it keeps the result in $rax (if is_div) or $rdx (if !is_div), so that + // Note it keeps the result in $rax (for divide) or $rdx (for rem), so that // regalloc is aware of the coalescing opportunity between rax/rdx and the // destination register. 
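// Illustrative reimplementation (the real DivOrRemKind lives in the x64 inst
// module) of the classification the div/rem lowering relies on: signedness
// decides whether %rdx is zeroed or filled by sign-extending %rax, and
// div-vs-rem decides whether the result is read from %rax or %rdx.
#[derive(Clone, Copy)]
enum DivOrRemKindModel {
    UnsignedDiv,
    SignedDiv,
    UnsignedRem,
    SignedRem,
}

impl DivOrRemKindModel {
    fn is_signed(self) -> bool {
        matches!(self, Self::SignedDiv | Self::SignedRem)
    }
    fn is_div(self) -> bool {
        matches!(self, Self::UnsignedDiv | Self::SignedDiv)
    }
    /// Conventional name of the register the result is read from after div/idiv.
    fn result_reg_name(self) -> &'static str {
        if self.is_div() { "rax" } else { "rdx" }
    }
}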
let divisor = input_to_reg(ctx, inputs[1]); + let tmp = if op == Opcode::Sdiv && size == 8 { + Some(ctx.alloc_tmp(RegClass::I64, I64)) + } else { + None + }; + ctx.emit(Inst::imm_r(true, 0, Writable::from_reg(regs::rdx()))); ctx.emit(Inst::CheckedDivOrRemSeq { - is_div, - is_signed, + kind, size, divisor, + tmp, loc: srcloc, }); } else { let divisor = input_to_reg_mem(ctx, inputs[1]); // Fill in the high parts: - if is_signed { + if kind.is_signed() { // sign-extend the sign-bit of rax into rdx, for signed opcodes. ctx.emit(Inst::sign_extend_rax_to_rdx(size)); } else { @@ -714,7 +1196,7 @@ fn lower_insn_to_regs>( } // Emit the actual idiv. - ctx.emit(Inst::div(size, is_signed, divisor, ctx.srcloc(insn))); + ctx.emit(Inst::div(size, kind.is_signed(), divisor, ctx.srcloc(insn))); } // Move the result back into the destination reg. @@ -727,6 +1209,43 @@ fn lower_insn_to_regs>( } } + Opcode::Umulhi | Opcode::Smulhi => { + let input_ty = ctx.input_ty(insn, 0); + let size = input_ty.bytes() as u8; + + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + + // Move lhs in %rax. + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::rax()), + lhs, + input_ty, + )); + + // Emit the actual mul or imul. + let signed = op == Opcode::Smulhi; + ctx.emit(Inst::mul_hi(size, signed, rhs)); + + // Read the result from the high part (stored in %rdx). + ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty)); + } + + Opcode::GetPinnedReg => { + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), I64)); + } + + Opcode::SetPinnedReg => { + let src = input_to_reg(ctx, inputs[0]); + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::pinned_reg()), + src, + I64, + )); + } + Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm @@ -876,35 +1395,14 @@ impl LowerBackend for X64Backend { assert!(jt_size <= u32::max_value() as usize); let jt_size = jt_size as u32; - let idx_size_bits = ctx.input_ty(branches[0], 0).bits(); - - // Zero-extend to 32-bits if needed. - // TODO consider factoring this out? - let idx = if idx_size_bits < 32 { - let ext_mode = match idx_size_bits { - 1 | 8 => ExtMode::BL, - 16 => ExtMode::WL, - _ => unreachable!(), - }; - let idx = input_to_reg_mem( - ctx, - InsnInput { - insn: branches[0], - input: 0, - }, - ); - let tmp_idx = ctx.alloc_tmp(RegClass::I64, I32); - ctx.emit(Inst::movzx_rm_r(ext_mode, idx, tmp_idx)); - tmp_idx.to_reg() - } else { - input_to_reg( - ctx, - InsnInput { - insn: branches[0], - input: 0, - }, - ) - }; + let idx = extend_input_to_reg( + ctx, + InsnInput { + insn: branches[0], + input: 0, + }, + ExtSpec::ZeroExtendTo32, + ); // Bounds-check (compute flags from idx - jt_size) and branch to default. ctx.emit(Inst::cmp_rmi_r(4, RegMemImm::imm(jt_size), idx)); @@ -951,4 +1449,8 @@ impl LowerBackend for X64Backend { Ok(()) } + + fn maybe_pinned_reg(&self) -> Option { + Some(regs::pinned_reg()) + } }
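// Illustrative reference semantics (not backend code) for the Umulhi/Smulhi
// lowering above: one operand is moved into %rax, mul or imul forms the full
// widening product, and the upper half, read back from %rdx, is the result.
fn umulhi64_model(a: u64, b: u64) -> u64 {
    ((u128::from(a) * u128::from(b)) >> 64) as u64
}

fn smulhi64_model(a: i64, b: i64) -> i64 {
    ((i128::from(a) * i128::from(b)) >> 64) as i64
}

#[test]
fn mulhi_models() {
    assert_eq!(umulhi64_model(u64::MAX, 2), 1);
    assert_eq!(smulhi64_model(-1, 2), -1);
    assert_eq!(smulhi64_model(i64::MAX, i64::MAX), i64::MAX >> 1);
}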