Skip to content

Commit

Permalink
x64: Lower stack_addr, udiv, sdiv, urem, srem, umulhi, smulhi in ISLE (
Browse files Browse the repository at this point in the history
…#4741)

Lower stack_addr, udiv, sdiv, urem, srem, umulhi, and smulhi in ISLE.

For udiv, sdiv, urem, and srem I opted to move the original lowering into an extern constructor, as the interactions with rax and rdx for the div instruction didn't seem meaningful to implement in ISLE. However, I'm happy to revisit this choice and move more of the embedding into ISLE.
  • Loading branch information
elliottt committed Aug 23, 2022
1 parent 3b68d76 commit b5f1ab7
Show file tree
Hide file tree
Showing 13 changed files with 585 additions and 159 deletions.
21 changes: 20 additions & 1 deletion cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2738,7 +2738,7 @@
(rule (mul_hi ty signed src1 src2)
(let ((dst_lo WritableGpr (temp_writable_gpr))
(dst_hi WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty))
(size OperandSize (raw_operand_size_of_type ty))
(_ Unit (emit (MInst.MulHi size
signed
src1
Expand Down Expand Up @@ -3587,6 +3587,25 @@
;; $I64 case of `bitcast_gpr_to_xmm`: hand `src` to `gpr_to_xmm` with the
;; `Movq` SSE opcode and a 64-bit operand size.
(rule (bitcast_gpr_to_xmm $I64 src)
(gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64)))

;;;; Stack Addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Helper for lowering `stack_addr`.
;;
;; Allocates a fresh temporary writable GPR, emits the instruction built by
;; `abi_stackslot_addr` for `stack_slot` and `offset` with that temporary as
;; its destination, and returns the temporary as the result.
(decl stack_addr_impl (StackSlot Offset32) Gpr)
(rule (stack_addr_impl stack_slot offset)
(let ((dst WritableGpr (temp_writable_gpr))
;; The emit is bound to `_` only for its side effect; its `Unit` result is
;; discarded.
(_ Unit (emit (abi_stackslot_addr dst stack_slot offset))))
dst))

;;;; Division/Remainders ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Emit the instruction sequence for a division or remainder.
;;
;; Arguments, in order: the operation kind, the operand type, the writable
;; destination register, and the two operand registers. This is an extern
;; constructor, so the sequence itself is produced by the implementation bound
;; to `emit_div_or_rem` outside of ISLE.
(decl emit_div_or_rem (DivOrRemKind Type WritableGpr Gpr Gpr) Unit)
(extern constructor emit_div_or_rem emit_div_or_rem)

;; Lower a division or remainder of `a` and `b` of the given `kind`.
;;
;; The operand type is taken from `a`. A fresh temporary GPR is allocated as
;; the destination, `emit_div_or_rem` is invoked to emit the code that fills
;; it, and the temporary is returned as the result.
(decl div_or_rem (DivOrRemKind Value Value) Gpr)
(rule (div_or_rem kind a @ (value_type ty) b)
(let ((dst WritableGpr (temp_writable_gpr))
;; Bound to `_` only for the side effect of emitting instructions.
(_ Unit (emit_div_or_rem kind ty dst a b)))
dst))

;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(convert Gpr InstOutput output_gpr)
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1575,7 +1575,7 @@ impl fmt::Display for ShiftKind {
}

/// What kind of division or remainder instruction this is?
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq)]
pub enum DivOrRemKind {
SignedDiv,
UnsignedDiv,
Expand Down
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,23 @@ impl Inst {
dst: WritableXmm::from_writable_reg(dst).unwrap(),
}
}

/// Builds an `Inst::MulHi` whose first source and low destination are `rax`
/// and whose high destination is `rdx`, with `rhs` as the second source.
///
/// In debug builds this asserts that `size` is 16, 32, or 64 bits. `rhs` is
/// checked to be in the integer register class.
fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst {
    debug_assert!(size.is_one_of(&[
        OperandSize::Size16,
        OperandSize::Size32,
        OperandSize::Size64
    ]));
    rhs.assert_regclass_is(RegClass::Int);

    // Operands are constructed in the same order as the struct fields.
    let src1 = Gpr::new(regs::rax()).unwrap();
    let src2 = GprMem::new(rhs).unwrap();
    let dst_lo = WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap());
    let dst_hi = WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap());

    Inst::MulHi {
        size,
        signed,
        src1,
        src2,
        dst_lo,
        dst_hi,
    }
}
}

#[test]
Expand Down
17 changes: 0 additions & 17 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,23 +208,6 @@ impl Inst {
}
}

/// Creates an `Inst::MulHi` using fixed registers: `rax` as the first source
/// and low-half destination, `rdx` as the high-half destination, and `rhs` as
/// the second source.
///
/// `size` must be one of the 16-, 32-, or 64-bit operand sizes (checked with
/// a debug assertion), and `rhs` must be in the integer register class.
pub(crate) fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst {
    debug_assert!(size.is_one_of(&[
        OperandSize::Size16,
        OperandSize::Size32,
        OperandSize::Size64
    ]));
    rhs.assert_regclass_is(RegClass::Int);

    // Built in field order so the sequence of `unwrap` calls is unchanged.
    let src1 = Gpr::new(regs::rax()).unwrap();
    let src2 = GprMem::new(rhs).unwrap();
    let dst_lo = WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap());
    let dst_hi = WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap());

    Inst::MulHi {
        size,
        signed,
        src1,
        src2,
        dst_lo,
        dst_hi,
    }
}

pub(crate) fn checked_div_or_rem_seq(
kind: DivOrRemKind,
size: OperandSize,
Expand Down
59 changes: 59 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3426,3 +3426,62 @@

;; `trunc` on an `$F64X2` input, guarded by `use_sse41`: lower to
;; `x64_roundpd` with the round-toward-zero immediate.
(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundZero)))

;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Forward the stack slot and offset unchanged to `stack_addr_impl`, whose
;; result becomes the lowering's output.
(rule (lower (stack_addr stack_slot offset))
(stack_addr_impl stack_slot offset))

;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Lower `udiv` through the shared `div_or_rem` helper with the unsigned
;; division kind. No type is bound here: the result expression never used one,
;; and `div_or_rem` is called with only the kind and the two operands.
(rule (lower (udiv a b))
      (div_or_rem (DivOrRemKind.UnsignedDiv) a b))

;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Lower `sdiv` through the shared `div_or_rem` helper with the signed
;; division kind. No type is bound here: the result expression never used one,
;; and `div_or_rem` is called with only the kind and the two operands.
(rule (lower (sdiv a b))
      (div_or_rem (DivOrRemKind.SignedDiv) a b))

;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Lower `urem` through the shared `div_or_rem` helper with the unsigned
;; remainder kind. No type is bound here: the result expression never used
;; one, and `div_or_rem` is called with only the kind and the two operands.
(rule (lower (urem a b))
      (div_or_rem (DivOrRemKind.UnsignedRem) a b))

;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Lower `srem` through the shared `div_or_rem` helper with the signed
;; remainder kind. No type is bound here: the result expression never used
;; one, and `div_or_rem` is called with only the kind and the two operands.
(rule (lower (srem a b))
      (div_or_rem (DivOrRemKind.SignedRem) a b))

;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; One rule per supported width ($I16, $I32, $I64). Each calls `mul_hi` with
;; the `signed` argument set to `$false` and returns the register at index 1
;; of the resulting `ValueRegs`.

(rule (lower (umulhi a @ (value_type $I16) b))
      (value_regs_get_gpr (mul_hi $I16 $false a b) 1))

(rule (lower (umulhi a @ (value_type $I32) b))
      (value_regs_get_gpr (mul_hi $I32 $false a b) 1))

(rule (lower (umulhi a @ (value_type $I64) b))
      (value_regs_get_gpr (mul_hi $I64 $false a b) 1))

;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; One rule per supported width ($I16, $I32, $I64). Each calls `mul_hi` with
;; the `signed` argument set to `$true` and returns the register at index 1
;; of the resulting `ValueRegs`.

(rule (lower (smulhi a @ (value_type $I16) b))
      (value_regs_get_gpr (mul_hi $I16 $true a b) 1))

(rule (lower (smulhi a @ (value_type $I32) b))
      (value_regs_get_gpr (mul_hi $I32 $true a b) 1))

(rule (lower (smulhi a @ (value_type $I64) b))
      (value_regs_get_gpr (mul_hi $I64 $true a b) 1))
148 changes: 8 additions & 140 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use crate::machinst::*;
use crate::result::CodegenResult;
use crate::settings::{Flags, TlsModel};
use smallvec::SmallVec;
use std::convert::TryFrom;
use target_lexicon::Triple;

//=============================================================================
Expand Down Expand Up @@ -574,150 +573,19 @@ fn lower_insn_to_regs(
| Opcode::Ceil
| Opcode::Floor
| Opcode::Nearest
| Opcode::Trunc => {
| Opcode::Trunc
| Opcode::StackAddr
| Opcode::Udiv
| Opcode::Urem
| Opcode::Sdiv
| Opcode::Srem
| Opcode::Umulhi
| Opcode::Smulhi => {
implemented_in_isle(ctx);
}

Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"),

Opcode::StackAddr => {
let (stack_slot, offset) = match *ctx.data(insn) {
InstructionData::StackLoad {
opcode: Opcode::StackAddr,
stack_slot,
offset,
} => (stack_slot, offset),
_ => unreachable!(),
};
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let offset: i32 = offset.into();
let inst =
ctx.abi()
.sized_stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
ctx.emit(inst);
}

Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => {
let kind = match op {
Opcode::Udiv => DivOrRemKind::UnsignedDiv,
Opcode::Sdiv => DivOrRemKind::SignedDiv,
Opcode::Urem => DivOrRemKind::UnsignedRem,
Opcode::Srem => DivOrRemKind::SignedRem,
_ => unreachable!(),
};
let is_div = kind.is_div();

let input_ty = ctx.input_ty(insn, 0);
let size = OperandSize::from_ty(input_ty);

let dividend = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rax()),
dividend,
input_ty,
));

// Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
if flags.avoid_div_traps() || op == Opcode::Srem {
// A vcode meta-instruction is used to lower the inline checks, since they embed
// pc-relative offsets that must not change, thus requiring regalloc to not
// interfere by introducing spills and reloads.
//
// Note it keeps the result in $rax (for divide) or $rdx (for rem), so that
// regalloc is aware of the coalescing opportunity between rax/rdx and the
// destination register.
let divisor = put_input_in_reg(ctx, inputs[1]);

let divisor_copy = ctx.alloc_tmp(types::I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(divisor_copy, divisor, types::I64));

let tmp = if op == Opcode::Sdiv && size == OperandSize::Size64 {
Some(ctx.alloc_tmp(types::I64).only_reg().unwrap())
} else {
None
};
// TODO use xor
ctx.emit(Inst::imm(
OperandSize::Size32,
0,
Writable::from_reg(regs::rdx()),
));
ctx.emit(Inst::checked_div_or_rem_seq(kind, size, divisor_copy, tmp));
} else {
// We don't want more than one trap record for a single instruction,
// so let's not allow the "mem" case (load-op merging) here; force
// divisor into a register instead.
let divisor = RegMem::reg(put_input_in_reg(ctx, inputs[1]));

// Fill in the high parts:
if kind.is_signed() {
// sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
// signed opcodes.
ctx.emit(Inst::sign_extend_data(size));
} else if input_ty == types::I8 {
ctx.emit(Inst::movzx_rm_r(
ExtMode::BL,
RegMem::reg(regs::rax()),
Writable::from_reg(regs::rax()),
));
} else {
// zero for unsigned opcodes.
ctx.emit(Inst::imm(
OperandSize::Size64,
0,
Writable::from_reg(regs::rdx()),
));
}

// Emit the actual idiv.
ctx.emit(Inst::div(size, kind.is_signed(), divisor));
}

// Move the result back into the destination reg.
if is_div {
// The quotient is in rax.
ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty));
} else {
if size == OperandSize::Size8 {
// The remainder is in AH. Right-shift by 8 bits then move from rax.
ctx.emit(Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
Some(8),
Writable::from_reg(regs::rax()),
));
ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty));
} else {
// The remainder is in rdx.
ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
}
}
}

Opcode::Umulhi | Opcode::Smulhi => {
let input_ty = ctx.input_ty(insn, 0);

let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

// Move lhs in %rax.
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rax()),
lhs,
input_ty,
));

// Emit the actual mul or imul.
let signed = op == Opcode::Smulhi;
ctx.emit(Inst::mul_hi(OperandSize::from_ty(input_ty), signed, rhs));

// Read the result from the high part (stored in %rdx).
ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
}

Opcode::GetPinnedReg => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), types::I64));
Expand Down
Loading

0 comments on commit b5f1ab7

Please sign in to comment.