Skip to content

Commit

Permalink
Merge pull request #3005 from afonso360/aarch64-i128-extend
Browse files Browse the repository at this point in the history
aarch64: Implement uextend/sextend for i128 values
  • Loading branch information
cfallin committed Jun 22, 2021
2 parents 1a865fb + f25f5b2 commit fa1a04d
Show file tree
Hide file tree
Showing 4 changed files with 230 additions and 26 deletions.
50 changes: 41 additions & 9 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,9 +613,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let to_bits = ty_bits(output_ty) as u8;
let to_bits = std::cmp::max(32, to_bits);
assert!(from_bits <= to_bits);
if from_bits < to_bits {
let signed = op == Opcode::Sextend;
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

let signed = op == Opcode::Sextend;
let dst = get_output_reg(ctx, outputs[0]);
let src =
if let Some(extract_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Extractlane) {
put_input_in_regs(
ctx,
InsnInput {
insn: extract_insn,
input: 0,
},
)
} else {
put_input_in_regs(ctx, inputs[0])
};

let needs_extend = from_bits < to_bits && to_bits <= 64;
// For i128, we want to extend the lower half, except if it is already 64 bits.
let needs_lower_extend = to_bits > 64 && from_bits < 64;
let pass_through_lower = to_bits > 64 && !needs_lower_extend;

if needs_extend || needs_lower_extend {
let rn = src.regs()[0];
let rd = dst.regs()[0];

if let Some(extract_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Extractlane) {
let idx =
Expand All @@ -624,11 +645,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
unreachable!();
};
let input = InsnInput {
insn: extract_insn,
input: 0,
};
let rn = put_input_in_reg(ctx, input, NarrowValueMode::None);

let size = VectorSize::from_ty(ctx.input_ty(extract_insn, 0));

if signed {
Expand All @@ -654,8 +671,23 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rn,
signed,
from_bits,
to_bits,
to_bits: std::cmp::min(64, to_bits),
});
}
} else if pass_through_lower {
ctx.emit(Inst::gen_move(dst.regs()[0], src.regs()[0], I64));
}

if output_ty == I128 {
if signed {
ctx.emit(Inst::AluRRImmShift {
alu_op: ALUOp::Asr64,
rd: dst.regs()[1],
rn: dst.regs()[0].to_reg(),
immshift: ImmShift::maybe_from_u64(63).unwrap(),
});
} else {
lower_constant_u64(ctx, dst.regs()[1], 0);
}
}
}
Expand Down
106 changes: 106 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/extend-op.clif
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,109 @@ block0(v0: i8):
; nextln: add x0, x1, x0, SXTB
; nextln: ldp fp, lr, [sp], #16
; nextln: ret


; uextend i64 -> i128: the low 64 bits are already full width, so the lowering
; only has to materialize the high half as constant zero (movz x1, #0).
function %i128_uextend_i64(i64) -> i128 {
block0(v0: i64):
v1 = uextend.i128 v0
return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

; sextend i64 -> i128: the high half is the sign bit of the low half
; replicated into all 64 bits via an arithmetic shift right by 63.
function %i128_sextend_i64(i64) -> i128 {
block0(v0: i64):
v1 = sextend.i128 v0
return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: asr x1, x0, #63
; nextln: ldp fp, lr, [sp], #16
; nextln: ret


; uextend i32 -> i128: zero-extend the 32-bit input into the low 64 bits
; (mov w0, w0 clears the upper 32), then zero the high half.
function %i128_uextend_i32(i32) -> i128 {
block0(v0: i32):
v1 = uextend.i128 v0
return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: mov w0, w0
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

; sextend i32 -> i128: sign-extend the input into the low 64 bits (sxtw),
; then derive the high half by shifting the sign bit across x1.
function %i128_sextend_i32(i32) -> i128 {
block0(v0: i32):
v1 = sextend.i128 v0
return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtw x0, w0
; nextln: asr x1, x0, #63
; nextln: ldp fp, lr, [sp], #16
; nextln: ret


; uextend i16 -> i128: zero-extend the 16-bit input into the low half (uxth),
; then zero the high half.
function %i128_uextend_i16(i16) -> i128 {
block0(v0: i16):
v1 = uextend.i128 v0
return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxth w0, w0
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

; sextend i16 -> i128: sign-extend the input into the low half (sxth),
; then replicate the sign bit into the high half with asr #63.
function %i128_sextend_i16(i16) -> i128 {
block0(v0: i16):
v1 = sextend.i128 v0
return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxth x0, w0
; nextln: asr x1, x0, #63
; nextln: ldp fp, lr, [sp], #16
; nextln: ret


; uextend i8 -> i128: zero-extend the byte into the low half (uxtb),
; then zero the high half.
function %i128_uextend_i8(i8) -> i128 {
block0(v0: i8):
v1 = uextend.i128 v0
return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: uxtb w0, w0
; nextln: movz x1, #0
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

; sextend i8 -> i128: sign-extend the byte into the low half (sxtb),
; then replicate the sign bit into the high half with asr #63.
function %i128_sextend_i8(i8) -> i128 {
block0(v0: i8):
v1 = sextend.i128 v0
return v1
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sxtb x0, w0
; nextln: asr x1, x0, #63
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
69 changes: 69 additions & 0 deletions cranelift/filetests/filetests/runtests/i128-extend-2.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
test run
target aarch64
target x86_64 machinst
; TODO: Merge this file with i128-extend once the x86 legacy backend is removed

; Runtest: uextend i32 -> i128, returned as (low, high) via isplit.
; For a zero extension the high half must always be 0, even for inputs
; with the top bit set (-1, 0xffff_eeee).
function %i128_uextend_i32(i32) -> i64, i64 {
block0(v0: i32):
v1 = uextend.i128 v0
v2, v3 = isplit v1
return v2, v3
}
; run: %i128_uextend_i32(0) == [0, 0]
; run: %i128_uextend_i32(-1) == [0xffff_ffff, 0]
; run: %i128_uextend_i32(0xffff_eeee) == [0xffff_eeee, 0]

; Runtest: sextend i32 -> i128, returned as (low, high) via isplit.
; The high half must be all-ones exactly when the i32 input is negative
; (bit 31 set), and zero otherwise.
function %i128_sextend_i32(i32) -> i64, i64 {
block0(v0: i32):
v1 = sextend.i128 v0
v2, v3 = isplit v1
return v2, v3
}
; run: %i128_sextend_i32(0) == [0, 0]
; run: %i128_sextend_i32(-1) == [-1, -1]
; run: %i128_sextend_i32(0x7fff_ffff) == [0x7fff_ffff, 0x0000_0000_0000_0000]
; run: %i128_sextend_i32(0xffff_eeee) == [0xffff_ffff_ffff_eeee, 0xffff_ffff_ffff_ffff]


; Runtest: uextend i16 -> i128, returned as (low, high) via isplit.
; Zero extension: the high half must be 0 regardless of the input's sign bit.
function %i128_uextend_i16(i16) -> i64, i64 {
block0(v0: i16):
v1 = uextend.i128 v0
v2, v3 = isplit v1
return v2, v3
}
; run: %i128_uextend_i16(0) == [0, 0]
; run: %i128_uextend_i16(-1) == [0xffff, 0]
; run: %i128_uextend_i16(0xffee) == [0xffee, 0]

; Runtest: sextend i16 -> i128, returned as (low, high) via isplit.
; The high half must mirror the i16 sign bit: all-ones for negative inputs
; (0xffee has bit 15 set), zero for non-negative ones (0x7fff).
function %i128_sextend_i16(i16) -> i64, i64 {
block0(v0: i16):
v1 = sextend.i128 v0
v2, v3 = isplit v1
return v2, v3
}
; run: %i128_sextend_i16(0) == [0, 0]
; run: %i128_sextend_i16(-1) == [-1, -1]
; run: %i128_sextend_i16(0x7fff) == [0x7fff, 0x0000_0000_0000_0000]
; run: %i128_sextend_i16(0xffee) == [0xffff_ffff_ffff_ffee, 0xffff_ffff_ffff_ffff]


; Runtest: uextend i8 -> i128, returned as (low, high) via isplit.
; Zero extension: the high half must be 0 regardless of the input's sign bit.
function %i128_uextend_i8(i8) -> i64, i64 {
block0(v0: i8):
v1 = uextend.i128 v0
v2, v3 = isplit v1
return v2, v3
}
; run: %i128_uextend_i8(0) == [0, 0]
; run: %i128_uextend_i8(-1) == [0xff, 0]
; run: %i128_uextend_i8(0xfe) == [0xfe, 0]

; Runtest: sextend i8 -> i128, returned as (low, high) via isplit.
; The high half must mirror the i8 sign bit: all-ones for negative inputs
; (0xfe has bit 7 set), zero for non-negative ones (0x7f).
function %i128_sextend_i8(i8) -> i64, i64 {
block0(v0: i8):
v1 = sextend.i128 v0
v2, v3 = isplit v1
return v2, v3
}
; run: %i128_sextend_i8(0) == [0, 0]
; run: %i128_sextend_i8(-1) == [-1, -1]
; run: %i128_sextend_i8(0x7f) == [0x7f, 0x0000_0000_0000_0000]
; run: %i128_sextend_i8(0xfe) == [0xffff_ffff_ffff_fffe, 0xffff_ffff_ffff_ffff]
31 changes: 14 additions & 17 deletions cranelift/filetests/filetests/runtests/i128-extend.clif
Original file line number Diff line number Diff line change
@@ -1,29 +1,26 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
target aarch64
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst
target x86_64 legacy

function %i128_uextend() -> b1 {
block0:
v0 = iconst.i64 0xffff_ffff_eeee_0000
function %i128_uextend_i64(i64) -> i64, i64 {
block0(v0: i64):
v1 = uextend.i128 v0
v2, v3 = isplit v1
v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000
v5 = icmp_imm eq v3, 0
v6 = band v4, v5
return v6
return v2, v3
}
; run
; run: %i128_uextend_i64(0) == [0, 0]
; run: %i128_uextend_i64(-1) == [-1, 0]
; run: %i128_uextend_i64(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0]

function %i128_sextend() -> b1 {
block0:
v0 = iconst.i64 0xffff_ffff_eeee_0000
function %i128_sextend_i64(i64) -> i64, i64 {
block0(v0: i64):
v1 = sextend.i128 v0
v2, v3 = isplit v1
v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000
v5 = icmp_imm eq v3, 0xffff_ffff_ffff_ffff
v6 = band v4, v5
return v6
return v2, v3
}
; run
; run: %i128_sextend_i64(0) == [0, 0]
; run: %i128_sextend_i64(-1) == [-1, -1]
; run: %i128_sextend_i64(0x7fff_ffff_ffff_ffff) == [0x7fff_ffff_ffff_ffff, 0x0000_0000_0000_0000]
; run: %i128_sextend_i64(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0xffff_ffff_ffff_ffff]

0 comments on commit fa1a04d

Please sign in to comment.