-
Notifications
You must be signed in to change notification settings - Fork 1.6k
NaN-canonicalization without branching on x64 #8313
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
764979b
48c3712
90b9e1a
f2e156e
302cd3c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,7 @@ | |
| use crate::cursor::{Cursor, FuncCursor}; | ||
| use crate::ir::condcodes::FloatCC; | ||
| use crate::ir::immediates::{Ieee32, Ieee64}; | ||
| use crate::ir::types; | ||
| use crate::ir::types::{self}; | ||
| use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value}; | ||
| use crate::opts::MemFlags; | ||
| use crate::timing; | ||
|
|
@@ -15,13 +15,13 @@ static CANON_32BIT_NAN: u32 = 0b01111111110000000000000000000000; | |
| static CANON_64BIT_NAN: u64 = 0b0111111111111000000000000000000000000000000000000000000000000000; | ||
|
|
||
| /// Perform the NaN canonicalization pass. | ||
| pub fn do_nan_canonicalization(func: &mut Function) { | ||
| pub fn do_nan_canonicalization(func: &mut Function, has_vector_support: bool) { | ||
| let _tt = timing::canonicalize_nans(); | ||
| let mut pos = FuncCursor::new(func); | ||
| while let Some(_block) = pos.next_block() { | ||
| while let Some(inst) = pos.next_inst() { | ||
| if is_fp_arith(&mut pos, inst) { | ||
| add_nan_canon_seq(&mut pos, inst); | ||
| add_nan_canon_seq(&mut pos, inst, has_vector_support); | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -57,24 +57,36 @@ fn is_fp_arith(pos: &mut FuncCursor, inst: Inst) -> bool { | |
| } | ||
|
|
||
| /// Append a sequence of canonicalizing instructions after the given instruction. | ||
| fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) { | ||
| fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool) { | ||
| // Select the instruction result, result type. Replace the instruction | ||
| // result and step forward before inserting the canonicalization sequence. | ||
| let val = pos.func.dfg.first_result(inst); | ||
| let val_type = pos.func.dfg.value_type(val); | ||
| let new_res = pos.func.dfg.replace_result(val, val_type); | ||
| let _next_inst = pos.next_inst().expect("block missing terminator!"); | ||
|
|
||
| // Insert a comparison instruction, to check if `inst_res` is NaN. Select | ||
| // the canonical NaN value if `val` is NaN, assign the result to `inst`. | ||
| let is_nan = pos.ins().fcmp(FloatCC::NotEqual, new_res, new_res); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Without any of the other changes, just changing this comparison from |
||
| // Insert a comparison instruction, to check if `inst_res` is NaN (comparing | ||
| // against NaN is always unordered). Select the canonical NaN value if `val` | ||
| // is NaN, assign the result to `inst`. | ||
| let comparison = FloatCC::Unordered; | ||
|
|
||
| let vectorized_scalar_select = |pos: &mut FuncCursor, canon_nan: Value, ty: types::Type| { | ||
| let canon_nan = pos.ins().scalar_to_vector(ty, canon_nan); | ||
| let new_res = pos.ins().scalar_to_vector(ty, new_res); | ||
afonso360 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| let is_nan = pos.ins().fcmp(comparison, new_res, new_res); | ||
| let is_nan = pos.ins().bitcast(ty, MemFlags::new(), is_nan); | ||
| let simd_result = pos.ins().bitselect(is_nan, canon_nan, new_res); | ||
| pos.ins().with_result(val).extractlane(simd_result, 0); | ||
| }; | ||
| let scalar_select = |pos: &mut FuncCursor, canon_nan: Value| { | ||
| let is_nan = pos.ins().fcmp(comparison, new_res, new_res); | ||
| pos.ins() | ||
| .with_result(val) | ||
| .select(is_nan, canon_nan, new_res); | ||
| }; | ||
|
|
||
| let vector_select = |pos: &mut FuncCursor, canon_nan: Value| { | ||
| let is_nan = pos.ins().fcmp(comparison, new_res, new_res); | ||
| let is_nan = pos.ins().bitcast(val_type, MemFlags::new(), is_nan); | ||
| pos.ins() | ||
| .with_result(val) | ||
|
|
@@ -84,11 +96,19 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) { | |
| match val_type { | ||
| types::F32 => { | ||
| let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)); | ||
| scalar_select(pos, canon_nan); | ||
| if has_vector_support { | ||
| vectorized_scalar_select(pos, canon_nan, types::F32X4); | ||
| } else { | ||
| scalar_select(pos, canon_nan); | ||
| } | ||
| } | ||
| types::F64 => { | ||
| let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)); | ||
| scalar_select(pos, canon_nan); | ||
| if has_vector_support { | ||
| vectorized_scalar_select(pos, canon_nan, types::F64X2); | ||
| } else { | ||
| scalar_select(pos, canon_nan); | ||
| } | ||
| } | ||
| types::F32X4 => { | ||
| let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,140 @@ | ||
| test compile precise-output | ||
| set enable_nan_canonicalization=true | ||
| target x86_64 sse41 | ||
|
|
||
| function %f0(f32x4, f32x4) -> f32x4 { | ||
| block0(v0: f32x4, v1: f32x4): | ||
| v2 = fadd v0, v1 | ||
| return v2 | ||
| } | ||
|
|
||
| ; VCode: | ||
| ; pushq %rbp | ||
| ; movq %rsp, %rbp | ||
| ; block0: | ||
| ; addps %xmm0, %xmm1, %xmm0 | ||
| ; movl $2143289344, %r10d | ||
| ; movd %r10d, %xmm7 | ||
| ; shufps $0, %xmm7, const(0), %xmm7 | ||
| ; movdqa %xmm0, %xmm1 | ||
| ; cmpps $3, %xmm1, %xmm0, %xmm1 | ||
| ; movdqa %xmm0, %xmm2 | ||
| ; movdqa %xmm1, %xmm0 | ||
| ; movdqa %xmm2, %xmm1 | ||
| ; pblendvb %xmm1, %xmm7, %xmm1 | ||
| ; movdqa %xmm1, %xmm0 | ||
| ; movq %rbp, %rsp | ||
| ; popq %rbp | ||
| ; ret | ||
| ; | ||
| ; Disassembled: | ||
| ; block0: ; offset 0x0 | ||
| ; pushq %rbp | ||
| ; movq %rsp, %rbp | ||
| ; block1: ; offset 0x4 | ||
| ; addps %xmm1, %xmm0 | ||
| ; movl $0x7fc00000, %r10d | ||
| ; movd %r10d, %xmm7 | ||
| ; shufps $0, 0x26(%rip), %xmm7 | ||
| ; movdqa %xmm0, %xmm1 | ||
| ; cmpunordps %xmm0, %xmm1 | ||
| ; movdqa %xmm0, %xmm2 | ||
| ; movdqa %xmm1, %xmm0 | ||
| ; movdqa %xmm2, %xmm1 | ||
| ; pblendvb %xmm0, %xmm7, %xmm1 | ||
| ; movdqa %xmm1, %xmm0 | ||
| ; movq %rbp, %rsp | ||
| ; popq %rbp | ||
| ; retq | ||
| ; addb %al, (%rax) | ||
| ; addb %al, (%rax) | ||
| ; addb %al, (%rax) | ||
| ; sarb $0, (%rdi) | ||
| ; addb %al, (%rax) | ||
| ; addb %al, (%rax) | ||
| ; addb %al, (%rax) | ||
| ; addb %al, (%rax) | ||
| ; addb %al, (%rax) | ||
|
|
||
| function %f1(f64, f64) -> f64 { | ||
| block0(v0: f64, v1: f64): | ||
| v2 = fadd v0, v1 | ||
| return v2 | ||
| } | ||
|
|
||
| ; VCode: | ||
| ; pushq %rbp | ||
| ; movq %rsp, %rbp | ||
| ; block0: | ||
| ; addsd %xmm0, %xmm1, %xmm0 | ||
| ; movabsq $9221120237041090560, %r9 | ||
| ; movq %r9, %xmm1 | ||
| ; movdqa %xmm0, %xmm7 | ||
| ; cmppd $3, %xmm7, %xmm0, %xmm7 | ||
| ; movdqa %xmm0, %xmm5 | ||
| ; movdqa %xmm7, %xmm0 | ||
| ; pblendvb %xmm5, %xmm1, %xmm5 | ||
| ; movdqa %xmm5, %xmm0 | ||
| ; movq %rbp, %rsp | ||
| ; popq %rbp | ||
| ; ret | ||
| ; | ||
| ; Disassembled: | ||
| ; block0: ; offset 0x0 | ||
| ; pushq %rbp | ||
| ; movq %rsp, %rbp | ||
| ; block1: ; offset 0x4 | ||
| ; addsd %xmm1, %xmm0 | ||
| ; movabsq $0x7ff8000000000000, %r9 | ||
| ; movq %r9, %xmm1 | ||
| ; movdqa %xmm0, %xmm7 | ||
| ; cmpunordpd %xmm0, %xmm7 | ||
| ; movdqa %xmm0, %xmm5 | ||
| ; movdqa %xmm7, %xmm0 | ||
| ; pblendvb %xmm0, %xmm1, %xmm5 | ||
| ; movdqa %xmm5, %xmm0 | ||
| ; movq %rbp, %rsp | ||
| ; popq %rbp | ||
| ; retq | ||
|
|
||
| function %f1(f32, f32) -> f32 { | ||
| block0(v0: f32, v1: f32): | ||
| v2 = fadd v0, v1 | ||
| return v2 | ||
| } | ||
|
|
||
| ; VCode: | ||
| ; pushq %rbp | ||
| ; movq %rsp, %rbp | ||
| ; block0: | ||
| ; addss %xmm0, %xmm1, %xmm0 | ||
| ; movl $2143289344, %r9d | ||
| ; movd %r9d, %xmm1 | ||
| ; movdqa %xmm0, %xmm7 | ||
| ; cmpps $3, %xmm7, %xmm0, %xmm7 | ||
| ; movdqa %xmm0, %xmm5 | ||
| ; movdqa %xmm7, %xmm0 | ||
| ; pblendvb %xmm5, %xmm1, %xmm5 | ||
| ; movdqa %xmm5, %xmm0 | ||
| ; movq %rbp, %rsp | ||
| ; popq %rbp | ||
| ; ret | ||
| ; | ||
| ; Disassembled: | ||
| ; block0: ; offset 0x0 | ||
| ; pushq %rbp | ||
| ; movq %rsp, %rbp | ||
| ; block1: ; offset 0x4 | ||
| ; addss %xmm1, %xmm0 | ||
| ; movl $0x7fc00000, %r9d | ||
| ; movd %r9d, %xmm1 | ||
| ; movdqa %xmm0, %xmm7 | ||
| ; cmpunordps %xmm0, %xmm7 | ||
| ; movdqa %xmm0, %xmm5 | ||
| ; movdqa %xmm7, %xmm0 | ||
| ; pblendvb %xmm0, %xmm1, %xmm5 | ||
| ; movdqa %xmm5, %xmm0 | ||
| ; movq %rbp, %rsp | ||
| ; popq %rbp | ||
| ; retq | ||
|
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The original pattern was never triggered when doing NaN-canonicalization because
fcmpwill result in either anI32X4orI64X2which always needs to be bitcast back toF32X4orF64X2before it can be passed tobitselect.