Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x64: Implement some minor optimizations related to SIMD lowerings #8839

Merged
merged 3 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3308,6 +3308,12 @@
;; `brif` whose condition is an `fcmp` (matched through `maybe_uextend`):
;; emit the float comparison with `emit_fcmp` and hand its result, together
;; with the two branch targets, to `jmp_cond_fcmp`.
(rule 2 (lower_branch (brif (maybe_uextend (fcmp cc a b)) _ _) (two_targets then else))
(emit_side_effect (jmp_cond_fcmp (emit_fcmp cc a b) then else)))

;; `brif` whose condition is a `vany_true` (matched through `maybe_uextend`):
;; feed the condition result produced by `emit_vany_true` directly to
;; `jmp_cond_icmp` along with the two branch targets.
(rule 2 (lower_branch (brif (maybe_uextend (vany_true a)) _ _) (two_targets then else))
(emit_side_effect (jmp_cond_icmp (emit_vany_true a) then else)))

;; `brif` whose condition is a `vall_true` (matched through `maybe_uextend`):
;; feed the condition result produced by `emit_vall_true` directly to
;; `jmp_cond_icmp` along with the two branch targets.
(rule 2 (lower_branch (brif (maybe_uextend (vall_true a)) _ _) (two_targets then else))
(emit_side_effect (jmp_cond_icmp (emit_vall_true a) then else)))

;; `brif` on a condition value of type `$I128`: build the comparison with
;; `cmp_zero_i128` (passing `CC.Z`) and branch on it via `jmp_cond_icmp`.
;; NOTE(review): `cmp_zero_i128` is defined elsewhere; confirm which condition
;; it yields for `CC.Z` so that `then` is taken when `val` is non-zero.
(rule 1 (lower_branch (brif val @ (value_type $I128) _ _)
(two_targets then else))
(emit_side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.Z) val) then else)))
Expand Down Expand Up @@ -4263,10 +4269,9 @@
;; TODO use Inst::gen_constant() instead.
(x64_xmm_load_const ty (const_to_vconst const)))

;; Special case for a zero-vector: don't load, xor instead.
(rule 1 (lower (has_type ty (vconst (u128_from_constant 0))))
(let ((dst Xmm (xmm_uninit_value)))
(x64_pxor dst dst)))
;; Special cases for known constant patterns to skip a 16-byte load.
;; Constant 0: produce the vector with `xmm_zero` for type `ty` instead of
;; loading it.
(rule 1 (lower (has_type ty (vconst (u128_from_constant 0)))) (xmm_zero ty))
;; Constant -1: produce the vector with `vector_all_ones` instead of loading it.
;; NOTE(review): presumably -1 here means all 128 bits set — confirm against
;; the `u128_from_constant` extractor.
(rule 1 (lower (has_type ty (vconst (u128_from_constant -1)))) (vector_all_ones))

;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -4630,30 +4635,38 @@
;; 0xffff then every byte was equal to zero, so test if the comparison is
;; not-equal or NZ.
(rule (lower (vany_true val))
(lower_icmp_bool (emit_vany_true val)))

(decl emit_vany_true (Value) IcmpCondResult)
(rule (emit_vany_true val)
(let (
(any_byte_zero Xmm (x64_pcmpeqb val (xmm_zero $I8X16)))
(mask Gpr (x64_pmovmskb (OperandSize.Size32) any_byte_zero))
)
(with_flags (x64_cmp_imm (OperandSize.Size32) mask 0xffff)
(x64_setcc (CC.NZ)))))
(icmp_cond_result (x64_cmp_imm (OperandSize.Size32) mask 0xffff)
(CC.NZ))))

;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 1 (lower (vall_true val @ (value_type ty)))
(rule (lower (vall_true val))
(lower_icmp_bool (emit_vall_true val)))

(decl emit_vall_true (Value) IcmpCondResult)
(rule 1 (emit_vall_true val @ (value_type ty))
(if-let $true (use_sse41))
(let ((src Xmm val)
(zeros Xmm (xmm_zero ty))
(cmp Xmm (x64_pcmpeq (vec_int_type ty) src zeros)))
(with_flags (x64_ptest cmp cmp) (x64_setcc (CC.Z)))))
(icmp_cond_result (x64_ptest cmp cmp) (CC.Z))))

;; Perform an appropriately-sized lane-wise comparison with zero. If the
;; result is all 0s then all of them are true because nothing was equal to
;; zero.
(rule (lower (vall_true val @ (value_type ty)))
(rule (emit_vall_true val @ (value_type ty))
(let ((lanes_with_zero Xmm (x64_pcmpeq (vec_int_type ty) val (xmm_zero ty)))
(mask Gpr (x64_pmovmskb (OperandSize.Size32) lanes_with_zero)))
(with_flags (x64_test (OperandSize.Size32) mask mask)
(x64_setcc (CC.Z)))))
(icmp_cond_result (x64_test (OperandSize.Size32) mask mask)
(CC.Z))))

;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
125 changes: 125 additions & 0 deletions tests/disas/x64-simd-test-and-branch.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
;;! target = "x86_64"
;;! test = "compile"
;;! flags = ["-Ccranelift-sse41"]

;; Each function takes one v128 parameter, applies a SIMD boolean reduction
;; (`*.all_true` or `v128.any_true`) and uses the result directly as the
;; condition of an `if`, returning 100 on the true arm and 200 on the else arm.
(module
;; `i8x16.all_true` used as an `if` condition.
(func $i8x16.all_true (param v128) (result i32)
local.get 0
i8x16.all_true
if (result i32)
i32.const 100
else
i32.const 200
end
)

;; `i16x8.all_true` used as an `if` condition.
(func $i16x8.all_true (param v128) (result i32)
local.get 0
i16x8.all_true
if (result i32)
i32.const 100
else
i32.const 200
end
)

;; `i32x4.all_true` used as an `if` condition.
(func $i32x4.all_true (param v128) (result i32)
local.get 0
i32x4.all_true
if (result i32)
i32.const 100
else
i32.const 200
end
)

;; `i64x2.all_true` used as an `if` condition.
(func $i64x2.all_true (param v128) (result i32)
local.get 0
i64x2.all_true
if (result i32)
i32.const 100
else
i32.const 200
end
)

;; `v128.any_true` used as an `if` condition.
(func $v128.any_true (param v128) (result i32)
local.get 0
v128.any_true
if (result i32)
i32.const 100
else
i32.const 200
end
)
)
;; wasm[0]::function[0]::i8x16.all_true:
;; pushq %rbp
;; movq %rsp, %rbp
;; pxor %xmm7, %xmm7
;; pcmpeqb %xmm7, %xmm0
;; ptest %xmm0, %xmm0
;; je 0x21
;; 17: movl $0xc8, %eax
;; jmp 0x26
;; 21: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
;;
;; wasm[0]::function[1]::i16x8.all_true:
;; pushq %rbp
;; movq %rsp, %rbp
;; pxor %xmm7, %xmm7
;; pcmpeqw %xmm7, %xmm0
;; ptest %xmm0, %xmm0
;; je 0x61
;; 57: movl $0xc8, %eax
;; jmp 0x66
;; 61: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
;;
;; wasm[0]::function[2]::i32x4.all_true:
;; pushq %rbp
;; movq %rsp, %rbp
;; pxor %xmm7, %xmm7
;; pcmpeqd %xmm7, %xmm0
;; ptest %xmm0, %xmm0
;; je 0xa1
;; 97: movl $0xc8, %eax
;; jmp 0xa6
;; a1: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
;;
;; wasm[0]::function[3]::i64x2.all_true:
;; pushq %rbp
;; movq %rsp, %rbp
;; pxor %xmm7, %xmm7
;; pcmpeqq %xmm7, %xmm0
;; ptest %xmm0, %xmm0
;; je 0xe2
;; d8: movl $0xc8, %eax
;; jmp 0xe7
;; e2: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
;;
;; wasm[0]::function[4]::v128.any_true:
;; pushq %rbp
;; movq %rsp, %rbp
;; pxor %xmm7, %xmm7
;; pcmpeqb %xmm7, %xmm0
;; pmovmskb %xmm0, %ecx
;; cmpl $0xffff, %ecx
;; jne 0x126
;; 11c: movl $0xc8, %eax
;; jmp 0x12b
;; 126: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
22 changes: 22 additions & 0 deletions tests/disas/x64-vector-patterns.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
;;! target = "x86_64"
;;! test = "compile"

;; Two functions that each return a single v128 constant.
(module
;; Returns a v128 constant whose two i64 lanes are both 0.
(func $zero (result v128) v128.const i64x2 0 0)
;; Returns a v128 constant whose two i64 lanes are both -1.
(func $ones (result v128) v128.const i64x2 -1 -1)
)
;; wasm[0]::function[0]::zero:
;; pushq %rbp
;; movq %rsp, %rbp
;; pxor %xmm0, %xmm0
;; movq %rbp, %rsp
;; popq %rbp
;; retq
;;
;; wasm[0]::function[1]::ones:
;; pushq %rbp
;; movq %rsp, %rbp
;; pcmpeqd %xmm0, %xmm0
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Loading