|
17 | 17 | @ThreadGroupSize_Z = constant i32 1 |
18 | 18 |
|
19 | 19 | define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) { |
20 | | -entry: |
21 | | -; CHECK: _main_0: |
| 20 | +; CHECK: _main_0: |
| 21 | +; CHECK-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> |
| 22 | +; CHECK-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> |
| 23 | +; CHECK-NEXT: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w |
| 24 | +; CHECK-NEXT: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0> |
| 25 | +; CHECK-NEXT: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0> |
| 26 | +; CHECK-NEXT: setp (M1_NM, 16) P1 0x0:ud |
| 27 | +; CHECK-NEXT: setp (M1_NM, 16) P2 0x0:ud |
| 28 | +; CHECK-NEXT: setp (M1_NM, 16) P3 0x0:ud |
| 29 | +; CHECK-NEXT: lifetime.start call_ |
| 30 | +; |
| 31 | +; CHECK: _test1_001__opt_resource_loop: |
| 32 | +; CHECK-NEXT: setp (M1_NM, 16) P4 0x0:ud |
| 33 | +; CHECK-NEXT: setp (M1_NM, 16) P5 0x0:ud |
| 34 | +; CHECK-NEXT: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0> |
| 35 | +; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 36 | +; CHECK-NEXT: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0> |
| 37 | +; CHECK-NEXT: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w |
| 38 | +; CHECK-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0> |
| 39 | +; CHECK-NEXT: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d |
| 40 | +; CHECK-NEXT: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 41 | +; CHECK-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32 |
| 42 | +; CHECK-NEXT: or (M1_NM, 16) P4 P4 P6 |
| 43 | +; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P6 |
| 44 | +; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 45 | +; CHECK-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> |
| 46 | +; CHECK-NEXT: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud |
| 47 | +; CHECK-NEXT: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w |
| 48 | +; CHECK-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0> |
| 49 | +; CHECK-NEXT: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d |
| 50 | +; CHECK-NEXT: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 51 | +; CHECK-NEXT: and (M1_NM, 16) P7 P7 P5 |
| 52 | +; CHECK-NEXT: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32 |
| 53 | +; CHECK-NEXT: or (M1_NM, 16) P4 P4 P7 |
| 54 | +; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P7 |
| 55 | +; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 56 | +; CHECK-NEXT: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0> |
| 57 | +; CHECK-NEXT: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud |
| 58 | +; CHECK-NEXT: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w |
| 59 | +; CHECK-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0> |
| 60 | +; CHECK-NEXT: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d |
| 61 | +; CHECK-NEXT: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 62 | +; CHECK-NEXT: and (M1_NM, 16) P8 P8 P5 |
| 63 | +; CHECK-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32 |
| 64 | +; CHECK-NEXT: or (M1_NM, 16) P4 P4 P8 |
| 65 | +; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P8 |
| 66 | +; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 67 | +; CHECK-NEXT: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0> |
| 68 | +; CHECK-NEXT: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud |
| 69 | +; CHECK-NEXT: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w |
| 70 | +; CHECK-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0> |
| 71 | +; CHECK-NEXT: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d |
| 72 | +; CHECK-NEXT: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 73 | +; CHECK-NEXT: and (M1_NM, 16) P9 P9 P5 |
| 74 | +; CHECK-NEXT: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32 |
| 75 | +; CHECK-NEXT: or (M1_NM, 16) P4 P4 P9 |
| 76 | +; CHECK-NEXT: (!P4) goto (M1, 16) _test1_001__opt_resource_loop |
| 77 | +; CHECK-NEXT: mul (M1_NM, 1) V0046(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw |
| 78 | +; CHECK-NEXT: addr_add (M1_NM, 1) A4(0)<1> &call_ V0046(0,0)<0;1,0> |
| 79 | +; CHECK-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d |
| 80 | +; CHECK-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0> |
| 81 | +; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0> |
| 82 | +; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0> |
| 83 | +; CHECK-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32 |
| 84 | +; CHECK-NEXT: ret (M1, 1) |
22 | 85 |
|
23 | 86 | %svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17) |
24 | | -; CHECK: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> |
25 | 87 |
|
26 | 88 | %nonuniform = zext i16 %svn to i32 |
27 | | -; CHECK: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> |
28 | 89 |
|
29 | 90 | %NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)* |
30 | 91 | %offset = add i32 %src1, 1 |
31 | | -; CHECK: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w |
32 | | -; CHECK: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0> |
33 | | -; CHECK: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0> |
34 | 92 |
|
35 | 93 | %call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false) |
36 | | -; CHECK: _test1_001__opt_resource_loop: |
37 | | -; CHECK: setp (M1_NM, 16) P4 0x0:ud |
38 | | -; CHECK: setp (M1_NM, 16) P5 0x0:ud |
39 | | -; CHECK: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0> |
40 | | -; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
41 | | -; CHECK: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0> |
42 | | -; CHECK: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w |
43 | | -; CHECK: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0> |
44 | | -; CHECK: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d |
45 | | -; CHECK: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
46 | | -; CHECK: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32 |
47 | | -; CHECK: or (M1_NM, 16) P4 P4 P6 |
48 | | -; CHECK: xor (M1_NM, 16) P5 P5 P6 |
49 | | -; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
50 | | -; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> |
51 | | -; CHECK: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud |
52 | | -; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w |
53 | | -; CHECK: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0> |
54 | | -; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d |
55 | | -; CHECK: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
56 | | -; CHECK: and (M1_NM, 16) P7 P7 P5 |
57 | | -; CHECK: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32 |
58 | | -; CHECK: or (M1_NM, 16) P4 P4 P7 |
59 | | -; CHECK: xor (M1_NM, 16) P5 P5 P7 |
60 | | -; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
61 | | -; CHECK: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0> |
62 | | -; CHECK: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud |
63 | | -; CHECK: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w |
64 | | -; CHECK: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0> |
65 | | -; CHECK: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d |
66 | | -; CHECK: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
67 | | -; CHECK: and (M1_NM, 16) P8 P8 P5 |
68 | | -; CHECK: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32 |
69 | | -; CHECK: or (M1_NM, 16) P4 P4 P8 |
70 | | -; CHECK: xor (M1_NM, 16) P5 P5 P8 |
71 | | -; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
72 | | -; CHECK: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0> |
73 | | -; CHECK: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud |
74 | | -; CHECK: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w |
75 | | -; CHECK: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0> |
76 | | -; CHECK: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d |
77 | | -; CHECK: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
78 | | -; CHECK: and (M1_NM, 16) P9 P9 P5 |
79 | | -; CHECK: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32 |
80 | | -; CHECK: or (M1_NM, 16) P4 P4 P9 |
81 | | -; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop |
82 | 94 | %out = extractelement <3 x i32> %call, i32 %val |
83 | 95 | store i32 %out, i32 addrspace(1)* %dst, align 1 |
84 | 96 | ret void |
|
0 commit comments