// rvv-vp-intrinsic-rem.mlir
//
// Exercises the LLVM vector-predication (VP) remainder intrinsics on
// fixed-length vectors, in both mask-driven and EVL-driven form.
// Integer test data: a 20-element i32 buffer holding the values 0 through 19.
memref.global "private" @gv_i32 : memref<20xi32> =
    dense<[ 0,  1,  2,  3,  4,
            5,  6,  7,  8,  9,
           10, 11, 12, 13, 14,
           15, 16, 17, 18, 19]>
// Floating-point test data: a 20-element f32 buffer holding 0.0 through 19.0.
memref.global "private" @gv_f32 : memref<20xf32> =
    dense<[ 0.0,  1.0,  2.0,  3.0,  4.0,
            5.0,  6.0,  7.0,  8.0,  9.0,
           10.0, 11.0, 12.0, 13.0, 14.0,
           15.0, 16.0, 17.0, 18.0, 19.0]>
// Entry point: runs the VP signed/unsigned remainder intrinsics on 8-lane
// fixed vectors and prints each result. Every operation is exercised twice:
//   * mask-driven: full EVL (8), mask enables only lanes 0..5;
//   * EVL-driven:  all-ones mask, EVL limits the operation to lanes 0..5.
// Always returns 0.
func.func @main() -> i32 {
  %mem_i32 = memref.get_global @gv_i32 : memref<20xi32>
  // Only referenced by the disabled frem cases below; kept so they can be
  // re-enabled without further edits.
  %mem_f32 = memref.get_global @gv_f32 : memref<20xf32>

  // Load offsets into the global buffers.
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index

  // Predication operands: (mask6, evl8) is the mask-driven pair and
  // (mask8, evl6) is the EVL-driven pair. Both leave lanes 0..5 active.
  %mask6 = arith.constant dense<[1, 1, 1, 1, 1, 1, 0, 0]> : vector<8xi1>
  %evl8 = arith.constant 8 : i32
  %mask8 = arith.constant dense<[1, 1, 1, 1, 1, 1, 1, 1]> : vector<8xi1>
  %evl6 = arith.constant 6 : i32

  //===--------------------------------------------------------------------===//
  // VP Intrinsic FRem Operation + Fixed Vector Type
  //===--------------------------------------------------------------------===//

  // Both frem cases are disabled: the original author marked them "Error".
  // The cause of the failure is not recorded in this file.

  // // Mask-Driven Error
  // %vec1 = vector.load %mem_f32[%c0] : memref<20xf32>, vector<8xf32>
  // %vec2 = vector.load %mem_f32[%c10] : memref<20xf32>, vector<8xf32>
  // %res_frem_mask_driven = "llvm.intr.vp.frem" (%vec2, %vec1, %mask6, %evl8) :
  //         (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32>
  // vector.print %res_frem_mask_driven : vector<8xf32>

  // // EVL-Driven Error
  // %vec3 = vector.load %mem_f32[%c0] : memref<20xf32>, vector<8xf32>
  // %vec4 = vector.load %mem_f32[%c10] : memref<20xf32>, vector<8xf32>
  // %res_frem_evl_driven = "llvm.intr.vp.frem" (%vec4, %vec3, %mask8, %evl6) :
  //         (vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32>
  // vector.print %res_frem_evl_driven : vector<8xf32>

  //===--------------------------------------------------------------------===//
  // Integer operands shared by the SRem and URem cases
  //===--------------------------------------------------------------------===//

  // Dividend lanes: 10, 11, 12, 13, 14, 15, 16, 17.
  %dividend = vector.load %mem_i32[%c10] : memref<20xi32>, vector<8xi32>
  // Divisor lanes: 1, 2, 3, 4, 5, 6, 7, 8. The load starts at index 1, not 0,
  // so that no enabled lane divides by zero; integer remainder by zero on an
  // enabled lane is undefined behaviour in LLVM.
  %divisor = vector.load %mem_i32[%c1] : memref<20xi32>, vector<8xi32>

  // With these operands the active lanes (0..5) of every result below are
  // 0, 1, 0, 1, 4, 3. Lanes 6 and 7 are disabled, so their printed values
  // are not meaningful.

  //===--------------------------------------------------------------------===//
  // VP Intrinsic SRem Operation + Fixed Vector Type
  //===--------------------------------------------------------------------===//

  // Mask-Driven
  %res_srem_mask_driven = "llvm.intr.vp.srem" (%dividend, %divisor, %mask6, %evl8) :
          (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
  vector.print %res_srem_mask_driven : vector<8xi32>

  // EVL-Driven
  %res_srem_evl_driven = "llvm.intr.vp.srem" (%dividend, %divisor, %mask8, %evl6) :
          (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
  vector.print %res_srem_evl_driven : vector<8xi32>

  //===--------------------------------------------------------------------===//
  // VP Intrinsic URem Operation + Fixed Vector Type
  //===--------------------------------------------------------------------===//

  // Mask-Driven
  %res_urem_mask_driven = "llvm.intr.vp.urem" (%dividend, %divisor, %mask6, %evl8) :
          (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
  vector.print %res_urem_mask_driven : vector<8xi32>

  // EVL-Driven
  %res_urem_evl_driven = "llvm.intr.vp.urem" (%dividend, %divisor, %mask8, %evl6) :
          (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
  vector.print %res_urem_evl_driven : vector<8xi32>

  %ret = arith.constant 0 : i32
  return %ret : i32
}