/
segment_test.go
396 lines (375 loc) · 11.9 KB
/
segment_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
package main
import (
"strings"
"testing"
)
// testSegment runs SegmentSource over fullsrc (an assembly listing already
// split into lines) and reports a test error when the returned segments
// differ from expected, as judged by segmentEqual.
func testSegment(t *testing.T, fullsrc []string, expected []Segment) {
	// Mark this function as a helper so a failure is attributed to the
	// calling line in the test rather than to this shared function.
	t.Helper()

	segments := SegmentSource(fullsrc)
	if !segmentEqual(segments, expected) {
		// %v rather than %s: the segments carry numeric Start/End values,
		// which %s would render as formatting errors.
		t.Errorf("SegmentSource(): \nexpected %v\ngot %v", expected, segments)
	}
}
// TestSegment feeds four assembly listings to SegmentSource (through the
// testSegment helper) and compares the reported segments with hand-written
// expectations. Each listing is a raw string that is split on "\n" before
// being passed on, so the expected Start/End values depend on the exact line
// layout of the literals below; do not reflow or re-indent them.
//
// NOTE(review): the precise meaning of Start and End (which line of a routine
// each one designates) is defined by SegmentSource, which is not visible in
// this file — confirm there before editing the expected values.
func TestSegment(t *testing.T) {
// Listing 1: constant sections (LCPI0_*) followed by a single global
// routine with many local LBB0_* labels.
src1 := ` .section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.intel_syntax noprefix
.section __TEXT,__const
.align 5
LCPI0_0:
.byte 255 ## 0xff
.byte 0 ## 0x0
LCPI0_1:
.short 9617 ## 0x2591
.short 0 ## 0x0
LCPI0_2:
.short 1868 ## 0x74c
.short 4899 ## 0x1323
.section __TEXT,__literal4,4byte_literals
.align 2
LCPI0_3:
.long 8192 ## 0x2000
.section __TEXT,__text,regular,pure_instructions
.globl __ZN4Simd4Avx210BgraToGrayEPKhmmmPhm
.align 4, 0x90
__ZN4Simd4Avx210BgraToGrayEPKhmmmPhm: ## @_ZN4Simd4Avx210BgraToGrayEPKhmmmPhm
## BB#0:
push rbp
mov rbp, rsp
mov rax, rdi
and rax, -32
cmp rax, rdi
jne LBB0_9
## BB#1:
mov r10, r9
jne LBB0_9
## BB#2:
mov rax, r8
jne LBB0_9
## BB#3:
test rdx, rdx
je LBB0_15
## BB#4: ## %.preheader.lr.ph.i.1
mov r11, rsi
.align 4, 0x90
LBB0_5: ## %.preheader.i.5
je LBB0_6
.align 4, 0x90
LBB0_16: ## %.lr.ph.i.12
## Parent Loop BB0_5 Depth=1
## => This Inner Loop Header: Depth=2
vmovdqu ymm4, ymmword ptr [rdi + 4*rax]
cmp rax, r11
jb LBB0_16
LBB0_6: ## %._crit_edge.i.6
## in Loop: Header=BB0_5 Depth=1
cmp r11, rsi
je LBB0_8
## BB#7: ## in Loop: Header=BB0_5 Depth=1
vmovdqu ymm4, ymmword ptr [rdi + 4*rsi - 128]
vmovdqu ymmword ptr [r8 + rsi - 32], ymm4
LBB0_8: ## in Loop: Header=BB0_5 Depth=1
add rdi, rcx
jne LBB0_5
jmp LBB0_15
LBB0_9:
test rdx, rdx
je LBB0_15
## BB#10: ## %.preheader.lr.ph.i
mov r11, rsi
vpbroadcastd ymm3, dword ptr [rip + LCPI0_3]
.align 4, 0x90
LBB0_11: ## %.preheader.i
## =>This Loop Header: Depth=1
## Child Loop BB0_17 Depth 2
mov eax, 0
test r11, r11
je LBB0_12
.align 4, 0x90
LBB0_17: ## %.lr.ph.i
## Parent Loop BB0_11 Depth=1
## => This Inner Loop Header: Depth=2
vmovdqu ymm4, ymmword ptr [rdi + 4*rax]
vmovdqu ymm5, ymmword ptr [rdi + 4*rax + 32]
jb LBB0_17
LBB0_12: ## %._crit_edge.i
## in Loop: Header=BB0_11 Depth=1
cmp r11, rsi
je LBB0_14
## BB#13: ## in Loop: Header=BB0_11 Depth=1
vmovdqu ymm4, ymmword ptr [rdi + 4*rsi - 128]
vmovdqu ymmword ptr [r8 + rsi - 32], ymm4
LBB0_14: ## in Loop: Header=BB0_11 Depth=1
add rdi, rcx
add r8, r9
inc r10
cmp r10, rdx
jne LBB0_11
LBB0_15: ## %_ZN4Simd4Avx210BgraToGrayILb1EEEvPKhmmmPhm.exit
pop rbp
vzeroupper
ret
.subsections_via_symbols
`
// Exactly one segment is expected; its name is the readable form of the
// mangled symbol __ZN4Simd4Avx210BgraToGray... declared in the listing.
segments1 := []Segment{}
segments1 = append(segments1, Segment{Name: "SimdAvx2BgraToGray", Start: 22, End: 95})
testSegment(t, strings.Split(src1, "\n"), segments1)
// Listing 2: three global routines in one source, each introduced by its
// own constant section and .globl directive.
src2 := ` .section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.intel_syntax noprefix
.section __TEXT,__const
.align 5
LCPI0_0:
.short 16 ## 0x10
.short 13074 ## 0x3312
.short 0 ## 0x0
.section __TEXT,__text,regular,pure_instructions
.globl __ZN4Simd4Avx213Yuv444pToBgraEPKhmS2_mS2_mmmPhmh
.align 4, 0x90
__ZN4Simd4Avx213Yuv444pToBgraEPKhmS2_mS2_mmmPhmh: ## @_ZN4Simd4Avx213Yuv444pToBgraEPKhmS2_mS2_mmmPhmh
## BB#0:
push rbp
mov rbp, rsp
push r15
push r14
push r13
push r12
push rbx
and rsp, -32
sub rsp, 192
mov qword ptr [rsp + 56], r9 ## 8-byte Spill
mov r9b, byte ptr [rbp + 48]
mov r15, qword ptr [rbp + 40]
mov r13, qword ptr [rbp + 32]
mov r10, qword ptr [rbp + 16]
mov rbx, rsi
and rbx, -32
cmp rbx, rsi
jne LBB0_14
### BB#1:
mov rbx, rdi
cmp rbx, r13
jne LBB0_14
## BB#8:
movzx eax, r9b
cmp qword ptr [rbp + 24], 0
je LBB0_20
## BB#9: ## %.preheader.lr.ph.i.1
vinserti128 ymm14, ymm0, xmm0, 1
vmovdqu ymmword ptr [r13 + r9 + 96], ymm0
LBB0_13: ## in Loop: Header=BB0_10 Depth=1
add rdi, rsi
jb LBB0_22
LBB0_17: ## %._crit_edge.i
cmp rbx, qword ptr [rbp + 16]
cmp r11, qword ptr [rbp + 24]
jne LBB0_16
LBB0_20: ## %_ZN4Simd4Avx213Yuv444pToBgraILb1EEEvPKhmS3_mS3_mmmPhmh.exit
lea rsp, [rbp - 40]
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
vzeroupper
ret
.section __TEXT,__const
.align 5
LCPI1_0:
.byte 0 ## 0x0
.space 1
.space 1
.space 1
LCPI1_13:
.space 32
.section __TEXT,__text,regular,pure_instructions
.globl __ZN4Simd4Avx213Yuv420pToBgraEPKhmS2_mS2_mmmPhmh
.align 4, 0x90
__ZN4Simd4Avx213Yuv420pToBgraEPKhmS2_mS2_mmmPhmh: ## @_ZN4Simd4Avx213Yuv420pToBgraEPKhmS2_mS2_mmmPhmh
## BB#0:
push rbp
mov rbp, rsp
push r15
push r14
push r13
push r12
push rbx
and rsp, -32
sub rsp, 864
mov qword ptr [rsp + 144], r9 ## 8-byte Spill
mov qword ptr [rsp + 152], rcx ## 8-byte Spill
xor r12d, r12d
.align 4, 0x90
LBB1_12: ## %.lr.ph.i.18
## Parent Loop BB1_10 Depth=1
cmp r15, r11
jb LBB1_12
LBB1_13: ## %._crit_edge.i.8
## in Loop: Header=BB1_10 Depth=1
vmovdqa ymm7, ymm10
vmovdqu ymmword ptr [rax + rsi + 224], ymm0
LBB1_15: ## in Loop: Header=BB1_10 Depth=1
add rdi, qword ptr [rsp + 192] ## 8-byte Folded Reload
vmovdqa ymm7, ymmword ptr [rip + LCPI1_7] ## ymm7 = <u,u,u,u,1,1,1,1,u,u,u,u,1,1,1,1>
.align 4, 0x90
LBB1_18: ## %.preheader.i
## =>This Loop Header: Depth=1
cmp rsi, rbx
jb LBB1_23
LBB1_19: ## %._crit_edge.i
## in Loop: Header=BB1_18 Depth=1
vmovdqu ymmword ptr [rax + rsi + 224], ymm0
LBB1_21: ## in Loop: Header=BB1_18 Depth=1
add rdi, qword ptr [rsp + 96] ## 8-byte Folded Reload
jb LBB1_18
LBB1_22: ## %_ZN4Simd4Avx213Yuv420pToBgraILb1EEEvPKhmS3_mS3_mmmPhmh.exit
lea rsp, [rbp - 40]
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
vzeroupper
ret
.section __TEXT,__const
.align 5
LCPI2_0:
.byte 0 ## 0x0
.byte 2 ## 0x2
.byte 15 ## 0xf
LCPI2_12:
.space 1
.space 1
.section __TEXT,__text,regular,pure_instructions
.globl __ZN4Simd4Avx213Yuv422pToBgraEPKhmS2_mS2_mmmPhmh
.align 4, 0x90
__ZN4Simd4Avx213Yuv422pToBgraEPKhmS2_mS2_mmmPhmh: ## @_ZN4Simd4Avx213Yuv422pToBgraEPKhmS2_mS2_mmmPhmh
## BB#0:
push rbp
mov rbp, rsp
push r15
push r14
push r13
push r12
push rbx
and rsp, -32
sub rsp, 416
mov qword ptr [rsp + 184], rcx ## 8-byte Spill
mov qword ptr [rsp + 176], rsi ## 8-byte Spill
mov cl, byte ptr [rbp + 48]
mov r12, qword ptr [rbp + 40]
mov rax, qword ptr [rbp + 32]
mov r10, qword ptr [rbp + 16]
jne LBB2_14
## BB#1:
mov rsi, rdi
jne LBB2_14
## BB#8:
movzx ecx, cl
cmp qword ptr [rbp + 24], 0
mov rcx, r9
je LBB2_20
## BB#9: ## %.preheader.lr.ph.i.1
vinserti128 ymm12, ymm0, xmm0, 1
.align 4, 0x90
LBB2_10: ## %.preheader.i.7
.align 4, 0x90
LBB2_21: ## %.lr.ph.i.16
jb LBB2_21
LBB2_11: ## %._crit_edge.i.8
je LBB2_13
## BB#12: ## in Loop: Header=BB2_10 Depth=1
vmovdqa ymm15, ymm9
vmovdqu ymmword ptr [rax + r15 + 224], ymm0
LBB2_13: ## in Loop: Header=BB2_10 Depth=1
add rdi, qword ptr [rsp + 176] ## 8-byte Folded Reload
jmp LBB2_20
LBB2_14:
mov qword ptr [rsp + 168], r9 ## 8-byte Spill
je LBB2_20
## BB#15: ## %.preheader.lr.ph.i
vinserti128 ymm0, ymm0, xmm0, 1
.align 4, 0x90
LBB2_16: ## %.preheader.i
## =>This Loop Header: Depth=1
je LBB2_17
.align 4, 0x90
LBB2_22: ## %.lr.ph.i
cmp r15, rbx
jb LBB2_22
LBB2_17: ## %._crit_edge.i
## in Loop: Header=BB2_16 Depth=1
cmp rbx, qword ptr [rbp + 16]
je LBB2_19
## BB#18: ## in Loop: Header=BB2_16 Depth=1
vpermq ymm1, ymmword ptr [rdx + rsi], 216 ## ymm1 = mem[0,2,1,3]
vmovdqu ymmword ptr [rax + r13 + 224], ymm0
LBB2_19: ## in Loop: Header=BB2_16 Depth=1
add rdi, qword ptr [rsp + 176] ## 8-byte Folded Reload
jne LBB2_16
LBB2_20: ## %_ZN4Simd4Avx213Yuv422pToBgraILb1EEEvPKhmS3_mS3_mmmPhmh.exit
lea rsp, [rbp - 40]
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
vzeroupper
ret
.subsections_via_symbols`
// One expected segment per routine, listed in the order the routines
// appear in the source.
segments2 := []Segment{}
segments2 = append(segments2, Segment{Name: "SimdAvx2Yuv444pToBgra", Start: 13, End: 51})
segments2 = append(segments2, Segment{Name: "SimdAvx2Yuv420pToBgra", Start: 74, End: 111})
segments2 = append(segments2, Segment{Name: "SimdAvx2Yuv422pToBgra", Start: 134, End: 198})
testSegment(t, strings.Split(src2, "\n"), segments2)
// Listing 3: minimal input that opens directly with the .globl directive,
// with no leading .section lines and no local labels.
src3 := ` .globl __ZN4Simd4Avx214MultiplyAndAddEPfS1_S1_S1_
.align 4, 0x90
__ZN4Simd4Avx214MultiplyAndAddEPfS1_S1_S1_: ## @_ZN4Simd4Avx214MultiplyAndAddEPfS1_S1_S1_
## BB#0:
push rbp
mov rbp, rsp
vmovups ymm0, ymmword ptr [rdi]
vmovups ymm1, ymmword ptr [rsi]
vfmadd213ps ymm1, ymm0, ymmword ptr [rdx]
vmovups ymmword ptr [rcx], ymm1
pop rbp
vzeroupper
ret
.subsections_via_symbols`
segments3 := []Segment{}
segments3 = append(segments3, Segment{Name: "SimdAvx2MultiplyAndAdd", Start: 3, End: 10})
testSegment(t, strings.Split(src3, "\n"), segments3)
// Listing 4: a routine whose symbol has no namespace prefix, followed by a
// __DATA,__data section holding the labelled table __ZL1a.
src4 := ` .section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.intel_syntax noprefix
.globl __Z22MultiplyAndAddConstantPfS_S_
.align 4, 0x90
__Z22MultiplyAndAddConstantPfS_S_: ## @_Z22MultiplyAndAddConstantPfS_S_
## BB#0:
push rbp
mov rbp, rsp
vmovups ymm0, ymmword ptr [rdi]
vmovups ymm1, ymmword ptr [rsi]
vfmadd213ps ymm1, ymm0, ymmword ptr [rip + __ZL1a]
vmovups ymmword ptr [rdx], ymm1
pop rbp
vzeroupper
ret
.section __DATA,__data
.align 5 ## @_ZL1a
__ZL1a:
.long 1065353216 ## float 1.000000e+00
.long 1073741824 ## float 2.000000e+00
.long 1077936128 ## float 3.000000e+00
.long 1082130432 ## float 4.000000e+00
.long 1084227584 ## float 5.000000e+00
.long 1086324736 ## float 6.000000e+00
.long 1088421888 ## float 7.000000e+00
.long 1090519040 ## float 8.000000e+00
`
// Only the routine is expected as a segment; the trailing data label
// __ZL1a must not produce one of its own.
segments4 := []Segment{}
segments4 = append(segments4, Segment{Name: "MultiplyAndAddConstant", Start: 6, End: 13})
testSegment(t, strings.Split(src4, "\n"), segments4)
}