/
avx512_vbmi.s
415 lines (413 loc) · 28.7 KB
/
avx512_vbmi.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
// Code generated by avx512test. DO NOT EDIT.
#include "../../../../../../runtime/textflag.h"
// asmtest_avx512_vbmi is a flat list of AVX-512 VBMI instructions
// (VPERMB, VPERMI2B, VPERMT2B, VPMULTISHIFTQB) followed by a single RET.
//
// Every instruction line ends in a "// <hex>" comment holding that
// instruction's machine encoding: the 0x62 EVEX escape, three EVEX payload
// bytes, the opcode byte, ModRM, and for memory operands an optional SIB byte
// and a 32-bit displacement.
// NOTE(review): these hex comments are presumably parsed and compared by the
// assembler's encoder test, so treat them as data rather than prose - confirm
// against the test harness before touching them.
//
// Operand order as written on each line, matching the encodings:
//   rm source (register or memory), vvvv source register, opmask K register,
//   destination register (ModRM.reg).
// Within each group the lines vary the destination, then the second source,
// then the first source (registers followed by memory addressing forms).
TEXT asmtest_avx512_vbmi(SB), NOSPLIT, $0
// VPERMB (opcode byte 0x8d), X register forms (EVEX vector length 128), opmask K1.
VPERMB X26, X20, K1, X23 // 62825d018dfa
VPERMB X19, X20, K1, X23 // 62a25d018dfb
VPERMB X0, X20, K1, X23 // 62e25d018df8
VPERMB 7(SI)(DI*4), X20, K1, X23 // 62e25d018dbcbe07000000
VPERMB -7(DI)(R8*2), X20, K1, X23 // 62a25d018dbc47f9ffffff
VPERMB X26, X2, K1, X23 // 62826d098dfa
VPERMB X19, X2, K1, X23 // 62a26d098dfb
VPERMB X0, X2, K1, X23 // 62e26d098df8
VPERMB 7(SI)(DI*4), X2, K1, X23 // 62e26d098dbcbe07000000
VPERMB -7(DI)(R8*2), X2, K1, X23 // 62a26d098dbc47f9ffffff
VPERMB X26, X9, K1, X23 // 628235098dfa
VPERMB X19, X9, K1, X23 // 62a235098dfb
VPERMB X0, X9, K1, X23 // 62e235098df8
VPERMB 7(SI)(DI*4), X9, K1, X23 // 62e235098dbcbe07000000
VPERMB -7(DI)(R8*2), X9, K1, X23 // 62a235098dbc47f9ffffff
VPERMB X26, X20, K1, X30 // 62025d018df2
VPERMB X19, X20, K1, X30 // 62225d018df3
VPERMB X0, X20, K1, X30 // 62625d018df0
VPERMB 7(SI)(DI*4), X20, K1, X30 // 62625d018db4be07000000
VPERMB -7(DI)(R8*2), X20, K1, X30 // 62225d018db447f9ffffff
VPERMB X26, X2, K1, X30 // 62026d098df2
VPERMB X19, X2, K1, X30 // 62226d098df3
VPERMB X0, X2, K1, X30 // 62626d098df0
VPERMB 7(SI)(DI*4), X2, K1, X30 // 62626d098db4be07000000
VPERMB -7(DI)(R8*2), X2, K1, X30 // 62226d098db447f9ffffff
VPERMB X26, X9, K1, X30 // 620235098df2
VPERMB X19, X9, K1, X30 // 622235098df3
VPERMB X0, X9, K1, X30 // 626235098df0
VPERMB 7(SI)(DI*4), X9, K1, X30 // 626235098db4be07000000
VPERMB -7(DI)(R8*2), X9, K1, X30 // 622235098db447f9ffffff
VPERMB X26, X20, K1, X8 // 62125d018dc2
VPERMB X19, X20, K1, X8 // 62325d018dc3
VPERMB X0, X20, K1, X8 // 62725d018dc0
VPERMB 7(SI)(DI*4), X20, K1, X8 // 62725d018d84be07000000
VPERMB -7(DI)(R8*2), X20, K1, X8 // 62325d018d8447f9ffffff
VPERMB X26, X2, K1, X8 // 62126d098dc2
VPERMB X19, X2, K1, X8 // 62326d098dc3
VPERMB X0, X2, K1, X8 // 62726d098dc0
VPERMB 7(SI)(DI*4), X2, K1, X8 // 62726d098d84be07000000
VPERMB -7(DI)(R8*2), X2, K1, X8 // 62326d098d8447f9ffffff
VPERMB X26, X9, K1, X8 // 621235098dc2
VPERMB X19, X9, K1, X8 // 623235098dc3
VPERMB X0, X9, K1, X8 // 627235098dc0
VPERMB 7(SI)(DI*4), X9, K1, X8 // 627235098d84be07000000
VPERMB -7(DI)(R8*2), X9, K1, X8 // 623235098d8447f9ffffff
// VPERMB, Y register forms (EVEX vector length 256), opmask K7.
VPERMB Y5, Y31, K7, Y22 // 62e205278df5
VPERMB Y19, Y31, K7, Y22 // 62a205278df3
VPERMB Y31, Y31, K7, Y22 // 628205278df7
VPERMB 17(SP)(BP*1), Y31, K7, Y22 // 62e205278db42c11000000
VPERMB -7(CX)(DX*8), Y31, K7, Y22 // 62e205278db4d1f9ffffff
VPERMB Y5, Y5, K7, Y22 // 62e2552f8df5
VPERMB Y19, Y5, K7, Y22 // 62a2552f8df3
VPERMB Y31, Y5, K7, Y22 // 6282552f8df7
VPERMB 17(SP)(BP*1), Y5, K7, Y22 // 62e2552f8db42c11000000
VPERMB -7(CX)(DX*8), Y5, K7, Y22 // 62e2552f8db4d1f9ffffff
VPERMB Y5, Y0, K7, Y22 // 62e27d2f8df5
VPERMB Y19, Y0, K7, Y22 // 62a27d2f8df3
VPERMB Y31, Y0, K7, Y22 // 62827d2f8df7
VPERMB 17(SP)(BP*1), Y0, K7, Y22 // 62e27d2f8db42c11000000
VPERMB -7(CX)(DX*8), Y0, K7, Y22 // 62e27d2f8db4d1f9ffffff
VPERMB Y5, Y31, K7, Y9 // 627205278dcd
VPERMB Y19, Y31, K7, Y9 // 623205278dcb
VPERMB Y31, Y31, K7, Y9 // 621205278dcf
VPERMB 17(SP)(BP*1), Y31, K7, Y9 // 627205278d8c2c11000000
VPERMB -7(CX)(DX*8), Y31, K7, Y9 // 627205278d8cd1f9ffffff
VPERMB Y5, Y5, K7, Y9 // 6272552f8dcd
VPERMB Y19, Y5, K7, Y9 // 6232552f8dcb
VPERMB Y31, Y5, K7, Y9 // 6212552f8dcf
VPERMB 17(SP)(BP*1), Y5, K7, Y9 // 6272552f8d8c2c11000000
VPERMB -7(CX)(DX*8), Y5, K7, Y9 // 6272552f8d8cd1f9ffffff
VPERMB Y5, Y0, K7, Y9 // 62727d2f8dcd
VPERMB Y19, Y0, K7, Y9 // 62327d2f8dcb
VPERMB Y31, Y0, K7, Y9 // 62127d2f8dcf
VPERMB 17(SP)(BP*1), Y0, K7, Y9 // 62727d2f8d8c2c11000000
VPERMB -7(CX)(DX*8), Y0, K7, Y9 // 62727d2f8d8cd1f9ffffff
VPERMB Y5, Y31, K7, Y23 // 62e205278dfd
VPERMB Y19, Y31, K7, Y23 // 62a205278dfb
VPERMB Y31, Y31, K7, Y23 // 628205278dff
VPERMB 17(SP)(BP*1), Y31, K7, Y23 // 62e205278dbc2c11000000
VPERMB -7(CX)(DX*8), Y31, K7, Y23 // 62e205278dbcd1f9ffffff
VPERMB Y5, Y5, K7, Y23 // 62e2552f8dfd
VPERMB Y19, Y5, K7, Y23 // 62a2552f8dfb
VPERMB Y31, Y5, K7, Y23 // 6282552f8dff
VPERMB 17(SP)(BP*1), Y5, K7, Y23 // 62e2552f8dbc2c11000000
VPERMB -7(CX)(DX*8), Y5, K7, Y23 // 62e2552f8dbcd1f9ffffff
VPERMB Y5, Y0, K7, Y23 // 62e27d2f8dfd
VPERMB Y19, Y0, K7, Y23 // 62a27d2f8dfb
VPERMB Y31, Y0, K7, Y23 // 62827d2f8dff
VPERMB 17(SP)(BP*1), Y0, K7, Y23 // 62e27d2f8dbc2c11000000
VPERMB -7(CX)(DX*8), Y0, K7, Y23 // 62e27d2f8dbcd1f9ffffff
// VPERMB, Z register forms (EVEX vector length 512), opmask K1.
VPERMB Z3, Z8, K1, Z3 // 62f23d498ddb
VPERMB Z27, Z8, K1, Z3 // 62923d498ddb
VPERMB 7(AX), Z8, K1, Z3 // 62f23d498d9807000000
VPERMB (DI), Z8, K1, Z3 // 62f23d498d1f
VPERMB Z3, Z2, K1, Z3 // 62f26d498ddb
VPERMB Z27, Z2, K1, Z3 // 62926d498ddb
VPERMB 7(AX), Z2, K1, Z3 // 62f26d498d9807000000
VPERMB (DI), Z2, K1, Z3 // 62f26d498d1f
VPERMB Z3, Z8, K1, Z21 // 62e23d498deb
VPERMB Z27, Z8, K1, Z21 // 62823d498deb
VPERMB 7(AX), Z8, K1, Z21 // 62e23d498da807000000
VPERMB (DI), Z8, K1, Z21 // 62e23d498d2f
VPERMB Z3, Z2, K1, Z21 // 62e26d498deb
VPERMB Z27, Z2, K1, Z21 // 62826d498deb
VPERMB 7(AX), Z2, K1, Z21 // 62e26d498da807000000
VPERMB (DI), Z2, K1, Z21 // 62e26d498d2f
// VPERMI2B (opcode byte 0x75), X register forms, opmask K7.
VPERMI2B X15, X8, K7, X31 // 62423d0f75ff
VPERMI2B X0, X8, K7, X31 // 62623d0f75f8
VPERMI2B X16, X8, K7, X31 // 62223d0f75f8
VPERMI2B 17(SP), X8, K7, X31 // 62623d0f75bc2411000000
VPERMI2B -17(BP)(SI*4), X8, K7, X31 // 62623d0f75bcb5efffffff
VPERMI2B X15, X1, K7, X31 // 6242750f75ff
VPERMI2B X0, X1, K7, X31 // 6262750f75f8
VPERMI2B X16, X1, K7, X31 // 6222750f75f8
VPERMI2B 17(SP), X1, K7, X31 // 6262750f75bc2411000000
VPERMI2B -17(BP)(SI*4), X1, K7, X31 // 6262750f75bcb5efffffff
VPERMI2B X15, X0, K7, X31 // 62427d0f75ff
VPERMI2B X0, X0, K7, X31 // 62627d0f75f8
VPERMI2B X16, X0, K7, X31 // 62227d0f75f8
VPERMI2B 17(SP), X0, K7, X31 // 62627d0f75bc2411000000
VPERMI2B -17(BP)(SI*4), X0, K7, X31 // 62627d0f75bcb5efffffff
VPERMI2B X15, X8, K7, X16 // 62c23d0f75c7
VPERMI2B X0, X8, K7, X16 // 62e23d0f75c0
VPERMI2B X16, X8, K7, X16 // 62a23d0f75c0
VPERMI2B 17(SP), X8, K7, X16 // 62e23d0f75842411000000
VPERMI2B -17(BP)(SI*4), X8, K7, X16 // 62e23d0f7584b5efffffff
VPERMI2B X15, X1, K7, X16 // 62c2750f75c7
VPERMI2B X0, X1, K7, X16 // 62e2750f75c0
VPERMI2B X16, X1, K7, X16 // 62a2750f75c0
VPERMI2B 17(SP), X1, K7, X16 // 62e2750f75842411000000
VPERMI2B -17(BP)(SI*4), X1, K7, X16 // 62e2750f7584b5efffffff
VPERMI2B X15, X0, K7, X16 // 62c27d0f75c7
VPERMI2B X0, X0, K7, X16 // 62e27d0f75c0
VPERMI2B X16, X0, K7, X16 // 62a27d0f75c0
VPERMI2B 17(SP), X0, K7, X16 // 62e27d0f75842411000000
VPERMI2B -17(BP)(SI*4), X0, K7, X16 // 62e27d0f7584b5efffffff
VPERMI2B X15, X8, K7, X7 // 62d23d0f75ff
VPERMI2B X0, X8, K7, X7 // 62f23d0f75f8
VPERMI2B X16, X8, K7, X7 // 62b23d0f75f8
VPERMI2B 17(SP), X8, K7, X7 // 62f23d0f75bc2411000000
VPERMI2B -17(BP)(SI*4), X8, K7, X7 // 62f23d0f75bcb5efffffff
VPERMI2B X15, X1, K7, X7 // 62d2750f75ff
VPERMI2B X0, X1, K7, X7 // 62f2750f75f8
VPERMI2B X16, X1, K7, X7 // 62b2750f75f8
VPERMI2B 17(SP), X1, K7, X7 // 62f2750f75bc2411000000
VPERMI2B -17(BP)(SI*4), X1, K7, X7 // 62f2750f75bcb5efffffff
VPERMI2B X15, X0, K7, X7 // 62d27d0f75ff
VPERMI2B X0, X0, K7, X7 // 62f27d0f75f8
VPERMI2B X16, X0, K7, X7 // 62b27d0f75f8
VPERMI2B 17(SP), X0, K7, X7 // 62f27d0f75bc2411000000
VPERMI2B -17(BP)(SI*4), X0, K7, X7 // 62f27d0f75bcb5efffffff
// VPERMI2B, Y register forms, opmask K2.
VPERMI2B Y18, Y15, K2, Y2 // 62b2052a75d2
VPERMI2B Y24, Y15, K2, Y2 // 6292052a75d0
VPERMI2B Y9, Y15, K2, Y2 // 62d2052a75d1
VPERMI2B 15(R8)(R14*1), Y15, K2, Y2 // 6292052a7594300f000000
VPERMI2B 15(R8)(R14*2), Y15, K2, Y2 // 6292052a7594700f000000
VPERMI2B Y18, Y22, K2, Y2 // 62b24d2275d2
VPERMI2B Y24, Y22, K2, Y2 // 62924d2275d0
VPERMI2B Y9, Y22, K2, Y2 // 62d24d2275d1
VPERMI2B 15(R8)(R14*1), Y22, K2, Y2 // 62924d227594300f000000
VPERMI2B 15(R8)(R14*2), Y22, K2, Y2 // 62924d227594700f000000
VPERMI2B Y18, Y20, K2, Y2 // 62b25d2275d2
VPERMI2B Y24, Y20, K2, Y2 // 62925d2275d0
VPERMI2B Y9, Y20, K2, Y2 // 62d25d2275d1
VPERMI2B 15(R8)(R14*1), Y20, K2, Y2 // 62925d227594300f000000
VPERMI2B 15(R8)(R14*2), Y20, K2, Y2 // 62925d227594700f000000
VPERMI2B Y18, Y15, K2, Y13 // 6232052a75ea
VPERMI2B Y24, Y15, K2, Y13 // 6212052a75e8
VPERMI2B Y9, Y15, K2, Y13 // 6252052a75e9
VPERMI2B 15(R8)(R14*1), Y15, K2, Y13 // 6212052a75ac300f000000
VPERMI2B 15(R8)(R14*2), Y15, K2, Y13 // 6212052a75ac700f000000
VPERMI2B Y18, Y22, K2, Y13 // 62324d2275ea
VPERMI2B Y24, Y22, K2, Y13 // 62124d2275e8
VPERMI2B Y9, Y22, K2, Y13 // 62524d2275e9
VPERMI2B 15(R8)(R14*1), Y22, K2, Y13 // 62124d2275ac300f000000
VPERMI2B 15(R8)(R14*2), Y22, K2, Y13 // 62124d2275ac700f000000
VPERMI2B Y18, Y20, K2, Y13 // 62325d2275ea
VPERMI2B Y24, Y20, K2, Y13 // 62125d2275e8
VPERMI2B Y9, Y20, K2, Y13 // 62525d2275e9
VPERMI2B 15(R8)(R14*1), Y20, K2, Y13 // 62125d2275ac300f000000
VPERMI2B 15(R8)(R14*2), Y20, K2, Y13 // 62125d2275ac700f000000
VPERMI2B Y18, Y15, K2, Y27 // 6222052a75da
VPERMI2B Y24, Y15, K2, Y27 // 6202052a75d8
VPERMI2B Y9, Y15, K2, Y27 // 6242052a75d9
VPERMI2B 15(R8)(R14*1), Y15, K2, Y27 // 6202052a759c300f000000
VPERMI2B 15(R8)(R14*2), Y15, K2, Y27 // 6202052a759c700f000000
VPERMI2B Y18, Y22, K2, Y27 // 62224d2275da
VPERMI2B Y24, Y22, K2, Y27 // 62024d2275d8
VPERMI2B Y9, Y22, K2, Y27 // 62424d2275d9
VPERMI2B 15(R8)(R14*1), Y22, K2, Y27 // 62024d22759c300f000000
VPERMI2B 15(R8)(R14*2), Y22, K2, Y27 // 62024d22759c700f000000
VPERMI2B Y18, Y20, K2, Y27 // 62225d2275da
VPERMI2B Y24, Y20, K2, Y27 // 62025d2275d8
VPERMI2B Y9, Y20, K2, Y27 // 62425d2275d9
VPERMI2B 15(R8)(R14*1), Y20, K2, Y27 // 62025d22759c300f000000
VPERMI2B 15(R8)(R14*2), Y20, K2, Y27 // 62025d22759c700f000000
// VPERMI2B, Z register forms, opmask K4.
VPERMI2B Z12, Z9, K4, Z3 // 62d2354c75dc
VPERMI2B Z22, Z9, K4, Z3 // 62b2354c75de
VPERMI2B -17(BP)(SI*8), Z9, K4, Z3 // 62f2354c759cf5efffffff
VPERMI2B (R15), Z9, K4, Z3 // 62d2354c751f
VPERMI2B Z12, Z19, K4, Z3 // 62d2654475dc
VPERMI2B Z22, Z19, K4, Z3 // 62b2654475de
VPERMI2B -17(BP)(SI*8), Z19, K4, Z3 // 62f26544759cf5efffffff
VPERMI2B (R15), Z19, K4, Z3 // 62d26544751f
VPERMI2B Z12, Z9, K4, Z30 // 6242354c75f4
VPERMI2B Z22, Z9, K4, Z30 // 6222354c75f6
VPERMI2B -17(BP)(SI*8), Z9, K4, Z30 // 6262354c75b4f5efffffff
VPERMI2B (R15), Z9, K4, Z30 // 6242354c7537
VPERMI2B Z12, Z19, K4, Z30 // 6242654475f4
VPERMI2B Z22, Z19, K4, Z30 // 6222654475f6
VPERMI2B -17(BP)(SI*8), Z19, K4, Z30 // 6262654475b4f5efffffff
VPERMI2B (R15), Z19, K4, Z30 // 624265447537
// VPERMT2B (opcode byte 0x7d), X register forms, opmask K7.
VPERMT2B X2, X0, K7, X20 // 62e27d0f7de2
VPERMT2B X8, X0, K7, X20 // 62c27d0f7de0
VPERMT2B X9, X0, K7, X20 // 62c27d0f7de1
VPERMT2B (BX), X0, K7, X20 // 62e27d0f7d23
VPERMT2B -17(BP)(SI*1), X0, K7, X20 // 62e27d0f7da435efffffff
VPERMT2B X2, X9, K7, X20 // 62e2350f7de2
VPERMT2B X8, X9, K7, X20 // 62c2350f7de0
VPERMT2B X9, X9, K7, X20 // 62c2350f7de1
VPERMT2B (BX), X9, K7, X20 // 62e2350f7d23
VPERMT2B -17(BP)(SI*1), X9, K7, X20 // 62e2350f7da435efffffff
VPERMT2B X2, X13, K7, X20 // 62e2150f7de2
VPERMT2B X8, X13, K7, X20 // 62c2150f7de0
VPERMT2B X9, X13, K7, X20 // 62c2150f7de1
VPERMT2B (BX), X13, K7, X20 // 62e2150f7d23
VPERMT2B -17(BP)(SI*1), X13, K7, X20 // 62e2150f7da435efffffff
VPERMT2B X2, X0, K7, X5 // 62f27d0f7dea
VPERMT2B X8, X0, K7, X5 // 62d27d0f7de8
VPERMT2B X9, X0, K7, X5 // 62d27d0f7de9
VPERMT2B (BX), X0, K7, X5 // 62f27d0f7d2b
VPERMT2B -17(BP)(SI*1), X0, K7, X5 // 62f27d0f7dac35efffffff
VPERMT2B X2, X9, K7, X5 // 62f2350f7dea
VPERMT2B X8, X9, K7, X5 // 62d2350f7de8
VPERMT2B X9, X9, K7, X5 // 62d2350f7de9
VPERMT2B (BX), X9, K7, X5 // 62f2350f7d2b
VPERMT2B -17(BP)(SI*1), X9, K7, X5 // 62f2350f7dac35efffffff
VPERMT2B X2, X13, K7, X5 // 62f2150f7dea
VPERMT2B X8, X13, K7, X5 // 62d2150f7de8
VPERMT2B X9, X13, K7, X5 // 62d2150f7de9
VPERMT2B (BX), X13, K7, X5 // 62f2150f7d2b
VPERMT2B -17(BP)(SI*1), X13, K7, X5 // 62f2150f7dac35efffffff
VPERMT2B X2, X0, K7, X25 // 62627d0f7dca
VPERMT2B X8, X0, K7, X25 // 62427d0f7dc8
VPERMT2B X9, X0, K7, X25 // 62427d0f7dc9
VPERMT2B (BX), X0, K7, X25 // 62627d0f7d0b
VPERMT2B -17(BP)(SI*1), X0, K7, X25 // 62627d0f7d8c35efffffff
VPERMT2B X2, X9, K7, X25 // 6262350f7dca
VPERMT2B X8, X9, K7, X25 // 6242350f7dc8
VPERMT2B X9, X9, K7, X25 // 6242350f7dc9
VPERMT2B (BX), X9, K7, X25 // 6262350f7d0b
VPERMT2B -17(BP)(SI*1), X9, K7, X25 // 6262350f7d8c35efffffff
VPERMT2B X2, X13, K7, X25 // 6262150f7dca
VPERMT2B X8, X13, K7, X25 // 6242150f7dc8
VPERMT2B X9, X13, K7, X25 // 6242150f7dc9
VPERMT2B (BX), X13, K7, X25 // 6262150f7d0b
VPERMT2B -17(BP)(SI*1), X13, K7, X25 // 6262150f7d8c35efffffff
// VPERMT2B, Y register forms, opmask K6.
VPERMT2B Y14, Y2, K6, Y18 // 62c26d2e7dd6
VPERMT2B Y8, Y2, K6, Y18 // 62c26d2e7dd0
VPERMT2B Y20, Y2, K6, Y18 // 62a26d2e7dd4
VPERMT2B 7(SI)(DI*4), Y2, K6, Y18 // 62e26d2e7d94be07000000
VPERMT2B -7(DI)(R8*2), Y2, K6, Y18 // 62a26d2e7d9447f9ffffff
VPERMT2B Y14, Y7, K6, Y18 // 62c2452e7dd6
VPERMT2B Y8, Y7, K6, Y18 // 62c2452e7dd0
VPERMT2B Y20, Y7, K6, Y18 // 62a2452e7dd4
VPERMT2B 7(SI)(DI*4), Y7, K6, Y18 // 62e2452e7d94be07000000
VPERMT2B -7(DI)(R8*2), Y7, K6, Y18 // 62a2452e7d9447f9ffffff
VPERMT2B Y14, Y21, K6, Y18 // 62c255267dd6
VPERMT2B Y8, Y21, K6, Y18 // 62c255267dd0
VPERMT2B Y20, Y21, K6, Y18 // 62a255267dd4
VPERMT2B 7(SI)(DI*4), Y21, K6, Y18 // 62e255267d94be07000000
VPERMT2B -7(DI)(R8*2), Y21, K6, Y18 // 62a255267d9447f9ffffff
VPERMT2B Y14, Y2, K6, Y3 // 62d26d2e7dde
VPERMT2B Y8, Y2, K6, Y3 // 62d26d2e7dd8
VPERMT2B Y20, Y2, K6, Y3 // 62b26d2e7ddc
VPERMT2B 7(SI)(DI*4), Y2, K6, Y3 // 62f26d2e7d9cbe07000000
VPERMT2B -7(DI)(R8*2), Y2, K6, Y3 // 62b26d2e7d9c47f9ffffff
VPERMT2B Y14, Y7, K6, Y3 // 62d2452e7dde
VPERMT2B Y8, Y7, K6, Y3 // 62d2452e7dd8
VPERMT2B Y20, Y7, K6, Y3 // 62b2452e7ddc
VPERMT2B 7(SI)(DI*4), Y7, K6, Y3 // 62f2452e7d9cbe07000000
VPERMT2B -7(DI)(R8*2), Y7, K6, Y3 // 62b2452e7d9c47f9ffffff
VPERMT2B Y14, Y21, K6, Y3 // 62d255267dde
VPERMT2B Y8, Y21, K6, Y3 // 62d255267dd8
VPERMT2B Y20, Y21, K6, Y3 // 62b255267ddc
VPERMT2B 7(SI)(DI*4), Y21, K6, Y3 // 62f255267d9cbe07000000
VPERMT2B -7(DI)(R8*2), Y21, K6, Y3 // 62b255267d9c47f9ffffff
VPERMT2B Y14, Y2, K6, Y24 // 62426d2e7dc6
VPERMT2B Y8, Y2, K6, Y24 // 62426d2e7dc0
VPERMT2B Y20, Y2, K6, Y24 // 62226d2e7dc4
VPERMT2B 7(SI)(DI*4), Y2, K6, Y24 // 62626d2e7d84be07000000
VPERMT2B -7(DI)(R8*2), Y2, K6, Y24 // 62226d2e7d8447f9ffffff
VPERMT2B Y14, Y7, K6, Y24 // 6242452e7dc6
VPERMT2B Y8, Y7, K6, Y24 // 6242452e7dc0
VPERMT2B Y20, Y7, K6, Y24 // 6222452e7dc4
VPERMT2B 7(SI)(DI*4), Y7, K6, Y24 // 6262452e7d84be07000000
VPERMT2B -7(DI)(R8*2), Y7, K6, Y24 // 6222452e7d8447f9ffffff
VPERMT2B Y14, Y21, K6, Y24 // 624255267dc6
VPERMT2B Y8, Y21, K6, Y24 // 624255267dc0
VPERMT2B Y20, Y21, K6, Y24 // 622255267dc4
VPERMT2B 7(SI)(DI*4), Y21, K6, Y24 // 626255267d84be07000000
VPERMT2B -7(DI)(R8*2), Y21, K6, Y24 // 622255267d8447f9ffffff
// VPERMT2B, Z register forms, opmask K3.
VPERMT2B Z20, Z1, K3, Z6 // 62b2754b7df4
VPERMT2B Z9, Z1, K3, Z6 // 62d2754b7df1
VPERMT2B (CX), Z1, K3, Z6 // 62f2754b7d31
VPERMT2B 99(R15), Z1, K3, Z6 // 62d2754b7db763000000
VPERMT2B Z20, Z9, K3, Z6 // 62b2354b7df4
VPERMT2B Z9, Z9, K3, Z6 // 62d2354b7df1
VPERMT2B (CX), Z9, K3, Z6 // 62f2354b7d31
VPERMT2B 99(R15), Z9, K3, Z6 // 62d2354b7db763000000
VPERMT2B Z20, Z1, K3, Z9 // 6232754b7dcc
VPERMT2B Z9, Z1, K3, Z9 // 6252754b7dc9
VPERMT2B (CX), Z1, K3, Z9 // 6272754b7d09
VPERMT2B 99(R15), Z1, K3, Z9 // 6252754b7d8f63000000
VPERMT2B Z20, Z9, K3, Z9 // 6232354b7dcc
VPERMT2B Z9, Z9, K3, Z9 // 6252354b7dc9
VPERMT2B (CX), Z9, K3, Z9 // 6272354b7d09
VPERMT2B 99(R15), Z9, K3, Z9 // 6252354b7d8f63000000
// VPMULTISHIFTQB (opcode byte 0x83), X register forms, opmask K5.
// Unlike the permute groups, these encodings have the top (W) bit of the
// second EVEX payload byte set.
VPMULTISHIFTQB X9, X24, K5, X7 // 62d2bd0583f9
VPMULTISHIFTQB X7, X24, K5, X7 // 62f2bd0583ff
VPMULTISHIFTQB X14, X24, K5, X7 // 62d2bd0583fe
VPMULTISHIFTQB 17(SP)(BP*1), X24, K5, X7 // 62f2bd0583bc2c11000000
VPMULTISHIFTQB -7(CX)(DX*8), X24, K5, X7 // 62f2bd0583bcd1f9ffffff
VPMULTISHIFTQB X9, X20, K5, X7 // 62d2dd0583f9
VPMULTISHIFTQB X7, X20, K5, X7 // 62f2dd0583ff
VPMULTISHIFTQB X14, X20, K5, X7 // 62d2dd0583fe
VPMULTISHIFTQB 17(SP)(BP*1), X20, K5, X7 // 62f2dd0583bc2c11000000
VPMULTISHIFTQB -7(CX)(DX*8), X20, K5, X7 // 62f2dd0583bcd1f9ffffff
VPMULTISHIFTQB X9, X7, K5, X7 // 62d2c50d83f9
VPMULTISHIFTQB X7, X7, K5, X7 // 62f2c50d83ff
VPMULTISHIFTQB X14, X7, K5, X7 // 62d2c50d83fe
VPMULTISHIFTQB 17(SP)(BP*1), X7, K5, X7 // 62f2c50d83bc2c11000000
VPMULTISHIFTQB -7(CX)(DX*8), X7, K5, X7 // 62f2c50d83bcd1f9ffffff
VPMULTISHIFTQB X9, X24, K5, X0 // 62d2bd0583c1
VPMULTISHIFTQB X7, X24, K5, X0 // 62f2bd0583c7
VPMULTISHIFTQB X14, X24, K5, X0 // 62d2bd0583c6
VPMULTISHIFTQB 17(SP)(BP*1), X24, K5, X0 // 62f2bd0583842c11000000
VPMULTISHIFTQB -7(CX)(DX*8), X24, K5, X0 // 62f2bd058384d1f9ffffff
VPMULTISHIFTQB X9, X20, K5, X0 // 62d2dd0583c1
VPMULTISHIFTQB X7, X20, K5, X0 // 62f2dd0583c7
VPMULTISHIFTQB X14, X20, K5, X0 // 62d2dd0583c6
VPMULTISHIFTQB 17(SP)(BP*1), X20, K5, X0 // 62f2dd0583842c11000000
VPMULTISHIFTQB -7(CX)(DX*8), X20, K5, X0 // 62f2dd058384d1f9ffffff
VPMULTISHIFTQB X9, X7, K5, X0 // 62d2c50d83c1
VPMULTISHIFTQB X7, X7, K5, X0 // 62f2c50d83c7
VPMULTISHIFTQB X14, X7, K5, X0 // 62d2c50d83c6
VPMULTISHIFTQB 17(SP)(BP*1), X7, K5, X0 // 62f2c50d83842c11000000
VPMULTISHIFTQB -7(CX)(DX*8), X7, K5, X0 // 62f2c50d8384d1f9ffffff
// VPMULTISHIFTQB, Y register forms, opmask K7.
VPMULTISHIFTQB Y16, Y30, K7, Y12 // 62328d2783e0
VPMULTISHIFTQB Y1, Y30, K7, Y12 // 62728d2783e1
VPMULTISHIFTQB Y30, Y30, K7, Y12 // 62128d2783e6
VPMULTISHIFTQB 17(SP)(BP*2), Y30, K7, Y12 // 62728d2783a46c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y30, K7, Y12 // 62328d2783a487f9ffffff
VPMULTISHIFTQB Y16, Y26, K7, Y12 // 6232ad2783e0
VPMULTISHIFTQB Y1, Y26, K7, Y12 // 6272ad2783e1
VPMULTISHIFTQB Y30, Y26, K7, Y12 // 6212ad2783e6
VPMULTISHIFTQB 17(SP)(BP*2), Y26, K7, Y12 // 6272ad2783a46c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y26, K7, Y12 // 6232ad2783a487f9ffffff
VPMULTISHIFTQB Y16, Y7, K7, Y12 // 6232c52f83e0
VPMULTISHIFTQB Y1, Y7, K7, Y12 // 6272c52f83e1
VPMULTISHIFTQB Y30, Y7, K7, Y12 // 6212c52f83e6
VPMULTISHIFTQB 17(SP)(BP*2), Y7, K7, Y12 // 6272c52f83a46c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y7, K7, Y12 // 6232c52f83a487f9ffffff
VPMULTISHIFTQB Y16, Y30, K7, Y21 // 62a28d2783e8
VPMULTISHIFTQB Y1, Y30, K7, Y21 // 62e28d2783e9
VPMULTISHIFTQB Y30, Y30, K7, Y21 // 62828d2783ee
VPMULTISHIFTQB 17(SP)(BP*2), Y30, K7, Y21 // 62e28d2783ac6c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y30, K7, Y21 // 62a28d2783ac87f9ffffff
VPMULTISHIFTQB Y16, Y26, K7, Y21 // 62a2ad2783e8
VPMULTISHIFTQB Y1, Y26, K7, Y21 // 62e2ad2783e9
VPMULTISHIFTQB Y30, Y26, K7, Y21 // 6282ad2783ee
VPMULTISHIFTQB 17(SP)(BP*2), Y26, K7, Y21 // 62e2ad2783ac6c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y26, K7, Y21 // 62a2ad2783ac87f9ffffff
VPMULTISHIFTQB Y16, Y7, K7, Y21 // 62a2c52f83e8
VPMULTISHIFTQB Y1, Y7, K7, Y21 // 62e2c52f83e9
VPMULTISHIFTQB Y30, Y7, K7, Y21 // 6282c52f83ee
VPMULTISHIFTQB 17(SP)(BP*2), Y7, K7, Y21 // 62e2c52f83ac6c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y7, K7, Y21 // 62a2c52f83ac87f9ffffff
VPMULTISHIFTQB Y16, Y30, K7, Y14 // 62328d2783f0
VPMULTISHIFTQB Y1, Y30, K7, Y14 // 62728d2783f1
VPMULTISHIFTQB Y30, Y30, K7, Y14 // 62128d2783f6
VPMULTISHIFTQB 17(SP)(BP*2), Y30, K7, Y14 // 62728d2783b46c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y30, K7, Y14 // 62328d2783b487f9ffffff
VPMULTISHIFTQB Y16, Y26, K7, Y14 // 6232ad2783f0
VPMULTISHIFTQB Y1, Y26, K7, Y14 // 6272ad2783f1
VPMULTISHIFTQB Y30, Y26, K7, Y14 // 6212ad2783f6
VPMULTISHIFTQB 17(SP)(BP*2), Y26, K7, Y14 // 6272ad2783b46c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y26, K7, Y14 // 6232ad2783b487f9ffffff
VPMULTISHIFTQB Y16, Y7, K7, Y14 // 6232c52f83f0
VPMULTISHIFTQB Y1, Y7, K7, Y14 // 6272c52f83f1
VPMULTISHIFTQB Y30, Y7, K7, Y14 // 6212c52f83f6
VPMULTISHIFTQB 17(SP)(BP*2), Y7, K7, Y14 // 6272c52f83b46c11000000
VPMULTISHIFTQB -7(DI)(R8*4), Y7, K7, Y14 // 6232c52f83b487f9ffffff
// VPMULTISHIFTQB, Z register forms, opmask K7.
VPMULTISHIFTQB Z7, Z2, K7, Z18 // 62e2ed4f83d7
VPMULTISHIFTQB Z13, Z2, K7, Z18 // 62c2ed4f83d5
VPMULTISHIFTQB 7(AX)(CX*4), Z2, K7, Z18 // 62e2ed4f83948807000000
VPMULTISHIFTQB 7(AX)(CX*1), Z2, K7, Z18 // 62e2ed4f83940807000000
VPMULTISHIFTQB Z7, Z21, K7, Z18 // 62e2d54783d7
VPMULTISHIFTQB Z13, Z21, K7, Z18 // 62c2d54783d5
VPMULTISHIFTQB 7(AX)(CX*4), Z21, K7, Z18 // 62e2d54783948807000000
VPMULTISHIFTQB 7(AX)(CX*1), Z21, K7, Z18 // 62e2d54783940807000000
VPMULTISHIFTQB Z7, Z2, K7, Z24 // 6262ed4f83c7
VPMULTISHIFTQB Z13, Z2, K7, Z24 // 6242ed4f83c5
VPMULTISHIFTQB 7(AX)(CX*4), Z2, K7, Z24 // 6262ed4f83848807000000
VPMULTISHIFTQB 7(AX)(CX*1), Z2, K7, Z24 // 6262ed4f83840807000000
VPMULTISHIFTQB Z7, Z21, K7, Z24 // 6262d54783c7
VPMULTISHIFTQB Z13, Z21, K7, Z24 // 6242d54783c5
VPMULTISHIFTQB 7(AX)(CX*4), Z21, K7, Z24 // 6262d54783848807000000
VPMULTISHIFTQB 7(AX)(CX*1), Z21, K7, Z24 // 6262d54783840807000000
RET