-
Notifications
You must be signed in to change notification settings - Fork 3.4k
/
transpose_ints_amd64.go
325 lines (285 loc) · 13.4 KB
/
transpose_ints_amd64.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
// Code generated by transpose_ints_amd64.go.tmpl. DO NOT EDIT.

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !noasm

package utils
import (
"golang.org/x/sys/cpu"
)
// Transpose<Src><Dest> are the exported entry points for the integer
// transpose kernels, one per (source element type, destination element type)
// pair. Each is a function variable rather than a function; init assigns it
// the AVX2, SSE4 or unsuffixed implementation according to the cpu.X86
// feature flags.
//
// The parameter names are descriptive only and do not alter the function
// types: src holds the <Src> elements and dest the <Dest> elements.
// transposeMap is presumably the index mapping applied during the transpose
// (TODO confirm against the implementations, which live outside this file).
var (
	// Source element type: int8.
	TransposeInt8Int8   func(src []int8, dest []int8, transposeMap []int32)
	TransposeInt8Uint8  func(src []int8, dest []uint8, transposeMap []int32)
	TransposeInt8Int16  func(src []int8, dest []int16, transposeMap []int32)
	TransposeInt8Uint16 func(src []int8, dest []uint16, transposeMap []int32)
	TransposeInt8Int32  func(src []int8, dest []int32, transposeMap []int32)
	TransposeInt8Uint32 func(src []int8, dest []uint32, transposeMap []int32)
	TransposeInt8Int64  func(src []int8, dest []int64, transposeMap []int32)
	TransposeInt8Uint64 func(src []int8, dest []uint64, transposeMap []int32)

	// Source element type: uint8.
	TransposeUint8Int8   func(src []uint8, dest []int8, transposeMap []int32)
	TransposeUint8Uint8  func(src []uint8, dest []uint8, transposeMap []int32)
	TransposeUint8Int16  func(src []uint8, dest []int16, transposeMap []int32)
	TransposeUint8Uint16 func(src []uint8, dest []uint16, transposeMap []int32)
	TransposeUint8Int32  func(src []uint8, dest []int32, transposeMap []int32)
	TransposeUint8Uint32 func(src []uint8, dest []uint32, transposeMap []int32)
	TransposeUint8Int64  func(src []uint8, dest []int64, transposeMap []int32)
	TransposeUint8Uint64 func(src []uint8, dest []uint64, transposeMap []int32)

	// Source element type: int16.
	TransposeInt16Int8   func(src []int16, dest []int8, transposeMap []int32)
	TransposeInt16Uint8  func(src []int16, dest []uint8, transposeMap []int32)
	TransposeInt16Int16  func(src []int16, dest []int16, transposeMap []int32)
	TransposeInt16Uint16 func(src []int16, dest []uint16, transposeMap []int32)
	TransposeInt16Int32  func(src []int16, dest []int32, transposeMap []int32)
	TransposeInt16Uint32 func(src []int16, dest []uint32, transposeMap []int32)
	TransposeInt16Int64  func(src []int16, dest []int64, transposeMap []int32)
	TransposeInt16Uint64 func(src []int16, dest []uint64, transposeMap []int32)

	// Source element type: uint16.
	TransposeUint16Int8   func(src []uint16, dest []int8, transposeMap []int32)
	TransposeUint16Uint8  func(src []uint16, dest []uint8, transposeMap []int32)
	TransposeUint16Int16  func(src []uint16, dest []int16, transposeMap []int32)
	TransposeUint16Uint16 func(src []uint16, dest []uint16, transposeMap []int32)
	TransposeUint16Int32  func(src []uint16, dest []int32, transposeMap []int32)
	TransposeUint16Uint32 func(src []uint16, dest []uint32, transposeMap []int32)
	TransposeUint16Int64  func(src []uint16, dest []int64, transposeMap []int32)
	TransposeUint16Uint64 func(src []uint16, dest []uint64, transposeMap []int32)

	// Source element type: int32.
	TransposeInt32Int8   func(src []int32, dest []int8, transposeMap []int32)
	TransposeInt32Uint8  func(src []int32, dest []uint8, transposeMap []int32)
	TransposeInt32Int16  func(src []int32, dest []int16, transposeMap []int32)
	TransposeInt32Uint16 func(src []int32, dest []uint16, transposeMap []int32)
	TransposeInt32Int32  func(src []int32, dest []int32, transposeMap []int32)
	TransposeInt32Uint32 func(src []int32, dest []uint32, transposeMap []int32)
	TransposeInt32Int64  func(src []int32, dest []int64, transposeMap []int32)
	TransposeInt32Uint64 func(src []int32, dest []uint64, transposeMap []int32)

	// Source element type: uint32.
	TransposeUint32Int8   func(src []uint32, dest []int8, transposeMap []int32)
	TransposeUint32Uint8  func(src []uint32, dest []uint8, transposeMap []int32)
	TransposeUint32Int16  func(src []uint32, dest []int16, transposeMap []int32)
	TransposeUint32Uint16 func(src []uint32, dest []uint16, transposeMap []int32)
	TransposeUint32Int32  func(src []uint32, dest []int32, transposeMap []int32)
	TransposeUint32Uint32 func(src []uint32, dest []uint32, transposeMap []int32)
	TransposeUint32Int64  func(src []uint32, dest []int64, transposeMap []int32)
	TransposeUint32Uint64 func(src []uint32, dest []uint64, transposeMap []int32)

	// Source element type: int64.
	TransposeInt64Int8   func(src []int64, dest []int8, transposeMap []int32)
	TransposeInt64Uint8  func(src []int64, dest []uint8, transposeMap []int32)
	TransposeInt64Int16  func(src []int64, dest []int16, transposeMap []int32)
	TransposeInt64Uint16 func(src []int64, dest []uint16, transposeMap []int32)
	TransposeInt64Int32  func(src []int64, dest []int32, transposeMap []int32)
	TransposeInt64Uint32 func(src []int64, dest []uint32, transposeMap []int32)
	TransposeInt64Int64  func(src []int64, dest []int64, transposeMap []int32)
	TransposeInt64Uint64 func(src []int64, dest []uint64, transposeMap []int32)

	// Source element type: uint64.
	TransposeUint64Int8   func(src []uint64, dest []int8, transposeMap []int32)
	TransposeUint64Uint8  func(src []uint64, dest []uint8, transposeMap []int32)
	TransposeUint64Int16  func(src []uint64, dest []int16, transposeMap []int32)
	TransposeUint64Uint16 func(src []uint64, dest []uint16, transposeMap []int32)
	TransposeUint64Int32  func(src []uint64, dest []int32, transposeMap []int32)
	TransposeUint64Uint32 func(src []uint64, dest []uint32, transposeMap []int32)
	TransposeUint64Int64  func(src []uint64, dest []int64, transposeMap []int32)
	TransposeUint64Uint64 func(src []uint64, dest []uint64, transposeMap []int32)
)
// init binds every exported Transpose* function variable to one of three
// implementation families, chosen once at package initialization from the
// CPU feature flags exposed by cpu.X86:
//
//   - HasAVX2 set: the functions with the "avx2" suffix
//   - otherwise HasSSE42 set: the functions with the "sse4" suffix
//   - otherwise: the unsuffixed functions
func init() {
	switch {
	case cpu.X86.HasAVX2:
		bindTransposeIntsAVX2()
	case cpu.X86.HasSSE42:
		bindTransposeIntsSSE4()
	default:
		bindTransposeIntsFallback()
	}
}

// bindTransposeIntsAVX2 points every Transpose* variable at its "avx2"
// suffixed implementation.
func bindTransposeIntsAVX2() {
	// int8 sources
	TransposeInt8Int8 = transposeInt8Int8avx2
	TransposeInt8Uint8 = transposeInt8Uint8avx2
	TransposeInt8Int16 = transposeInt8Int16avx2
	TransposeInt8Uint16 = transposeInt8Uint16avx2
	TransposeInt8Int32 = transposeInt8Int32avx2
	TransposeInt8Uint32 = transposeInt8Uint32avx2
	TransposeInt8Int64 = transposeInt8Int64avx2
	TransposeInt8Uint64 = transposeInt8Uint64avx2

	// uint8 sources
	TransposeUint8Int8 = transposeUint8Int8avx2
	TransposeUint8Uint8 = transposeUint8Uint8avx2
	TransposeUint8Int16 = transposeUint8Int16avx2
	TransposeUint8Uint16 = transposeUint8Uint16avx2
	TransposeUint8Int32 = transposeUint8Int32avx2
	TransposeUint8Uint32 = transposeUint8Uint32avx2
	TransposeUint8Int64 = transposeUint8Int64avx2
	TransposeUint8Uint64 = transposeUint8Uint64avx2

	// int16 sources
	TransposeInt16Int8 = transposeInt16Int8avx2
	TransposeInt16Uint8 = transposeInt16Uint8avx2
	TransposeInt16Int16 = transposeInt16Int16avx2
	TransposeInt16Uint16 = transposeInt16Uint16avx2
	TransposeInt16Int32 = transposeInt16Int32avx2
	TransposeInt16Uint32 = transposeInt16Uint32avx2
	TransposeInt16Int64 = transposeInt16Int64avx2
	TransposeInt16Uint64 = transposeInt16Uint64avx2

	// uint16 sources
	TransposeUint16Int8 = transposeUint16Int8avx2
	TransposeUint16Uint8 = transposeUint16Uint8avx2
	TransposeUint16Int16 = transposeUint16Int16avx2
	TransposeUint16Uint16 = transposeUint16Uint16avx2
	TransposeUint16Int32 = transposeUint16Int32avx2
	TransposeUint16Uint32 = transposeUint16Uint32avx2
	TransposeUint16Int64 = transposeUint16Int64avx2
	TransposeUint16Uint64 = transposeUint16Uint64avx2

	// int32 sources
	TransposeInt32Int8 = transposeInt32Int8avx2
	TransposeInt32Uint8 = transposeInt32Uint8avx2
	TransposeInt32Int16 = transposeInt32Int16avx2
	TransposeInt32Uint16 = transposeInt32Uint16avx2
	TransposeInt32Int32 = transposeInt32Int32avx2
	TransposeInt32Uint32 = transposeInt32Uint32avx2
	TransposeInt32Int64 = transposeInt32Int64avx2
	TransposeInt32Uint64 = transposeInt32Uint64avx2

	// uint32 sources
	TransposeUint32Int8 = transposeUint32Int8avx2
	TransposeUint32Uint8 = transposeUint32Uint8avx2
	TransposeUint32Int16 = transposeUint32Int16avx2
	TransposeUint32Uint16 = transposeUint32Uint16avx2
	TransposeUint32Int32 = transposeUint32Int32avx2
	TransposeUint32Uint32 = transposeUint32Uint32avx2
	TransposeUint32Int64 = transposeUint32Int64avx2
	TransposeUint32Uint64 = transposeUint32Uint64avx2

	// int64 sources
	TransposeInt64Int8 = transposeInt64Int8avx2
	TransposeInt64Uint8 = transposeInt64Uint8avx2
	TransposeInt64Int16 = transposeInt64Int16avx2
	TransposeInt64Uint16 = transposeInt64Uint16avx2
	TransposeInt64Int32 = transposeInt64Int32avx2
	TransposeInt64Uint32 = transposeInt64Uint32avx2
	TransposeInt64Int64 = transposeInt64Int64avx2
	TransposeInt64Uint64 = transposeInt64Uint64avx2

	// uint64 sources
	TransposeUint64Int8 = transposeUint64Int8avx2
	TransposeUint64Uint8 = transposeUint64Uint8avx2
	TransposeUint64Int16 = transposeUint64Int16avx2
	TransposeUint64Uint16 = transposeUint64Uint16avx2
	TransposeUint64Int32 = transposeUint64Int32avx2
	TransposeUint64Uint32 = transposeUint64Uint32avx2
	TransposeUint64Int64 = transposeUint64Int64avx2
	TransposeUint64Uint64 = transposeUint64Uint64avx2
}

// bindTransposeIntsSSE4 points every Transpose* variable at its "sse4"
// suffixed implementation.
func bindTransposeIntsSSE4() {
	// int8 sources
	TransposeInt8Int8 = transposeInt8Int8sse4
	TransposeInt8Uint8 = transposeInt8Uint8sse4
	TransposeInt8Int16 = transposeInt8Int16sse4
	TransposeInt8Uint16 = transposeInt8Uint16sse4
	TransposeInt8Int32 = transposeInt8Int32sse4
	TransposeInt8Uint32 = transposeInt8Uint32sse4
	TransposeInt8Int64 = transposeInt8Int64sse4
	TransposeInt8Uint64 = transposeInt8Uint64sse4

	// uint8 sources
	TransposeUint8Int8 = transposeUint8Int8sse4
	TransposeUint8Uint8 = transposeUint8Uint8sse4
	TransposeUint8Int16 = transposeUint8Int16sse4
	TransposeUint8Uint16 = transposeUint8Uint16sse4
	TransposeUint8Int32 = transposeUint8Int32sse4
	TransposeUint8Uint32 = transposeUint8Uint32sse4
	TransposeUint8Int64 = transposeUint8Int64sse4
	TransposeUint8Uint64 = transposeUint8Uint64sse4

	// int16 sources
	TransposeInt16Int8 = transposeInt16Int8sse4
	TransposeInt16Uint8 = transposeInt16Uint8sse4
	TransposeInt16Int16 = transposeInt16Int16sse4
	TransposeInt16Uint16 = transposeInt16Uint16sse4
	TransposeInt16Int32 = transposeInt16Int32sse4
	TransposeInt16Uint32 = transposeInt16Uint32sse4
	TransposeInt16Int64 = transposeInt16Int64sse4
	TransposeInt16Uint64 = transposeInt16Uint64sse4

	// uint16 sources
	TransposeUint16Int8 = transposeUint16Int8sse4
	TransposeUint16Uint8 = transposeUint16Uint8sse4
	TransposeUint16Int16 = transposeUint16Int16sse4
	TransposeUint16Uint16 = transposeUint16Uint16sse4
	TransposeUint16Int32 = transposeUint16Int32sse4
	TransposeUint16Uint32 = transposeUint16Uint32sse4
	TransposeUint16Int64 = transposeUint16Int64sse4
	TransposeUint16Uint64 = transposeUint16Uint64sse4

	// int32 sources
	TransposeInt32Int8 = transposeInt32Int8sse4
	TransposeInt32Uint8 = transposeInt32Uint8sse4
	TransposeInt32Int16 = transposeInt32Int16sse4
	TransposeInt32Uint16 = transposeInt32Uint16sse4
	TransposeInt32Int32 = transposeInt32Int32sse4
	TransposeInt32Uint32 = transposeInt32Uint32sse4
	TransposeInt32Int64 = transposeInt32Int64sse4
	TransposeInt32Uint64 = transposeInt32Uint64sse4

	// uint32 sources
	TransposeUint32Int8 = transposeUint32Int8sse4
	TransposeUint32Uint8 = transposeUint32Uint8sse4
	TransposeUint32Int16 = transposeUint32Int16sse4
	TransposeUint32Uint16 = transposeUint32Uint16sse4
	TransposeUint32Int32 = transposeUint32Int32sse4
	TransposeUint32Uint32 = transposeUint32Uint32sse4
	TransposeUint32Int64 = transposeUint32Int64sse4
	TransposeUint32Uint64 = transposeUint32Uint64sse4

	// int64 sources
	TransposeInt64Int8 = transposeInt64Int8sse4
	TransposeInt64Uint8 = transposeInt64Uint8sse4
	TransposeInt64Int16 = transposeInt64Int16sse4
	TransposeInt64Uint16 = transposeInt64Uint16sse4
	TransposeInt64Int32 = transposeInt64Int32sse4
	TransposeInt64Uint32 = transposeInt64Uint32sse4
	TransposeInt64Int64 = transposeInt64Int64sse4
	TransposeInt64Uint64 = transposeInt64Uint64sse4

	// uint64 sources
	TransposeUint64Int8 = transposeUint64Int8sse4
	TransposeUint64Uint8 = transposeUint64Uint8sse4
	TransposeUint64Int16 = transposeUint64Int16sse4
	TransposeUint64Uint16 = transposeUint64Uint16sse4
	TransposeUint64Int32 = transposeUint64Int32sse4
	TransposeUint64Uint32 = transposeUint64Uint32sse4
	TransposeUint64Int64 = transposeUint64Int64sse4
	TransposeUint64Uint64 = transposeUint64Uint64sse4
}

// bindTransposeIntsFallback points every Transpose* variable at its
// unsuffixed implementation, used when neither AVX2 nor SSE4.2 is reported.
func bindTransposeIntsFallback() {
	// int8 sources
	TransposeInt8Int8 = transposeInt8Int8
	TransposeInt8Uint8 = transposeInt8Uint8
	TransposeInt8Int16 = transposeInt8Int16
	TransposeInt8Uint16 = transposeInt8Uint16
	TransposeInt8Int32 = transposeInt8Int32
	TransposeInt8Uint32 = transposeInt8Uint32
	TransposeInt8Int64 = transposeInt8Int64
	TransposeInt8Uint64 = transposeInt8Uint64

	// uint8 sources
	TransposeUint8Int8 = transposeUint8Int8
	TransposeUint8Uint8 = transposeUint8Uint8
	TransposeUint8Int16 = transposeUint8Int16
	TransposeUint8Uint16 = transposeUint8Uint16
	TransposeUint8Int32 = transposeUint8Int32
	TransposeUint8Uint32 = transposeUint8Uint32
	TransposeUint8Int64 = transposeUint8Int64
	TransposeUint8Uint64 = transposeUint8Uint64

	// int16 sources
	TransposeInt16Int8 = transposeInt16Int8
	TransposeInt16Uint8 = transposeInt16Uint8
	TransposeInt16Int16 = transposeInt16Int16
	TransposeInt16Uint16 = transposeInt16Uint16
	TransposeInt16Int32 = transposeInt16Int32
	TransposeInt16Uint32 = transposeInt16Uint32
	TransposeInt16Int64 = transposeInt16Int64
	TransposeInt16Uint64 = transposeInt16Uint64

	// uint16 sources
	TransposeUint16Int8 = transposeUint16Int8
	TransposeUint16Uint8 = transposeUint16Uint8
	TransposeUint16Int16 = transposeUint16Int16
	TransposeUint16Uint16 = transposeUint16Uint16
	TransposeUint16Int32 = transposeUint16Int32
	TransposeUint16Uint32 = transposeUint16Uint32
	TransposeUint16Int64 = transposeUint16Int64
	TransposeUint16Uint64 = transposeUint16Uint64

	// int32 sources
	TransposeInt32Int8 = transposeInt32Int8
	TransposeInt32Uint8 = transposeInt32Uint8
	TransposeInt32Int16 = transposeInt32Int16
	TransposeInt32Uint16 = transposeInt32Uint16
	TransposeInt32Int32 = transposeInt32Int32
	TransposeInt32Uint32 = transposeInt32Uint32
	TransposeInt32Int64 = transposeInt32Int64
	TransposeInt32Uint64 = transposeInt32Uint64

	// uint32 sources
	TransposeUint32Int8 = transposeUint32Int8
	TransposeUint32Uint8 = transposeUint32Uint8
	TransposeUint32Int16 = transposeUint32Int16
	TransposeUint32Uint16 = transposeUint32Uint16
	TransposeUint32Int32 = transposeUint32Int32
	TransposeUint32Uint32 = transposeUint32Uint32
	TransposeUint32Int64 = transposeUint32Int64
	TransposeUint32Uint64 = transposeUint32Uint64

	// int64 sources
	TransposeInt64Int8 = transposeInt64Int8
	TransposeInt64Uint8 = transposeInt64Uint8
	TransposeInt64Int16 = transposeInt64Int16
	TransposeInt64Uint16 = transposeInt64Uint16
	TransposeInt64Int32 = transposeInt64Int32
	TransposeInt64Uint32 = transposeInt64Uint32
	TransposeInt64Int64 = transposeInt64Int64
	TransposeInt64Uint64 = transposeInt64Uint64

	// uint64 sources
	TransposeUint64Int8 = transposeUint64Int8
	TransposeUint64Uint8 = transposeUint64Uint8
	TransposeUint64Int16 = transposeUint64Int16
	TransposeUint64Uint16 = transposeUint64Uint16
	TransposeUint64Int32 = transposeUint64Int32
	TransposeUint64Uint32 = transposeUint64Uint32
	TransposeUint64Int64 = transposeUint64Int64
	TransposeUint64Uint64 = transposeUint64Uint64
}