forked from DuanWeiFan/parquet-go
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gather_amd64.go
85 lines (66 loc) · 1.52 KB
/
gather_amd64.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
//go:build !purego
package sparse
import (
"golang.org/x/sys/cpu"
)
// gatherBits packs the least significant bit of each value of src into dst,
// one bit per value, with value i landing in bit i%8 of dst[i/8]. It processes
// min(len(dst)*8, src.Len()) values and returns that count.
//
// The largest whole-byte prefix (a multiple of 8 values) is delegated to a
// bulk routine; any remaining values are merged into the last byte one bit at
// a time, leaving the other bits of that byte untouched.
func gatherBits(dst []byte, src Uint8Array) int {
	total := min(len(dst)*8, src.Len())
	done := 0

	if total >= 8 {
		// Round down to a whole number of output bytes.
		done = total - total%8
		head := src.Slice(0, done)

		// VPGATHERDD may read data beyond the end of the program memory and
		// trigger a fault unless the offset is at least 4 bytes. When the
		// boolean values lack that padding (or AVX2 is unavailable), use the
		// scalar routine, which can load single bytes from memory.
		if src.off < 4 || !cpu.X86.HasAVX2 {
			gatherBitsDefault(dst, head)
		} else {
			gatherBitsAVX2(dst, head)
		}
	}

	// Trailing values that do not fill a complete byte.
	for ; done < total; done++ {
		pos, shift := done/8, done%8
		bit := (src.Index(done) & 1) << shift
		// Clear the target bit, then set it from the source value.
		dst[pos] = bit | (dst[pos] &^ (1 << shift))
	}
	return total
}
// gather32 copies min(len(dst), src.Len()) values from src into dst and
// returns that count.
//
// When AVX2 is available and at least 16 values are to be copied, the largest
// prefix whose length is a multiple of 8 is handed to gather32AVX2; whatever
// remains is copied one element at a time.
func gather32(dst []uint32, src Uint32Array) int {
	total := min(len(dst), src.Len())
	done := 0

	if total >= 16 && cpu.X86.HasAVX2 {
		done = total - total%8
		// dst is capped (length and capacity) to the vectorized prefix; src
		// is passed through unsliced.
		gather32AVX2(dst[:done:done], src)
	}

	for ; done < total; done++ {
		dst[done] = src.Index(done)
	}
	return total
}
// gather64 copies min(len(dst), src.Len()) values from src into dst and
// returns that count.
//
// When AVX2 is available and at least 8 values are to be copied, the largest
// prefix whose length is a multiple of 4 is handed to gather64AVX2; whatever
// remains is copied one element at a time.
func gather64(dst []uint64, src Uint64Array) int {
	total := min(len(dst), src.Len())
	done := 0

	if total >= 8 && cpu.X86.HasAVX2 {
		done = total - total%4
		// dst is capped (length and capacity) to the vectorized prefix; src
		// is passed through unsliced.
		gather64AVX2(dst[:done:done], src)
	}

	for ; done < total; done++ {
		dst[done] = src.Index(done)
	}
	return total
}
// gatherBitsAVX2 is declared without a Go body; its implementation lives
// outside this file (NOTE(review): presumably the package's amd64 assembly —
// confirm against the matching .s file).
//
// gatherBits calls it with a src whose length is a multiple of 8, and only
// when src.off >= 4 and the CPU reports AVX2 support.
//
//go:noescape
func gatherBitsAVX2(dst []byte, src Uint8Array)
// gatherBitsDefault is declared without a Go body; its implementation lives
// outside this file (NOTE(review): presumably the package's amd64 assembly —
// confirm against the matching .s file).
//
// gatherBits uses it as the fallback for the whole-byte prefix (src length a
// multiple of 8) when src.off < 4 or AVX2 is not available.
//
//go:noescape
func gatherBitsDefault(dst []byte, src Uint8Array)
// gather32AVX2 is declared without a Go body; its implementation lives
// outside this file (NOTE(review): presumably the package's amd64 assembly —
// confirm against the matching .s file).
//
// gather32 calls it only when AVX2 is available, passing a dst whose length
// is a multiple of 8 (and at least 16) together with the unsliced src.
//
//go:noescape
func gather32AVX2(dst []uint32, src Uint32Array)
// gather64AVX2 is declared without a Go body; its implementation lives
// outside this file (NOTE(review): presumably the package's amd64 assembly —
// confirm against the matching .s file).
//
// gather64 calls it only when AVX2 is available, passing a dst whose length
// is a multiple of 4 (and at least 8) together with the unsliced src.
//
//go:noescape
func gather64AVX2(dst []uint64, src Uint64Array)
// gather128 is declared without a Go body; its implementation lives outside
// this file (NOTE(review): presumably the package's amd64 assembly — confirm
// against the matching .s file).
//
// NOTE(review): by analogy with gather32 and gather64 the int result is
// presumably the number of 16-byte values written to dst — verify against the
// implementation before relying on it.
//
//go:noescape
func gather128(dst [][16]byte, src Uint128Array) int