forked from ktye/fft
/
convolve.go
173 lines (166 loc) · 5.06 KB
/
convolve.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
package gofft
import (
"runtime"
"sync"
)
// Convolve computes the discrete convolution of x and y using FFT.
// Pads x and y to the next power of 2 from len(x)+len(y)-1
func Convolve(x, y []complex128) ([]complex128, error) {
if len(x) == 0 && len(y) == 0 {
return nil, nil
}
n := len(x) + len(y) - 1
N := NextPow2(n)
x = ZeroPad(x, N)
y = ZeroPad(y, N)
err := FastConvolve(x, y)
return x[:n], err
}
// FastConvolve computes the discrete convolution of x and y using FFT
// and stores the result in x, while erasing y (setting it to 0s).
// Since this does no allocations, x and y are assumed to already be 0-padded
// for at least half their length.
func FastConvolve(x, y []complex128) error {
if len(x) == 0 && len(y) == 0 {
return nil
}
if err := checkZero("difference in FastConvolve input vectors length", len(x)-len(y)); err != nil {
return err
}
if err := checkLength("FastConvolve input vector length", len(x)); err != nil {
return err
}
convolve(x, y)
return nil
}
// MultiConvolve computes the discrete convolution of many arrays using a
// hierarchical FFT algorithm that successfully builds up larger convolutions.
// This requires allocating up to 4*N extra memory for appropriate 0-padding
// where N=sum(len(x) for x in X).
// Takes O(N*log(N)^2) run time and O(N) additional space.
//
// This is much slower and takes many more allocations than FastMultiConvolve
// below, but has a smart planner that handles disproportionate array sizes
// very well. If all your arrays are the same length, FastMultiConvolve
// will be much faster.
func MultiConvolve(X ...[]complex128) ([]complex128, error) {
if len(X) == 0 {
return nil, nil
}
arraysByLength := map[int][][]complex128{}
mx := 1
returnLength := 1
for _, x := range X {
// Pad out each array to the next power of two after twice the length
// Doubling the length gives a buffer-zone for convolve to write to
n := NextPow2(2 * len(x))
arraysByLength[n] = append(arraysByLength[n], ZeroPad(x, n))
if n > mx {
mx = n
}
returnLength += len(x) - 1
}
if returnLength <= 0 {
return nil, nil
}
// For each successive power of 2, convolve the entries in pairs up to the
// next power of 2
for i := 1; ; i <<= 1 {
arrays := arraysByLength[i]
if len(arrays) > 0 {
if len(arraysByLength) == 1 {
return multiConvolveSingleLevel(arrays, returnLength)
}
for j := 0; j < len(arrays); j += 2 {
if j+1 < len(arrays) {
// For every pair, convolve to a single array
convolve(arrays[j], arrays[j+1])
}
// Pad out to the next power of 2
arraysByLength[2*i] = append(arraysByLength[2*i], ZeroPad(arrays[j], 2*i))
}
}
// Trigger the garbage collector
arraysByLength[i] = nil
delete(arraysByLength, i)
}
}
func multiConvolveSingleLevel(arrays [][]complex128, returnLength int) ([]complex128, error) {
// If this is the final level, no need for further allocations,
// just convolve together and return
if len(arrays) == 2 {
convolve(arrays[0], arrays[1])
return arrays[0][:returnLength], nil
}
if len(arrays) == 1 {
return arrays[0][:returnLength], nil
}
// If everything has ended up on the same length,
// just use FastMultiConvolve and return
N := len(arrays[0])
n2 := NextPow2(len(arrays))
data := make([]complex128, n2*N)
for j, array := range arrays {
copy(data[N*j:], array)
}
for j := len(arrays); j < n2; j++ {
data[N*j] = 1.0
}
err := FastMultiConvolve(data, N, false)
return data[:returnLength], err
}
// FastMultiConvolve computes the discrete convolution of many arrays using a
// hierarchical FFT algorithm, and stores the result in the first section of
// the input, writing 0s to the remainder of the input
// This does no allocations, so the arrays must first be 0-padded out to the
// next power of 2 from sum of the lengths of the longest two arrays.
// Additionally, the number of arrays must be a power of 2
// X is the concatenated array of arrays, of length N (n*m)
// n is the length of the 0-padded arrays.
// multithread tells the algorithm to use goroutines,
// which can slow things down for small N.
// Takes O(N*log(N)^2) run time and O(1) additional space.
func FastMultiConvolve(X []complex128, n int, multithread bool) error {
if err := checkLength("Convolve single input array", n); err != nil {
return err
}
N := len(X)
if err := checkZero("FastMultiConvolve remainder", N%n); err != nil {
return err
}
if err := checkLength("FastMultiConvolve number of input arrays", N/n); err != nil {
return err
}
for ; n != N; n <<= 1 {
n2 := n << 1
if multithread {
var wg sync.WaitGroup
NumCPU := runtime.NumCPU()
for j := 0; j < NumCPU; j++ {
wg.Add(1)
go func(s, e int) {
defer wg.Done()
for i := s; i < e; i += n2 {
convolve(X[i:i+n], X[i+n:i+n2])
}
}(n2*((j*N/n2)/NumCPU), n2*(((j+1)*N/n2)/NumCPU))
}
wg.Wait()
} else {
for i := 0; i < N; i += n2 {
convolve(X[i:i+n], X[i+n:i+n2])
}
}
}
return nil
}
// convolve does the actual work of convolutions.
func convolve(x, y []complex128) {
fft(x)
fft(y)
for i := 0; i < len(x); i++ {
x[i] *= y[i]
y[i] = 0
}
ifft(x)
}