forked from padster/go-sound
-
Notifications
You must be signed in to change notification settings - Fork 0
/
constantq.go
295 lines (241 loc) · 8.89 KB
/
constantq.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
package cq
import (
"fmt"
"math"
"github.com/mjibson/go-dsp/fft"
)
const DEBUG_CQ = false
type ConstantQ struct {
kernel *CQKernel
bigBlockSize int
OutputLatency int
buffers [][]float64
latencies []int
decimators []*Resampler
}
func NewConstantQ(params CQParams) *ConstantQ {
kernel := NewCQKernel(params)
p := kernel.Properties
// Use exact powers of two for resampling rates. They don't have
// to be related to our actual samplerate: the resampler only
// cares about the ratio, but it only accepts integer source and
// target rates, and if we start from the actual samplerate we
// risk getting non-integer rates for lower octaves
sourceRate := unsafeShift(p.octaves)
latencies := make([]int, p.octaves, p.octaves)
decimators := make([]*Resampler, p.octaves, p.octaves)
// top octave, no resampling
latencies[0] = 0
decimators[0] = nil
for i := 1; i < p.octaves; i++ {
factor := unsafeShift(i)
r := NewResampler(sourceRate, sourceRate/factor, 50, 0.05)
if DEBUG_CQ {
fmt.Printf("Forward: octave %d: resample from %v to %v\n", i, sourceRate, sourceRate/factor)
}
// We need to adapt the latencies so as to get the first input
// sample to be aligned, in time, at the decimator output
// across all octaves.
//
// Our decimator uses a linear phase filter, but being causal
// it is not zero phase: it has a latency that depends on the
// decimation factor. Those latencies have been calculated
// per-octave and are available to us in the latencies
// array. Left to its own devices, the first input sample will
// appear at output sample 0 in the highest octave (where no
// decimation is needed), sample number latencies[1] in the
// next octave down, latencies[2] in the next one, etc. We get
// to apply some artificial per-octave latency after the
// decimator in the processing chain, in order to compensate
// for the differing latencies associated with different
// decimation factors. How much should we insert?
//
// The outputs of the decimators are at different rates (in
// terms of the relation between clock time and samples) and
// we want them aligned in terms of time. So, for example, a
// latency of 10 samples with a decimation factor of 2 is
// equivalent to a latency of 20 with no decimation -- they
// both result in the first output sample happening at the
// same equivalent time in milliseconds.
//
// So here we record the latency added by the decimator, in
// terms of the sample rate of the undecimated signal. Then we
// use that to compensate in a moment, when we've discovered
// what the longest latency across all octaves is.
latencies[i] = r.GetLatency() * factor
decimators[i] = r
}
bigBlockSize := p.fftSize * unsafeShift(p.octaves-1)
// Now add in the extra padding and compensate for hops that must
// be dropped in order to align the atom centres across
// octaves. Again this is a bit trickier because we are doing it
// at input rather than output and so must work in per-octave
// sample rates rather than output blocks
emptyHops := p.firstCentre / p.atomSpacing
drops := make([]int, p.octaves, p.octaves)
for i := 0; i < p.octaves; i++ {
factor := unsafeShift(i)
dropHops := emptyHops*unsafeShift(p.octaves-i-1) - emptyHops
drops[i] = ((dropHops * p.fftHop) * factor) / p.atomsPerFrame
}
maxLatPlusDrop := 0
for i := 0; i < p.octaves; i++ {
latPlusDrop := latencies[i] + drops[i]
if latPlusDrop > maxLatPlusDrop {
maxLatPlusDrop = latPlusDrop
}
}
totalLatency := maxLatPlusDrop
lat0 := totalLatency - latencies[0] - drops[0]
totalLatency = roundUp(float64((lat0/p.fftHop)*p.fftHop)) + latencies[0] + drops[0]
// We want (totalLatency - latencies[i]) to be a multiple of 2^i
// for each octave i, so that we do not end up with fractional
// octave latencies below. In theory this is hard, in practice if
// we ensure it for the last octave we should be OK.
finalOctLat := float64(latencies[p.octaves-1])
finalOneFactInt := unsafeShift(p.octaves - 1)
finalOctFact := float64(finalOneFactInt)
totalLatency = int(finalOctLat + finalOctFact*math.Ceil((float64(totalLatency)-finalOctLat)/finalOctFact) + .5)
if DEBUG_CQ {
fmt.Printf("total latency = %v\n", totalLatency)
}
// Padding as in the reference (will be introduced with the
// latency compensation in the loop below)
outputLatency := totalLatency + bigBlockSize - p.firstCentre*unsafeShift(p.octaves-1)
if DEBUG_CQ {
fmt.Printf("bigBlockSize = %v, firstCentre = %v, octaves = %v, so outputLatency = %v\n",
bigBlockSize, p.firstCentre, p.octaves, outputLatency)
}
buffers := make([][]float64, p.octaves, p.octaves)
for i := 0; i < p.octaves; i++ {
factor := unsafeShift(i)
// Calculate the difference between the total latency applied
// across all octaves, and the existing latency due to the
// decimator for this octave, and then convert it back into
// the sample rate appropriate for the output latency of this
// decimator -- including one additional big block of padding
// (as in the reference).
octaveLatency := float64(totalLatency-latencies[i]-drops[i]+bigBlockSize) / float64(factor)
if DEBUG_CQ {
rounded := float64(round(octaveLatency))
fmt.Printf("octave %d: resampler latency = %v, drop = %v, (/factor = %v), octaveLatency = %v -> %v (diff * factor = %v * %v = %v)\n",
i, latencies[i], drops[i], drops[i]/factor, octaveLatency, rounded, octaveLatency-rounded, factor, (octaveLatency-rounded)*float64(factor))
}
sz := int(octaveLatency + 0.5)
buffers[i] = make([]float64, sz, sz)
}
return &ConstantQ{
kernel,
bigBlockSize,
outputLatency,
buffers,
latencies,
decimators,
}
}
func (cq *ConstantQ) ProcessChannel(samples <-chan float64) <-chan []complex128 {
result := make(chan []complex128)
// required := cq.kernel.Properties.fftSize * unsafeShift(cq.octaves - 1)
required := 4096 // HACK - actually figure this out properly.
go func() {
buffer := make([]float64, required, required)
at := 0
for s := range samples {
if at == required {
for _, c := range cq.Process(buffer) {
result <- c
}
at = 0
}
buffer[at] = s
at++
}
for _, c := range cq.Process(buffer[:at]) {
result <- c
}
for _, c := range cq.GetRemainingOutput() {
result <- c
}
close(result)
}()
return result
}
func (cq *ConstantQ) Process(td []float64) [][]complex128 {
apf := cq.kernel.Properties.atomsPerFrame
bpo := cq.kernel.Properties.binsPerOctave
octaves := cq.kernel.Properties.octaves
fftSize := cq.kernel.Properties.fftSize
cq.buffers[0] = append(cq.buffers[0], td...)
for i := 1; i < octaves; i++ {
decimated := cq.decimators[i].Process(td)
cq.buffers[i] = append(cq.buffers[i], decimated...)
}
out := [][]complex128{}
for {
// We could have quite different remaining sample counts in
// different octaves, because (apart from the predictable
// added counts for decimator output on each block) we also
// have variable additional latency per octave
enough := true
for i := 0; i < octaves; i++ {
required := fftSize * unsafeShift(octaves-i-1)
if len(cq.buffers[i]) < required {
enough = false
}
}
if !enough {
break
}
base := len(out)
totalColumns := unsafeShift(octaves-1) * apf
// Pre-fill totalColumns number of empty arrays
out = append(out, make([][]complex128, totalColumns, totalColumns)...)
for octave := 0; octave < octaves; octave++ {
blocksThisOctave := unsafeShift(octaves - octave - 1)
for b := 0; b < blocksThisOctave; b++ {
block := cq.processOctaveBlock(octave)
for j := 0; j < apf; j++ {
target := base + (b*(totalColumns/blocksThisOctave) + (j * ((totalColumns / blocksThisOctave) / apf)))
toAppend := bpo*(octave+1) - len(out[target])
if toAppend > 0 {
out[target] = append(out[target], make([]complex128, toAppend, toAppend)...)
}
for i := 0; i < bpo; i++ {
out[target][bpo*octave+i] = block[j][bpo-i-1]
}
}
}
}
}
return out
}
func (cq *ConstantQ) GetRemainingOutput() [][]complex128 {
// Same as padding added at start, though rounded up
pad := roundUp(float64(cq.OutputLatency)/float64(cq.bigBlockSize)) * cq.bigBlockSize
zeros := make([]float64, pad, pad)
return cq.Process(zeros)
}
func (cq *ConstantQ) processOctaveBlock(octave int) [][]complex128 {
apf := cq.kernel.Properties.atomsPerFrame
bpo := cq.kernel.Properties.binsPerOctave
fftHop := cq.kernel.Properties.fftHop
fftSize := cq.kernel.Properties.fftSize
cv := fft.FFTReal(cq.buffers[octave][:fftSize])
cq.buffers[octave] = cq.buffers[octave][fftHop:]
cqrowvec := cq.kernel.processForward(cv)
// Reform into a column matrix
cqblock := make([][]complex128, apf, apf)
for j := 0; j < apf; j++ {
cqblock[j] = make([]complex128, bpo, bpo)
for i := 0; i < bpo; i++ {
cqblock[j][i] = cqrowvec[i*apf+j]
}
}
return cqblock
}
func (cq *ConstantQ) BinCount() int {
return cq.kernel.BinCount()
}
func (cq *ConstantQ) bpo() int {
return cq.kernel.Properties.binsPerOctave
}