/
Calibrate.hs
66 lines (47 loc) · 2.22 KB
/
Calibrate.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
module Calibrate where
import Data.WAVE
import Data.Complex
import Data.List
import Numeric.FFT
import Util
import ExtProg
-- Calibrated or uncalibrated voice: calibrated means we know the fundamental frequency
-- for pitch 50, uncalibrated means we use the given voice name and text to get it.
type Calibration = Either (String, String, String) Double
-- Determine the fundamental frequency for a voice at the given pitch. Get the spectrum,
-- smoothen it and get the first peak.
voiceFundamental :: String -> String -> Int -> String -> IO (Maybe Double)
voiceFundamental exec text pitch voice = do
p <- voiceSpectrumPitchText exec text pitch voice
let smpp = filter ((> 0.05) . snd) $ smoothen 3 p
lmpp = locMax smpp
fund = case lmpp of
[] -> Nothing
_ -> Just $ fst $ head lmpp
return fund
-- The most general case: set text, calibration pitch, and voice name.
voiceSpectrumPitchText :: String -> String -> Int -> String -> IO [(Double, Double)]
voiceSpectrumPitchText exec text pitch voice = do
spd <- findSpeed exec 120 voice pitch 0.25 text (1, 300) >>= return . fst
wav <- runEspeak exec 120 voice pitch spd text >>= getSample
let fft = filter ((< 2000) . fst) $ waveFFT wav
return fft
-- Run a FFT on a signal given WAVE, returning pairs of frequency-normalized magnitude
-- sorted by decreasing magnitude
waveFFT :: WAVE -> [(Double, Double)]
waveFFT wav =
let sampldbl = map (sampleToDouble . head) $ waveSamples wav
sampcmplx = map (:+ 0) sampldbl
freqdom = map magnitude $ fft sampcmplx
rate = waveFrameRate $ waveHeader wav
freqs = map (\i -> fromIntegral (i * fromIntegral rate) / fromIntegral (length freqdom))
[0 .. length freqdom - 1]
spectrum = zip freqs (normalize freqdom)
sortDescSnd = sortBy $ \(x1, y1) (x2, y2) -> compare y2 y1
in sortDescSnd spectrum
-- Find a minimal range of speed that approximates the given time in seconds
findSpeed :: String -> Int -> String -> Int -> Rational -> String -> (Int, Int) -> IO (Int, Int)
findSpeed exec ampl voice pitch tsec utter (lo, hi) =
rangeSearch 1 (lo, hi) $ \mid -> do
wav <- runEspeak exec ampl voice pitch mid utter >>= getSample
return $ compare (soundLength wav) tsec