RF: more robust pyaudio-based STVK

jeremygray · Feb 8, 2015 · 9f48f44 · 9f48f44
1 parent 2bf6c92
commit 9f48f44
Showing 1 changed file with 50 additions and 19 deletions.
diff --git a/psychopy/voicekey.py b/psychopy/voicekey.py
@@ -1,10 +1,11 @@
 #!/usr/bin/env python2
 
 """
-PsychoPy basic voice-key, as adapted from OpenSesame.
+PsychoPy-compatible basic voice-key; requires installation of pyaudio.
 
-Listens for a sound from the microphone.
-OpenSesame version adapted from:
+Author: Jeremy R. Gray
+
+Adapted from Sebastian Mathot's code, which was based on:
 http://stackoverflow.com/questions/4160175/detect-tap-with-pyaudio-from-live-mic
 """
 
@@ -18,16 +19,22 @@
     logging.error('voice key not available; install pyaudio and try again')
     raise
 
-SHORT_NORMALIZE = 1.0 / 32768
+# use 16-bit signed ints:
 paFORMAT = pyaudio.paInt16
+paFORMAT_NORM = 1.0 / 2**15
+npFORMAT = np.int16
 
 def rms_pa(block_pa):
-    """Get root mean square as a measure of loudness; expects pyaudio block
+    """Get root mean square as a measure of loudness
+
+    Expects a pyaudio block of 16-bit signed samples; the RMS is scaled by 1/2**15, so it lies in 0 .. 1.
     """
-    block_np = np.fromstring(block_pa, dtype=np.int16).astype(dtype=np.float32)
+    block_np = np.fromstring(block_pa, dtype=npFORMAT).astype(dtype=np.float32)
     if len(block_np.shape) > 1:
-        return np.sqrt(np.mean(block_np ** 2, axis=1)) * SHORT_NORMALIZE
-    return np.sqrt(np.mean(block_np ** 2)) * SHORT_NORMALIZE
+        rms = np.sqrt(np.mean(block_np ** 2, axis=1))
+    else:
+        rms = np.sqrt(np.mean(block_np ** 2))
+    return rms * paFORMAT_NORM
 
 class SimpleThresholdVoiceKey(object):
     """Voice key trips when sound loudness goes above a threshold, returns RT.
@@ -37,25 +44,43 @@ class SimpleThresholdVoiceKey(object):
 
     If the timeout expires, the returned time will be 0.0 seconds.
     """
-    def __init__(self, threshold=0.050, channels=2, device=0,
-                 input_block_secs=0.01, timeout=10):
+    def __init__(self, threshold=0.05, channels=1, device=None,
+                 input_block_secs=0.01, timeout=10, log=True):
         self.threshold = threshold
-        self.channels = channels
-        self.device = device
         self.input_block_secs = input_block_secs
         self.timeout = timeout
 
-        # hard-coded
-        self.rate = 44100
-
-    def waitForTrip(self):
+        # get the input device and its expected input rate:
+        pa = pyaudio.PyAudio()
+        devs = []
+        for i in range(pa.get_device_count()):
+            info = pa.get_device_info_by_index(i)
+            if info['maxInputChannels']:
+                devs.append((i, info))
+        if not devs:
+            raise IOError('no sound input devices found')
+        if device is not None:
+            self.device = device
+        else:
+            self.device = devs[0][0]  # device by number
+        self.device_info = devs[self.device][1]
+
+        self.rate = int(self.device_info['defaultSampleRate'])
+        self.input_frames = int(self.rate * self.input_block_secs)
+        self.channels = 1 + int(channels == 2)
+        if log:
+            args = [self.device, self.device_info['name'], self.channels]
+            logging.info('voicekey: input device {0}, {1}, channels={2}'.format(*args))
+            args = [self.rate, self.threshold, self.timeout]
+            logging.info('voicekey: rate {0}Hz, rms threshold {1}, timeout {2}s'.format(*args))
+
+    def wait(self):
         """blocks program execution until microphone input exceeds threshold
 
         If this takes longer than the timeout, returns 0.000 for the time.
         """
         # Measure the start of the response interval
         start_time = core.getTime()
-        self.input_frames = int(self.rate * self.input_block_secs)
 
         # Open the mic
         self.stream = pyaudio.PyAudio().open(format=paFORMAT,
@@ -70,7 +95,8 @@ def waitForTrip(self):
             try:
                 block_pa = self.stream.read(self.input_frames)
             except IOError as e:
-                logging.error('voice key error: ' + e)
+                logging.error('voice key error: ' + str(e))
+                raise
             loudness = rms_pa(block_pa)
             audioRT = core.getTime() - start_time
             if loudness > self.threshold:
@@ -87,4 +113,9 @@ def waitForTrip(self):
 STVK = SimpleThresholdVoiceKey
 
 if __name__ == '__main__':
-    print STVK().waitForTrip()
+    vk = STVK()
+    p_args = [vk.device, vk.device_info['name'], vk.channels]
+    print('voice-key: input device {0}, {1}, channels={2}'.format(*p_args))
+    p_args = [vk.rate, vk.threshold, vk.timeout]
+    print('           rate {0}Hz, rms threshold {1}, timeout {2}s'.format(*p_args))
+    print vk.wait()