From 71e2d154f62798e057feb7d61dc5847ee87b02db Mon Sep 17 00:00:00 2001 From: "Jeremy R. Gray" Date: Mon, 28 Sep 2015 11:46:04 -0400 Subject: [PATCH] enh: offset voice key, version 0.5 --- psychopy/voicekey/__init__.py | 88 +++++------ psychopy/voicekey/vk_plot.py | 283 ---------------------------------- 2 files changed, 35 insertions(+), 336 deletions(-) delete mode 100644 psychopy/voicekey/vk_plot.py diff --git a/psychopy/voicekey/__init__.py b/psychopy/voicekey/__init__.py index c807dab46d4..85633a80f9f 100644 --- a/psychopy/voicekey/__init__.py +++ b/psychopy/voicekey/__init__.py @@ -5,18 +5,16 @@ Copyright (c) Jeremy R. Gray, 2015 License: Distributed under the terms of the GPLv3 -Version: 0.4 Dev status: beta. Can work well in some circumstances, not widely tested. -Smoother with 64-bit python and pyo. _BaseVoiceKey is the main abstract class. Subclass and override the detect() -method. See SimpleThresholdVoiceKey - -See readme.txt for notes, vk_plot.py for demos and testing. +method. See SimpleThresholdVoiceKey or OnsetVoiceKey for examples. """ from __future__ import division +__version__ = 0.5 + import sys import os import numpy as np @@ -32,8 +30,7 @@ # pyo_server will point to a booted pyo server once pyo_init() is called: pyo_server = None -# Various bits and pieces: -from . signal import _BaseVoiceKeySignal +# helper functions for time, signal processing, and file I/O: from . vk_tools import * # Constants: @@ -207,7 +204,7 @@ def _set_signaler(self): def _set_tables(self): """Set up the pyo tables (allocate memory, etc). - One source -> three pyo tables: chunk=short, whole=all, baseline + One source -> three pyo tables: chunk=short, whole=all, baseline. triggers fill tables from self._source; make triggers in .start() """ sec_per_chunk = self.msPerChunk / 1000. 
@@ -439,9 +436,10 @@ def wait_for_event(self, plus=0): if naptime > 0: sleep(naptime) self.stop() - # next sleep() helps avoid pyo "ReferenceError: weakly-referenced - # object no longer exists" + # next sleep() helps avoid pyo error: + # "ReferenceError: weakly-referenced object no longer exists" sleep(1.5 * self.msPerChunk / 1000.) + return self.elapsed def save(self, ftype='', dtype='int16'): @@ -515,62 +513,46 @@ def detect(self): self.trip() -class VoicelessPlosiveVoiceKey(_BaseVoiceKey): - """Class to detect and signal the offset of a vowel followed by a voiceless - plosive, e.g. the end of "ah" in utterance "ah pa". +class OffsetVoiceKey(_BaseVoiceKey): + """Class to detect the offset of a single-word utterance. + + Ends the recording after a delay; default = 300ms later. """ - def __init__(self, sec=1.5, msPerChunk=2, file_out='', file_in='', - duration=0.070, proportion=0.7, signaler=None, - start=0, stop=-1, baseline=0): + def __init__(self, sec=10, file_out='', file_in='', delay=0.3, **kwargs): - """Adjust parameters `duration` and `proportion` as needed. + """Adjust the `delay` parameter (in seconds) as needed. """ config = {'sec': sec, - 'msPerChunk': msPerChunk, 'file_out': file_out, 'file_in': file_in, - 'duration': duration, # min duration of vowel, in sec - 'proportion': proportion, # min prop of chunks > threshold - 'signaler': signaler, # obj for obj.signal() upon event - 'start': start, - 'stop': stop, - 'baseline': baseline, + 'delay': delay, } - super(VoicelessPlosiveVoiceKey, self).__init__(**config) + kwargs.update(config) + super(OffsetVoiceKey, self).__init__(**kwargs) def detect(self): - """Detect the near-end of the first sustained speech-like sound. - - Called every chunk, so keep it efficient. - - Define multiple conditions. Trip (= trigger the event) if all are met.
- - minimum time has elapsed (baseline period) - - have gone above a minimum threshold recently - - met that threshold for some proportion of recent chunks (hold time) - - sound is currently greatly attenuated (trailing edge) + """Wait for onset, offset, delay, then end the recording. """ - if self.event_detected or not self.baseline: return - - thr_norm = 0.03 * self.max_bp # 3% of recent max value; not ensured to be recent - if not hasattr(self, '_hold'): - # compute once, cache - self._hold = -1 * int(self.config['duration'] * 1000. / self.msPerChunk) - self._offset = -1 * int(8. / self.msPerChunk) # ms -> chunks - - vals = self.power_bp[self._hold:self._offset] - max_val = np.max(vals) - prop_over_thr = np.mean(vals > thr_norm) # mean of 0, 1's - loud_enough = max_val > 5 * self.baseline and max_val > 500 - - recent = np.mean(self.power_bp[self._offset:]) - quiet_recently = recent < 2 * thr_norm - - conditions = (prop_over_thr > self.config['proportion'], - loud_enough, - quiet_recently) - if all(conditions): + if not self.event_onset: + window = 5 # chunks + threshold = 10 * self.baseline + conditions = all([x > threshold for x in self.power_bp[-window:]]) + if conditions: + self.event_lag = window * self.msPerChunk / 1000. + self.event_onset = self.elapsed - self.event_lag + self.event_offset = 0 + elif not self.event_offset: + window = 25 + threshold = 10 * self.baseline + conditions = all([x < threshold for x in self.power_bp[-window:]]) + if conditions: + self.event_lag = window * self.msPerChunk / 1000. 
+ self.event_offset = self.elapsed - self.event_lag + self.event_time = self.event_offset # for plotting + elif self.elapsed > self.event_offset + self.config['delay']: self.trip() + self.stop() ### ----- Convenience classes ------------------------------------------------- diff --git a/psychopy/voicekey/vk_plot.py b/psychopy/voicekey/vk_plot.py deleted file mode 100644 index adb338bef37..00000000000 --- a/psychopy/voicekey/vk_plot.py +++ /dev/null @@ -1,283 +0,0 @@ -#!/usr/bin/env python2 -# encoding: utf-8 - -import random -from voicekey import * - -usage = """usage: python {0} [filename] [options] - -If no filename, will record to a new file using the microphone as input. -If given a filename, will use the file as input. - -Options: ---help, -h: print this message and exit ---test: run stability test at different buffersizes and msPerChunk ---10000: run 10000 iterations, summarized the mean & std chunks and event time ---rec: will use the Record class to record using microphone (no real-time analysis) ---tone: will use the Player class to play a tone (no real-time analysis) -""".format(sys.argv[0]) - -##### --- Demo / dev usage ----------------------------------------------- - -def demo_plot(vk, filename='', select=(0, -1)): - """Plot sound data, stats, processing time, and event marker. - - Known limitation: what if file rate differs from vk.rate? 
- """ - from pylab import (subplot, plot, show, title, axvline, axhline, - autoscale, ylim, fill_between, annotate, figtext) - - msPerChunk = vk.msPerChunk - rms = vk.power_bp - zx = vk.zcross - ts = vk.t_proc - t_on = vk.t_enter - duration = vk.t_exit[-1] - vk.t_enter[0] - mark = vk.event_onset - - # times within a file that were used to start and stop the detection: - t_start, t_stop = select - t_stop = t_start + duration # wait_for_event might trim the recording before it times out - - if not t_stop > t_start: - label = '{0}: event= {1:.3f}s'.format(filename, mark) - elif t_start == 0: - label_ = '{0} [{1:.3f}:{2:.3f}s]: event= {3:.3f}s' - label = label_.format(filename, t_start, t_stop, mark) - else: - label_ = '{0} [{1:.3f}:{2:.3f}s]: event= {3:.3f}s [{4:.3f}s]' - label = label_.format(filename, t_start, t_stop, mark, mark + t_start) - - # compensate for chunk slippage (assumes a constant rate across recording): - ct_ratio = msPerChunk * (len(rms) / duration) - if ct_ratio: - print("{0}; t/c slippage={1:.2f}".format(label, 1000. / ct_ratio)) - - ### ----- Plot ----- - lw = 0.5 # linewidth - - subplot(5, 1, 1) - title(label) - - # mark the begin & end of silent (baseline) period - base_start, base_stop = T_BASELINE_ON, T_BASELINE_OFF - - # raw sound data, zero slippage: - if hasattr(vk, '_wholetable'): - if filename and os.path.isfile(filename): - rate, raw = samples_from_file(filename, t_start, t_stop) - else: - rate = vk.rate - raw = np.array(vk._wholetable.getTable()) - if vk.sec > 12: - plot([0, 1], color='red') - annotate(' cowardly refusing to plot a long recording', (0., 0.8)) - else: - assert rate == vk.rate # file sample rate should be used for detection as well - plot(raw, color='black', linewidth=lw / 2.) 
- axvline(vk.event_time * rate, color='red', linewidth=2) - axvline(base_start * rate, color='blue', linewidth=lw, alpha=0.3) # start silent period - axvline(base_stop * rate, color='blue', linewidth=lw, alpha=0.3) - annotate('raw data by samples ({0:.0f} Hz)'.format(rate), - (0.02 * len(raw), 0.75 * max(raw))) - autoscale(axis='x', tight=True) - - mark_chunks = vk.event_time * ct_ratio - - # RMS of bandpass filtered data (vk.power_bp) - if rms: - subplot(5, 1, 2) - times = np.linspace(0, len(rms) * msPerChunk, len(rms)) - fill_between(times, rms, alpha=0.4, linewidth=lw) - plot(times, smooth(rms, 5)) - autoscale(axis='x', tight=True) - axvline(base_start * ct_ratio, color='blue', linewidth=lw, alpha=0.3) # start silent period - axvline(base_stop * ct_ratio, color='blue', linewidth=lw, alpha=0.3) - axvline(mark_chunks, color='red', linewidth=2) - annotate('RMS (audio power) by chunk', (0.02 * len(rms), 0.85 * max(rms))) - autoscale(axis='x', tight=True) - - # zero-crossings per ms within bandpass-8k-filtered data - if zx: - subplot(5, 1, 3) - times = np.linspace(0, len(zx) * msPerChunk, len(zx)) - plot(times, zx, linewidth=lw, color='gray') - plot(times, smooth(zx, 15), linewidth=lw*2, color='black') - autoscale(axis='x', tight=True) - axvline(base_start * ct_ratio, color='blue', linewidth=lw, alpha=0.3) # baseline - axvline(base_stop * ct_ratio, color='blue', linewidth=lw, alpha=0.3) - axvline(mark_chunks, color='red', linewidth=2) - base_start_m = int(T_BASELINE_ON * 1000. / msPerChunk) - base_stop_m = int(T_BASELINE_OFF * 1000. 
/ msPerChunk) - hline = np.mean(zx[base_start_m:base_stop_m]) - axhline(hline, color='green') - annotate("zero-crossings per ms", (0.02 * len(zx), 0.85 * min(25, max(zx)))) - autoscale(axis='x', tight=True) - ylim([0, 25]) - - # timing info - if len(ts): - subplot(5, 1, 4) - times = np.linspace(0, len(ts) * msPerChunk, len(ts)) - # proportion of time actually used during processing each chunk - warn = 0.8 # warn if 80% of msPerChunk time is used for processing - axhline(warn, color='red', linewidth=lw) - fill_between(times, ts, alpha=0.5, linewidth=lw, color='green') - autoscale(axis='x', tight=True) - axvline(mark_chunks, color='black', linewidth=1) - annotate('proportion of time used to process a chunk', (0.02 * len(ts), 0.85)) - autoscale(axis='x', tight=True) - ylim([0, 1]) - - # ratio of measured chunk-to-chunk time relative to chunk size - # clock reporting issues can make smoothed data more interpretable - if t_on: - subplot(5, 1, 5) - times = np.linspace(0, len(t_on) * msPerChunk, len(t_on)-1) - tc_ratio = (np.array(t_on[1:]) - np.array(t_on[:-1])) * 1000. 
/ msPerChunk - plot(times, tc_ratio, alpha=0.2, linewidth=lw) - win = 16 - sm_t = smooth(tc_ratio, win=win) - sm_t[:win] = np.ones(win) - sm_t[-win:] = np.ones(win) - if np.mean(sm_t) > 1.12: - color = 'red' - else: - color = 'black' - fill_between(times, 1, sm_t, linewidth=0, color=color, alpha=0.7) - annotate('t/c slippage by chunk, mean={0:.2f}'.format(np.mean(tc_ratio)), (0.02 * len(times), 1.7)) - autoscale(axis='x', tight=True) - ylim([0, 2.5]) - - bits = ('32-bit', '64-bit')[have_pyo64] - footnote = 'chunk= {0} ms, pyo: buffer= {1}, {2}'.format(msPerChunk, pyo_buffer, bits) - figtext(0.99, 0.95, footnote, horizontalalignment='right') - figtext(0.99, 0.01, "baseline: {0:.3f}".format(vk.baseline), horizontalalignment='right') - show() - -def demo_get_signaler(): - if '--u3' in sys.argv: - from voicekey.labjack_vks import LabJackU3VoiceKeySignal - sig = LabJackU3VoiceKeySignal() - else: - from voicekey.demo_vks import DemoVoiceKeySignal - sig = DemoVoiceKeySignal() - print ('Using {0}'.format(sig)) - return sig - -def demo_file_input(msPerChunk, file_in, select=(0, -1), plot=True): - if not os.path.isfile(file_in): - raise IOError('file??') - start, stop = select - vk = OnsetVoiceKey(msPerChunk=msPerChunk, - file_in=file_in, - signaler=demo_get_signaler(), - start=start, - stop=stop, - ) - sleep(random.random()/4) # detection time should be invariant - print('playing: {0}'.format(file_in)) - if plot: - vk.wait_for_event(0.5) - demo_plot(vk, vk.file_in, select=select) - else: - vk.wait_for_event(0.001) # faster return for speedier --10000 testing - assert vk.started # start() called - assert len(vk.t_enter) # do_chunk() called - return len(vk.t_enter), vk.event_onset - -def demo_mic_input(msPerChunk, plot=True): - vk = OnsetVoiceKey(sec=2.5, - msPerChunk=msPerChunk, - file_out='rec.wav', - signaler=demo_get_signaler() - ) - vk.wait_for_event(plus=0.6) # start, wait for event or time out, end 0.6s after event - assert len(vk.t_enter) - if plot: - demo_plot(vk, 
vk.filename) - -def test_stability(file_in): - # test stability of detection at various buf size + msPerChunk combinations - # Also want to test detection via mic <--> detection from file - # test: play a sound file, while recording - - print('\nfile: {0}'.format(file_in)) - results = {} - sl = {} # slippage - start, stop = 0, -1 - # for testfile in []: - sig = demo_get_signaler() - for rate in [44100]: - for buf in [32]: #[1, 8, 16, 32, 64]: - pyo_init(rate=rate, buffersize=buf) - bandpass_pre_cache(rate=rate) - for msPerChunk in [0.65, 1, 2, 2.4, 2.7]: - ts = [] - slip = [] - for i in range(12): - try: - vk = OnsetVoiceKey( - file_in=file_in, - signaler=sig, - start=start, - stop=stop, - config={'msPerChunk': msPerChunk} - ) - vk.wait_for_event(plus=0.01) - ts.append(round(vk.event_time, 4)) - slip.append(vk.slippage) - except: - t = None - raise - r = (rate, buf, msPerChunk) - results[r] = ts - sl[r] = slip - print("{0:.3f} {1:.4f}, slip {2:.2f} <-- {3}".format(np.mean(results[r]), np.std(results[r]), np.mean(sl[r]), r)) - -if __name__ == '__main__': - """Demo and diagnostics. See --help for usage. - """ - - if '-h' in sys.argv or '--help' in sys.argv: - sys.exit(usage) - - testfile = 'testfile.flac' - if '--test' in sys.argv: - test_stability(testfile) - sys.exit() - - pyo_buffer = 32 - pyo_init(rate=RATE, buffersize=pyo_buffer) - bandpass_pre_cache(rate=RATE) - msPerChunk = 2. 
- - if '--rec' in sys.argv: - r = Recorder() - r.record() - elif '--tone' in sys.argv: - p = Player(msPerChunk=msPerChunk, source=apodize(tone())) - p.play() - demo_plot(p) - elif '--10000' in sys.argv: - select = (0.4, .7) - results = [] - for i in range(10000): - print i, - try: - r = demo_file_input(msPerChunk, testfile, select, plot=False) - results.append(r) - except KeyboardInterrupt, AssertionError: - break - r, t = zip(*results) - msg = '\n{0} repetitions\nchunks : {1:.2f} {2:.2f} (mean, std)' - print(msg.format(i + 1, np.mean(r), np.std(r))) - msg = 't_event: {0:.4f} {1:.5f} (mean, std)' - print(msg.format(np.mean(t), np.std(t))) - elif len(sys.argv) > 1: - # select a portion of the file by (start, stop) times: - select = (0, -1) # default = whole file - demo_file_input(msPerChunk, sys.argv[1], select) - else: - print('recording: say something!') - demo_mic_input(msPerChunk)