From 71e2d154f62798e057feb7d61dc5847ee87b02db Mon Sep 17 00:00:00 2001 From: "Jeremy R. Gray" Date: Mon, 28 Sep 2015 11:46:04 -0400 Subject: [PATCH] enh: offset voice key, version 0.5 --- psychopy/voicekey/__init__.py | 88 +++++------ psychopy/voicekey/vk_plot.py | 283 ---------------------------------- 2 files changed, 35 insertions(+), 336 deletions(-) delete mode 100644 psychopy/voicekey/vk_plot.py diff --git a/psychopy/voicekey/__init__.py b/psychopy/voicekey/__init__.py index c807dab46d4..85633a80f9f 100644 --- a/psychopy/voicekey/__init__.py +++ b/psychopy/voicekey/__init__.py @@ -5,18 +5,16 @@ Copyright (c) Jeremy R. Gray, 2015 License: Distributed under the terms of the GPLv3 -Version: 0.4 Dev status: beta. Can work well in some circumstances, not widely tested. -Smoother with 64-bit python and pyo. _BaseVoiceKey is the main abstract class. Subclass and override the detect() -method. See SimpleThresholdVoiceKey - -See readme.txt for notes, vk_plot.py for demos and testing. +method. See SimpleThresholdVoiceKey or OnsetVoiceKey for examples. """ from __future__ import division +__version__ = 0.5 + import sys import os import numpy as np @@ -32,8 +30,7 @@ # pyo_server will point to a booted pyo server once pyo_init() is called: pyo_server = None -# Various bits and pieces: -from . signal import _BaseVoiceKeySignal +# helper functions for time, signal processing, and file I/O: from . vk_tools import * # Constants: @@ -207,7 +204,7 @@ def _set_signaler(self): def _set_tables(self): """Set up the pyo tables (allocate memory, etc). - One source -> three pyo tables: chunk=short, whole=all, baseline + One source -> three pyo tables: chunk=short, whole=all, baseline. triggers fill tables from self._source; make triggers in .start() """ sec_per_chunk = self.msPerChunk / 1000. 
@@ -439,9 +436,10 @@ def wait_for_event(self, plus=0): if naptime > 0: sleep(naptime) self.stop() - # next sleep() helps avoid pyo "ReferenceError: weakly-referenced - # object no longer exists" + # next sleep() helps avoid pyo error: + # "ReferenceError: weakly-referenced object no longer exists" sleep(1.5 * self.msPerChunk / 1000.) + return self.elapsed def save(self, ftype='', dtype='int16'): @@ -515,62 +513,46 @@ def detect(self): self.trip() -class VoicelessPlosiveVoiceKey(_BaseVoiceKey): - """Class to detect and signal the offset of a vowel followed by a voiceless - plosive, e.g. the end of "ah" in utterance "ah pa". +class OffsetVoiceKey(_BaseVoiceKey): + """Class to detect the offset of a single-word utterance. + + Ends the recording after a delay; default = 300ms later. """ - def __init__(self, sec=1.5, msPerChunk=2, file_out='', file_in='', - duration=0.070, proportion=0.7, signaler=None, - start=0, stop=-1, baseline=0): + def __init__(self, sec=10, file_out='', file_in='', delay=0.3, **kwargs): - """Adjust parameters `duration` and `proportion` as needed. + """Adjust the `delay` parameter (in seconds) as needed. """ config = {'sec': sec, - 'msPerChunk': msPerChunk, 'file_out': file_out, 'file_in': file_in, - 'duration': duration, # min duration of vowel, in sec - 'proportion': proportion, # min prop of chunks > threshold - 'signaler': signaler, # obj for obj.signal() upon event - 'start': start, - 'stop': stop, - 'baseline': baseline, + 'delay': delay, } - super(VoicelessPlosiveVoiceKey, self).__init__(**config) + kwargs.update(config) + super(OffsetVoiceKey, self).__init__(**kwargs) def detect(self): - """Detect the near-end of the first sustained speech-like sound. - - Called every chunk, so keep it efficient. - - Define multiple conditions. Trip (= trigger the event) if all are met.
- - minimum time has elapsed (baseline period) - - have gone above a minimum threshold recently - - met that threshold for some proportion of recent chunks (hold time) - - sound is currently greatly attenuated (trailing edge) + """Wait for onset, offset, delay, then end the recording. """ - if self.event_detected or not self.baseline: return - - thr_norm = 0.03 * self.max_bp # 3% of recent max value; not ensured to be recent - if not hasattr(self, '_hold'): - # compute once, cache - self._hold = -1 * int(self.config['duration'] * 1000. / self.msPerChunk) - self._offset = -1 * int(8. / self.msPerChunk) # ms -> chunks - - vals = self.power_bp[self._hold:self._offset] - max_val = np.max(vals) - prop_over_thr = np.mean(vals > thr_norm) # mean of 0, 1's - loud_enough = max_val > 5 * self.baseline and max_val > 500 - - recent = np.mean(self.power_bp[self._offset:]) - quiet_recently = recent < 2 * thr_norm - - conditions = (prop_over_thr > self.config['proportion'], - loud_enough, - quiet_recently) - if all(conditions): + if not self.event_onset: + window = 5 # chunks + threshold = 10 * self.baseline + conditions = all([x > threshold for x in self.power_bp[-window:]]) + if conditions: + self.event_lag = window * self.msPerChunk / 1000. + self.event_onset = self.elapsed - self.event_lag + self.event_offset = 0 + elif not self.event_offset: + window = 25 + threshold = 10 * self.baseline + conditions = all([x < threshold for x in self.power_bp[-window:]]) + if conditions: + self.event_lag = window * self.msPerChunk / 1000. 
+ self.event_offset = self.elapsed - self.event_lag + self.event_time = self.event_offset # for plotting + elif self.elapsed > self.event_offset + self.config['delay']: self.trip() + self.stop() ### ----- Convenience classes ------------------------------------------------- diff --git a/psychopy/voicekey/vk_plot.py b/psychopy/voicekey/vk_plot.py deleted file mode 100644 index adb338bef37..00000000000 --- a/psychopy/voicekey/vk_plot.py +++ /dev/null @@ -1,283 +0,0 @@ -#!/usr/bin/env python2 -# encoding: utf-8 - -import random -from voicekey import * - -usage = """usage: python {0} [filename] [options] - -If no filename, will record to a new file using the microphone as input. -If given a filename, will use the file as input. - -Options: ---help, -h: print this message and exit ---test: run stability test at different buffersizes and msPerChunk ---10000: run 10000 iterations, summarized the mean & std chunks and event time ---rec: will use the Record class to record using microphone (no real-time analysis) ---tone: will use the Player class to play a tone (no real-time analysis) -""".format(sys.argv[0]) - -##### --- Demo / dev usage ----------------------------------------------- - -def demo_plot(vk, filename='', select=(0, -1)): - """Plot sound data, stats, processing time, and event marker. - - Known limitation: what if file rate differs from vk.rate? 
- """ - from pylab import (subplot, plot, show, title, axvline, axhline, - autoscale, ylim, fill_between, annotate, figtext) - - msPerChunk = vk.msPerChunk - rms = vk.power_bp - zx = vk.zcross - ts = vk.t_proc - t_on = vk.t_enter - duration = vk.t_exit[-1] - vk.t_enter[0] - mark = vk.event_onset - - # times within a file that were used to start and stop the detection: - t_start, t_stop = select - t_stop = t_start + duration # wait_for_event might trim the recording before it times out - - if not t_stop > t_start: - label = '{0}: event= {1:.3f}s'.format(filename, mark) - elif t_start == 0: - label_ = '{0} [{1:.3f}:{2:.3f}s]: event= {3:.3f}s' - label = label_.format(filename, t_start, t_stop, mark) - else: - label_ = '{0} [{1:.3f}:{2:.3f}s]: event= {3:.3f}s [{4:.3f}s]' - label = label_.format(filename, t_start, t_stop, mark, mark + t_start) - - # compensate for chunk slippage (assumes a constant rate across recording): - ct_ratio = msPerChunk * (len(rms) / duration) - if ct_ratio: - print("{0}; t/c slippage={1:.2f}".format(label, 1000. / ct_ratio)) - - ### ----- Plot ----- - lw = 0.5 # linewidth - - subplot(5, 1, 1) - title(label) - - # mark the begin & end of silent (baseline) period - base_start, base_stop = T_BASELINE_ON, T_BASELINE_OFF - - # raw sound data, zero slippage: - if hasattr(vk, '_wholetable'): - if filename and os.path.isfile(filename): - rate, raw = samples_from_file(filename, t_start, t_stop) - else: - rate = vk.rate - raw = np.array(vk._wholetable.getTable()) - if vk.sec > 12: - plot([0, 1], color='red') - annotate(' cowardly refusing to plot a long recording', (0., 0.8)) - else: - assert rate == vk.rate # file sample rate should be used for detection as well - plot(raw, color='black', linewidth=lw / 2.) 
- axvline(vk.event_time * rate, color='red', linewidth=2) - axvline(base_start * rate, color='blue', linewidth=lw, alpha=0.3) # start silent period - axvline(base_stop * rate, color='blue', linewidth=lw, alpha=0.3) - annotate('raw data by samples ({0:.0f} Hz)'.format(rate), - (0.02 * len(raw), 0.75 * max(raw))) - autoscale(axis='x', tight=True) - - mark_chunks = vk.event_time * ct_ratio - - # RMS of bandpass filtered data (vk.power_bp) - if rms: - subplot(5, 1, 2) - times = np.linspace(0, len(rms) * msPerChunk, len(rms)) - fill_between(times, rms, alpha=0.4, linewidth=lw) - plot(times, smooth(rms, 5)) - autoscale(axis='x', tight=True) - axvline(base_start * ct_ratio, color='blue', linewidth=lw, alpha=0.3) # start silent period - axvline(base_stop * ct_ratio, color='blue', linewidth=lw, alpha=0.3) - axvline(mark_chunks, color='red', linewidth=2) - annotate('RMS (audio power) by chunk', (0.02 * len(rms), 0.85 * max(rms))) - autoscale(axis='x', tight=True) - - # zero-crossings per ms within bandpass-8k-filtered data - if zx: - subplot(5, 1, 3) - times = np.linspace(0, len(zx) * msPerChunk, len(zx)) - plot(times, zx, linewidth=lw, color='gray') - plot(times, smooth(zx, 15), linewidth=lw*2, color='black') - autoscale(axis='x', tight=True) - axvline(base_start * ct_ratio, color='blue', linewidth=lw, alpha=0.3) # baseline - axvline(base_stop * ct_ratio, color='blue', linewidth=lw, alpha=0.3) - axvline(mark_chunks, color='red', linewidth=2) - base_start_m = int(T_BASELINE_ON * 1000. / msPerChunk) - base_stop_m = int(T_BASELINE_OFF * 1000. 
/ msPerChunk) - hline = np.mean(zx[base_start_m:base_stop_m]) - axhline(hline, color='green') - annotate("zero-crossings per ms", (0.02 * len(zx), 0.85 * min(25, max(zx)))) - autoscale(axis='x', tight=True) - ylim([0, 25]) - - # timing info - if len(ts): - subplot(5, 1, 4) - times = np.linspace(0, len(ts) * msPerChunk, len(ts)) - # proportion of time actually used during processing each chunk - warn = 0.8 # warn if 80% of msPerChunk time is used for processing - axhline(warn, color='red', linewidth=lw) - fill_between(times, ts, alpha=0.5, linewidth=lw, color='green') - autoscale(axis='x', tight=True) - axvline(mark_chunks, color='black', linewidth=1) - annotate('proportion of time used to process a chunk', (0.02 * len(ts), 0.85)) - autoscale(axis='x', tight=True) - ylim([0, 1]) - - # ratio of measured chunk-to-chunk time relative to chunk size - # clock reporting issues can make smoothed data more interpretable - if t_on: - subplot(5, 1, 5) - times = np.linspace(0, len(t_on) * msPerChunk, len(t_on)-1) - tc_ratio = (np.array(t_on[1:]) - np.array(t_on[:-1])) * 1000. 
/ msPerChunk - plot(times, tc_ratio, alpha=0.2, linewidth=lw) - win = 16 - sm_t = smooth(tc_ratio, win=win) - sm_t[:win] = np.ones(win) - sm_t[-win:] = np.ones(win) - if np.mean(sm_t) > 1.12: - color = 'red' - else: - color = 'black' - fill_between(times, 1, sm_t, linewidth=0, color=color, alpha=0.7) - annotate('t/c slippage by chunk, mean={0:.2f}'.format(np.mean(tc_ratio)), (0.02 * len(times), 1.7)) - autoscale(axis='x', tight=True) - ylim([0, 2.5]) - - bits = ('32-bit', '64-bit')[have_pyo64] - footnote = 'chunk= {0} ms, pyo: buffer= {1}, {2}'.format(msPerChunk, pyo_buffer, bits) - figtext(0.99, 0.95, footnote, horizontalalignment='right') - figtext(0.99, 0.01, "baseline: {0:.3f}".format(vk.baseline), horizontalalignment='right') - show() - -def demo_get_signaler(): - if '--u3' in sys.argv: - from voicekey.labjack_vks import LabJackU3VoiceKeySignal - sig = LabJackU3VoiceKeySignal() - else: - from voicekey.demo_vks import DemoVoiceKeySignal - sig = DemoVoiceKeySignal() - print ('Using {0}'.format(sig)) - return sig - -def demo_file_input(msPerChunk, file_in, select=(0, -1), plot=True): - if not os.path.isfile(file_in): - raise IOError('file??') - start, stop = select - vk = OnsetVoiceKey(msPerChunk=msPerChunk, - file_in=file_in, - signaler=demo_get_signaler(), - start=start, - stop=stop, - ) - sleep(random.random()/4) # detection time should be invariant - print('playing: {0}'.format(file_in)) - if plot: - vk.wait_for_event(0.5) - demo_plot(vk, vk.file_in, select=select) - else: - vk.wait_for_event(0.001) # faster return for speedier --10000 testing - assert vk.started # start() called - assert len(vk.t_enter) # do_chunk() called - return len(vk.t_enter), vk.event_onset - -def demo_mic_input(msPerChunk, plot=True): - vk = OnsetVoiceKey(sec=2.5, - msPerChunk=msPerChunk, - file_out='rec.wav', - signaler=demo_get_signaler() - ) - vk.wait_for_event(plus=0.6) # start, wait for event or time out, end 0.6s after event - assert len(vk.t_enter) - if plot: - demo_plot(vk, 
vk.filename) - -def test_stability(file_in): - # test stability of detection at various buf size + msPerChunk combinations - # Also want to test detection via mic <--> detection from file - # test: play a sound file, while recording - - print('\nfile: {0}'.format(file_in)) - results = {} - sl = {} # slippage - start, stop = 0, -1 - # for testfile in []: - sig = demo_get_signaler() - for rate in [44100]: - for buf in [32]: #[1, 8, 16, 32, 64]: - pyo_init(rate=rate, buffersize=buf) - bandpass_pre_cache(rate=rate) - for msPerChunk in [0.65, 1, 2, 2.4, 2.7]: - ts = [] - slip = [] - for i in range(12): - try: - vk = OnsetVoiceKey( - file_in=file_in, - signaler=sig, - start=start, - stop=stop, - config={'msPerChunk': msPerChunk} - ) - vk.wait_for_event(plus=0.01) - ts.append(round(vk.event_time, 4)) - slip.append(vk.slippage) - except: - t = None - raise - r = (rate, buf, msPerChunk) - results[r] = ts - sl[r] = slip - print("{0:.3f} {1:.4f}, slip {2:.2f} <-- {3}".format(np.mean(results[r]), np.std(results[r]), np.mean(sl[r]), r)) - -if __name__ == '__main__': - """Demo and diagnostics. See --help for usage. - """ - - if '-h' in sys.argv or '--help' in sys.argv: - sys.exit(usage) - - testfile = 'testfile.flac' - if '--test' in sys.argv: - test_stability(testfile) - sys.exit() - - pyo_buffer = 32 - pyo_init(rate=RATE, buffersize=pyo_buffer) - bandpass_pre_cache(rate=RATE) - msPerChunk = 2. 
- - if '--rec' in sys.argv: - r = Recorder() - r.record() - elif '--tone' in sys.argv: - p = Player(msPerChunk=msPerChunk, source=apodize(tone())) - p.play() - demo_plot(p) - elif '--10000' in sys.argv: - select = (0.4, .7) - results = [] - for i in range(10000): - print i, - try: - r = demo_file_input(msPerChunk, testfile, select, plot=False) - results.append(r) - except KeyboardInterrupt, AssertionError: - break - r, t = zip(*results) - msg = '\n{0} repetitions\nchunks : {1:.2f} {2:.2f} (mean, std)' - print(msg.format(i + 1, np.mean(r), np.std(r))) - msg = 't_event: {0:.4f} {1:.5f} (mean, std)' - print(msg.format(np.mean(t), np.std(t))) - elif len(sys.argv) > 1: - # select a portion of the file by (start, stop) times: - select = (0, -1) # default = whole file - demo_file_input(msPerChunk, sys.argv[1], select) - else: - print('recording: say something!') - demo_mic_input(msPerChunk)