Support stop with partial recognized speech (#60)
compulim committed Aug 18, 2019
1 parent 2e326b4 commit 9bc0772
Showing 4 changed files with 61 additions and 46 deletions.
CHANGELOG.md (1 change: 1 addition & 0 deletions)
@@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 - `*`: Fix [#47](https://github.com/compulim/web-speech-cognitive-services/issues/47), add `enableTelemetry` option for disabling collecting telemetry data in Speech SDK, in PR [#51](https://github.com/compulim/web-speech-cognitive-services/pull/51)
 - `*`: Fix [#53](https://github.com/compulim/web-speech-cognitive-services/issues/53), added ESLint, in PR [#54](https://github.com/compulim/web-speech-cognitive-services/pull/54)
 - Speech synthesis: Fix [#39](https://github.com/compulim/web-speech-cognitive-services/issues/39), support SSML utterance, in PR [#57](https://github.com/compulim/web-speech-cognitive-services/pull/57)
+- Speech recognition: Fix [#59](https://github.com/compulim/web-speech-cognitive-services/issues/59), support `stop()` function by finalizing partial speech, in PR [#60](https://github.com/compulim/web-speech-cognitive-services/pull/60)

 ### Changed
@@ -40,7 +40,7 @@ Array [
 ]
 `;

-exports[`SpeechRecognition in continuous mode stop after recognized 2 speeches 1`] = `
+exports[`SpeechRecognition in continuous mode stop after recognized 1 speech and 1 ongoing 1`] = `
 Array [
   "cognitiveservices:audioSourceReady",
   "webspeech:start",
@@ -52,9 +52,9 @@ Array [
   "webspeech:result ['hello']",
   "cognitiveservices:recognized",
   "webspeech:result ['Hello.' (isFinal)]",
-  "cognitiveservices:stop",
   "cognitiveservices:recognized",
   "webspeech:result ['Hello.' (isFinal), 'World.' (isFinal)]",
+  "cognitiveservices:stop",
   "cognitiveservices:audioSourceOff",
   "webspeech:speechend",
   "webspeech:soundend",
@@ -97,6 +97,7 @@ Array [
   "webspeech:speechend",
   "webspeech:soundend",
   "webspeech:audioend",
+  "webspeech:error { error: 'no-speech' }",
   "webspeech:end",
 ]
 `;
@@ -108,6 +108,38 @@ export default ({
     return {};
   }

+  let onAudibleChunk;
+  let muted;
+
+  // We patch the "attach" function to detect when an audible chunk is read.
+  // We only patch the "attach" function once.
+  audioConfig.attach = improviseAsync(
+    audioConfig.attach.bind(audioConfig),
+    reader => ({
+      ...reader,
+      read: improviseAsync(
+        reader.read.bind(reader),
+        chunk => {
+          // The magic number 150 was measured by:
+          // 1. Setting the microphone volume to 0
+          // 2. Observing the amplitude (100-110) of the first few chunks
+          //    (short static caught when the microphone turns on)
+          // 3. Setting the threshold a bit higher than the observation
+
+          if (averageAmplitude(chunk.buffer) > 150) {
+            onAudibleChunk && onAudibleChunk();
+          }
+
+          if (muted) {
+            return { buffer: new ArrayBuffer(0), isEnd: true, timeReceived: Date.now() };
+          }
+
+          return chunk;
+        }
+      )
+    })
+  );
+
   SpeechRecognizer.enableTelemetry(enableTelemetry);

   class SpeechRecognition extends EventTarget {
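Two helpers used above, improviseAsync and averageAmplitude, are defined elsewhere in this file and do not appear in the diff. As a minimal sketch of what the call sites imply (the signatures are inferred, not copied from the repository):

    // Sketch only: reconstructed from the call sites above.
    // Wrap an async function so its resolved value passes through an "improviser".
    const improviseAsync = (fn, improviser) => (...args) => fn(...args).then(result => improviser(result));

    // Mean absolute amplitude of a chunk of 16-bit signed PCM audio.
    function averageAmplitude(arrayBuffer) {
      const samples = new Int16Array(arrayBuffer);
      let sum = 0;

      for (const sample of samples) {
        sum += Math.abs(sample);
      }

      return samples.length ? sum / samples.length : 0;
    }

With these in place, the patched read() inspects every audio chunk: a loud-enough chunk fires the one-shot onAudibleChunk callback, and once muted is set, the reader short-circuits with an empty end-of-stream chunk.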
@@ -183,34 +215,12 @@ export default ({
       let speechStarted;
       let stopping;

-      // We modify "attach" function and detect when the first chunk is read.
-      recognizer.audioConfig.attach = improviseAsync(
-        recognizer.audioConfig.attach.bind(recognizer.audioConfig),
-        reader => {
-          let firstAudibleChunkEmitted;
-
-          return {
-            ...reader,
-            read: improviseAsync(
-              reader.read.bind(reader),
-              chunk => {
-                // The magic number 150 is measured by:
-                // 1. Set microphone volume to 0
-                // 2. Observe the amplitude (100-110) for the first few chunks
-                //    (This is short static caught when turning on the microphone)
-                // 3. Set the number a bit higher than the observation
-
-                if (!firstAudibleChunkEmitted && averageAmplitude(chunk.buffer) > 150) {
-                  queue.push({ firstAudibleChunk: {} });
-                  firstAudibleChunkEmitted = true;
-                }
-
-                return chunk;
-              }
-            )
-          };
-        }
-      );
+      muted = false;
+
+      onAudibleChunk = () => {
+        queue.push({ firstAudibleChunk: {} });
+        onAudibleChunk = null;
+      };

       const { detach: detachAudioConfigEvent } = recognizer.audioConfig.events.attach(event => {
         const { name } = event;
@@ -346,16 +356,16 @@ export default ({
             error: 'aborted',
             type: 'error'
           };
-        } else if (finalizedResults.length) {
-          finalEvent = {
-            results: finalizedResults,
-            type: 'result'
-          };
+        } else {
+          // When we mute the stream and send { isEnd: true }, Speech Services will send us a final "recognized" event.
+          muted = true;
         }

         stopping = true;

-        await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync.bind(recognizer))();
+        if (abort) {
+          await cognitiveServicesAsyncToPromise(recognizer.stopContinuousRecognitionAsync.bind(recognizer))();
+        }
       } else if (audioSourceReady) {
         this.dispatchEvent(new SpeechRecognitionEvent('audiostart'));

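This hunk is the heart of the fix: a plain stop() no longer tears the recognizer down immediately. If nothing has been finalized yet, the session is muted, the patched reader returns an empty { isEnd: true } chunk, and Speech Services responds by finalizing the in-flight hypothesis with one last "recognized" event; only abort() still calls stopContinuousRecognitionAsync() right away. From the caller's side, the behavior looks roughly like this (a hypothetical consumer sketch; the setup and handler wiring are illustrative, not taken from this diff):

    // Hypothetical usage sketch. Assumes a SpeechRecognition ponyfill
    // instance created from this package; names are illustrative.
    const recognition = new SpeechRecognition();

    recognition.continuous = true;
    recognition.interimResults = true;

    recognition.onresult = ({ results }) => {
      // After stop(), the ongoing utterance arrives once more, finalized,
      // e.g. ['Hello.' (isFinal), 'World.' (isFinal)] as in the snapshot above.
    };

    recognition.onend = () => {
      // Fired last, after speechend/soundend/audioend.
    };

    recognition.start();
    // ...user finishes saying "Hello." and is midway through "World."...
    recognition.stop(); // finalizes the partial speech instead of dropping it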
@@ -417,12 +427,12 @@ export default ({
           }));
         }

-        if (!this.continuous) {
-          finalEvent = {
-            results: finalizedResults,
-            type: 'result'
-          };
+        finalEvent = {
+          results: finalizedResults,
+          type: 'result'
+        };
+
+        if (!this.continuous) {
           recognizer.stopContinuousRecognitionAsync();
         }
       } else if (recognizing) {
@@ -442,6 +452,8 @@ export default ({
         }
       }

+      onAudibleChunk = null;
+
       if (speechStarted) {
         this.dispatchEvent(new SpeechRecognitionEvent('speechend'));
       }
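Note that attach is now patched once at ponyfill scope rather than per recognizer, so each session has to manage the shared hooks itself: muted is reset when recognition starts, and onAudibleChunk is cleared here during teardown so a chunk read later cannot push into the queue of a finished session. Condensed from the hunks above (illustrative, not the full source):

    let onAudibleChunk; // shared across all sessions
    let muted;

    // On start:
    muted = false;
    onAudibleChunk = () => {
      queue.push({ firstAudibleChunk: {} });
      onAudibleChunk = null; // one-shot: only the first audible chunk matters
    };

    // On stop() without abort: mute so the patched reader emits { isEnd: true }.
    muted = true;

    // On teardown:
    onAudibleChunk = null;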
@@ -715,6 +715,7 @@ describe('SpeechRecognition', () => {
       // webspeech:speechend
       // webspeech:soundend
       // webspeech:audioend
+      // webspeech:error { error: 'no-speech' }
       // webspeech:end

       await endEventEmitted;
@@ -765,7 +766,7 @@ describe('SpeechRecognition', () => {
     expect(toSnapshot(events)).toMatchSnapshot();
   });

-  test('stop after recognized 2 speeches', async () => {
+  test('stop after recognized 1 speech and 1 ongoing', async () => {
     speechRecognition.start();
     speechRecognition.continuous = true;
     speechRecognition.interimResults = true;
@@ -796,15 +797,15 @@ describe('SpeechRecognition', () => {
     // cognitiveservices:recognized
     // webspeech:result ['Hello.' (isFinal)]

-    speechRecognition.stop();
-
-    // cognitiveservices:stop
-
     recognizer.recognized(this, createRecognizedEvent('World.'));

     // cognitiveservices:recognized
     // webspeech:result ['Hello.' (isFinal), 'World.' (isFinal)]

+    speechRecognition.stop();
+
+    // cognitiveservices:stop
+
     recognizer.audioConfig.emitEvent('AudioSourceOffEvent');

     // cognitiveservices:audioSourceOff
