From fed2ff76fe8bda02a79f90150c2a1e6e7cc5d97c Mon Sep 17 00:00:00 2001 From: sowens-csd Date: Thu, 7 Dec 2023 12:18:20 -0500 Subject: [PATCH] feat: fix for mobile browser duplicates --- speech_to_text/example/pubspec.lock | 28 ++++---- speech_to_text/lib/balanced_alternates.dart | 64 ++++++++++++++----- speech_to_text/lib/speech_to_text.dart | 5 ++ speech_to_text/lib/speech_to_text_web.dart | 13 ++-- speech_to_text/pubspec.lock | 26 ++++---- .../test/balanced_alternates_test.dart | 61 ++++++++++++++++-- 6 files changed, 138 insertions(+), 59 deletions(-) diff --git a/speech_to_text/example/pubspec.lock b/speech_to_text/example/pubspec.lock index 755cfd83..1cb298ad 100644 --- a/speech_to_text/example/pubspec.lock +++ b/speech_to_text/example/pubspec.lock @@ -37,10 +37,10 @@ packages: dependency: transitive description: name: collection - sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687 + sha256: ee67cb0715911d28db6bf4af1026078bd6f0128b07a5f66fb2ed94ec6783c09a url: "https://pub.dev" source: hosted - version: "1.17.2" + version: "1.18.0" fake_async: dependency: transitive description: @@ -116,10 +116,10 @@ packages: dependency: transitive description: name: meta - sha256: "3c74dbf8763d36539f114c799d8a2d87343b5067e9d796ca22b5eb8437090ee3" + sha256: a6e590c838b18133bb482a2745ad77c5bb7715fb0451209e1a7567d416678b8e url: "https://pub.dev" source: hosted - version: "1.9.1" + version: "1.10.0" nested: dependency: transitive description: @@ -179,7 +179,7 @@ packages: path: ".." 
relative: true source: path - version: "6.4.0" + version: "6.4.1" speech_to_text_macos: dependency: transitive description: @@ -200,18 +200,18 @@ packages: dependency: transitive description: name: stack_trace - sha256: c3c7d8edb15bee7f0f74debd4b9c5f3c2ea86766fe4178eb2a18eb30a0bdaed5 + sha256: "73713990125a6d93122541237550ee3352a2d84baad52d375a4cad2eb9b7ce0b" url: "https://pub.dev" source: hosted - version: "1.11.0" + version: "1.11.1" stream_channel: dependency: transitive description: name: stream_channel - sha256: "83615bee9045c1d322bbbd1ba209b7a749c2cbcdcb3fdd1df8eb488b3279c1c8" + sha256: ba2aa5d8cc609d96bbb2899c28934f9e1af5cddbd60a827822ea467161eb54e7 url: "https://pub.dev" source: hosted - version: "2.1.1" + version: "2.1.2" string_scanner: dependency: transitive description: @@ -232,10 +232,10 @@ packages: dependency: transitive description: name: test_api - sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8" + sha256: "5c2f730018264d276c20e4f1503fd1308dfbbae39ec8ee63c5236311ac06954b" url: "https://pub.dev" source: hosted - version: "0.6.0" + version: "0.6.1" vector_math: dependency: transitive description: @@ -248,10 +248,10 @@ packages: dependency: transitive description: name: web - sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10 + sha256: afe077240a270dcfd2aafe77602b4113645af95d0ad31128cc02bce5ac5d5152 url: "https://pub.dev" source: hosted - version: "0.1.4-beta" + version: "0.3.0" sdks: - dart: ">=3.1.0-185.0.dev <4.0.0" + dart: ">=3.2.0-194.0.dev <4.0.0" flutter: ">=3.10.0" diff --git a/speech_to_text/lib/balanced_alternates.dart b/speech_to_text/lib/balanced_alternates.dart index a3351b88..7df77e7d 100644 --- a/speech_to_text/lib/balanced_alternates.dart +++ b/speech_to_text/lib/balanced_alternates.dart @@ -1,10 +1,20 @@ import 'dart:math'; import 'package:speech_to_text/speech_recognition_result.dart'; +import 'package:speech_to_text_platform_interface/speech_to_text_platform_interface.dart'; class 
BalancedAlternates { final Map> _alternates = {}; + static bool isAggregateResultsEnabled(List? options) { + if (null == options) return true; + final any = options.any((option) => + option.platform == 'web' && + option.name == 'aggregate' && + option.value == false); + return !any; + } + /// Add a new phrase to a particular alternate. The way this works is /// that the first alternate is the most likely, the second alternate is /// the second most likely, etc. The first alternate is the one that @@ -26,29 +36,51 @@ class BalancedAlternates { /// phrase that is missing an alternate has that alternate filled in with the /// previous alternate. This is done so that the result is a complete /// transcript of all the alternates. - List getAlternates() { + List getAlternates(bool aggregateResults) { final phraseCount = _alternates.length; var result = []; final maxAlternates = _alternates.values .fold(0, (max, list) => max = list.length > max ? list.length : max); - for (var phraseIndex = 0; phraseIndex < phraseCount; ++phraseIndex) { - final phraseAlternates = _alternates[phraseIndex] ?? []; - for (var altIndex = max(1, phraseAlternates.length); - altIndex < maxAlternates; - ++altIndex) { - phraseAlternates.add(phraseAlternates[altIndex - 1]); - } - } + print( + 'Speech recognition alternates: $maxAlternates, phrases: $phraseCount'); - for (var altCount = 0; altCount < maxAlternates; ++altCount) { - var alternatePhrase = ''; - var alternateConfidence = 1.0; + if (aggregateResults) { for (var phraseIndex = 0; phraseIndex < phraseCount; ++phraseIndex) { - alternatePhrase += _alternates[phraseIndex]![altCount].recognizedWords; - alternateConfidence = min(alternateConfidence, - _alternates[phraseIndex]![altCount].confidence); + final phraseAlternates = _alternates[phraseIndex] ?? 
[]; + for (var altIndex = max(1, phraseAlternates.length); + altIndex < maxAlternates; + ++altIndex) { + phraseAlternates.add(phraseAlternates[altIndex - 1]); + } + } + for (var altCount = 0; altCount < maxAlternates; ++altCount) { + var alternatePhrase = ''; + var alternateConfidence = 1.0; + for (var phraseIndex = 0; phraseIndex < phraseCount; ++phraseIndex) { + alternatePhrase += + _alternates[phraseIndex]![altCount].recognizedWords; + alternateConfidence = min(alternateConfidence, + _alternates[phraseIndex]![altCount].confidence); + } + result + .add(SpeechRecognitionWords(alternatePhrase, alternateConfidence)); + } + } else { + for (var phraseIndex = phraseCount - 1; phraseIndex >= 0; --phraseIndex) { + if ((_alternates[phraseIndex]?[0].recognizedWords.trim() ?? '') + .isEmpty) { + continue; + } + for (var altIndex = 0; + altIndex < _alternates[phraseIndex]!.length; + ++altIndex) { + result.add(_alternates[phraseIndex]![altIndex]); + } + // result.add(SpeechRecognitionWords( + // _alternates[phraseIndex]![0].recognizedWords, + // _alternates[phraseIndex]![0].confidence)); + break; } - result.add(SpeechRecognitionWords(alternatePhrase, alternateConfidence)); } return result; } diff --git a/speech_to_text/lib/speech_to_text.dart b/speech_to_text/lib/speech_to_text.dart index c30ef053..35866fe3 100644 --- a/speech_to_text/lib/speech_to_text.dart +++ b/speech_to_text/lib/speech_to_text.dart @@ -154,6 +154,11 @@ class SpeechToText { static final SpeechConfigOption iosNoBluetooth = SpeechConfigOption('ios', 'noBluetooth', true); + /// Web only: disables aggregating results across phrases; only the + /// alternates of the most recent non-empty phrase are reported. 
+ static final SpeechConfigOption webDoNotAggregate = + SpeechConfigOption('web', 'aggregate', false); + static final SpeechToText _instance = SpeechToText.withMethodChannel(); bool _initWorked = false; diff --git a/speech_to_text/lib/speech_to_text_web.dart b/speech_to_text/lib/speech_to_text_web.dart index 65009aae..d5318417 100644 --- a/speech_to_text/lib/speech_to_text_web.dart +++ b/speech_to_text/lib/speech_to_text_web.dart @@ -18,6 +18,7 @@ class SpeechToTextPlugin extends SpeechToTextPlatform { static const _doneNoResult = 'doneNoResult'; bool _resultSent = false; bool _doneSent = false; + bool _aggregateResults = true; /// Registers this class as the default instance of [SpeechToTextPlatform]. static void registerWith(Registrar registrar) { @@ -60,6 +61,8 @@ class SpeechToTextPlugin extends SpeechToTextPlatform { try { _webSpeech = html.SpeechRecognition(); if (null != _webSpeech) { + _aggregateResults = + BalancedAlternates.isAggregateResultsEnabled(options); _webSpeech!.onError.listen((error) => _onError(error)); _webSpeech!.onStart.listen((startEvent) => _onSpeechStart(startEvent)); _webSpeech!.onSpeechStart @@ -219,19 +222,11 @@ class SpeechToTextPlugin extends SpeechToTextPlatform { num? confidence = js_util.getProperty(alt, 'confidence'); if (null != transcript) { balanced.add(resultIndex, transcript, confidence?.toDouble() ?? 1.0); - // final fullTranscript = - // recogResults[altIndex].recognizedWords + transcript; - // final fullConfidence = min( - // recogResults[altIndex].confidence, confidence?.toDouble() ?? 
1.0); - // recogResults[altIndex] = - // SpeechRecognitionWords(fullTranscript, fullConfidence.toDouble()); - // recogResults - // .add(SpeechRecognitionWords(transcript, confidence.toDouble())); } } ++resultIndex; } - recogResults = balanced.getAlternates(); + recogResults = balanced.getAlternates(_aggregateResults); var result = SpeechRecognitionResult(recogResults, isFinal); onTextRecognition?.call(jsonEncode(result.toJson())); _resultSent = true; diff --git a/speech_to_text/pubspec.lock b/speech_to_text/pubspec.lock index e63bf124..0010ee98 100644 --- a/speech_to_text/pubspec.lock +++ b/speech_to_text/pubspec.lock @@ -141,10 +141,10 @@ packages: dependency: transitive description: name: collection - sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687 + sha256: ee67cb0715911d28db6bf4af1026078bd6f0128b07a5f66fb2ed94ec6783c09a url: "https://pub.dev" source: hosted - version: "1.17.2" + version: "1.18.0" convert: dependency: transitive description: @@ -324,10 +324,10 @@ packages: dependency: "direct main" description: name: meta - sha256: "3c74dbf8763d36539f114c799d8a2d87343b5067e9d796ca22b5eb8437090ee3" + sha256: a6e590c838b18133bb482a2745ad77c5bb7715fb0451209e1a7567d416678b8e url: "https://pub.dev" source: hosted - version: "1.9.1" + version: "1.10.0" mime: dependency: transitive description: @@ -465,18 +465,18 @@ packages: dependency: transitive description: name: stack_trace - sha256: c3c7d8edb15bee7f0f74debd4b9c5f3c2ea86766fe4178eb2a18eb30a0bdaed5 + sha256: "73713990125a6d93122541237550ee3352a2d84baad52d375a4cad2eb9b7ce0b" url: "https://pub.dev" source: hosted - version: "1.11.0" + version: "1.11.1" stream_channel: dependency: transitive description: name: stream_channel - sha256: "83615bee9045c1d322bbbd1ba209b7a749c2cbcdcb3fdd1df8eb488b3279c1c8" + sha256: ba2aa5d8cc609d96bbb2899c28934f9e1af5cddbd60a827822ea467161eb54e7 url: "https://pub.dev" source: hosted - version: "2.1.1" + version: "2.1.2" stream_transform: dependency: transitive 
description: @@ -505,10 +505,10 @@ packages: dependency: transitive description: name: test_api - sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8" + sha256: "5c2f730018264d276c20e4f1503fd1308dfbbae39ec8ee63c5236311ac06954b" url: "https://pub.dev" source: hosted - version: "0.6.0" + version: "0.6.1" timing: dependency: transitive description: @@ -545,10 +545,10 @@ packages: dependency: transitive description: name: web - sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10 + sha256: afe077240a270dcfd2aafe77602b4113645af95d0ad31128cc02bce5ac5d5152 url: "https://pub.dev" source: hosted - version: "0.1.4-beta" + version: "0.3.0" web_socket_channel: dependency: transitive description: @@ -566,5 +566,5 @@ packages: source: hosted version: "3.1.2" sdks: - dart: ">=3.1.0-185.0.dev <4.0.0" + dart: ">=3.2.0-194.0.dev <4.0.0" flutter: ">=3.10.0" diff --git a/speech_to_text/test/balanced_alternates_test.dart b/speech_to_text/test/balanced_alternates_test.dart index 939cbd3a..228ea546 100644 --- a/speech_to_text/test/balanced_alternates_test.dart +++ b/speech_to_text/test/balanced_alternates_test.dart @@ -1,5 +1,7 @@ import 'package:flutter_test/flutter_test.dart'; import 'package:speech_to_text/balanced_alternates.dart'; +import 'package:speech_to_text/speech_to_text.dart'; +import 'package:speech_to_text_platform_interface/speech_to_text_platform_interface.dart'; void main() { late BalancedAlternates balancedAlternates; @@ -9,12 +11,12 @@ void main() { }); test('empty results with no alternates', () async { - expect(balancedAlternates.getAlternates(), isEmpty); + expect(balancedAlternates.getAlternates(true), isEmpty); }); test('one phrase, no alternates returns that phrase', () async { balancedAlternates.add(0, 'one', 0.85); - final alts = balancedAlternates.getAlternates(); + final alts = balancedAlternates.getAlternates(true); expect(alts, hasLength(1)); expect(alts[0].recognizedWords, 'one'); expect(alts[0].confidence, 0.85); 
@@ -23,7 +25,7 @@ void main() { test('one phrase, one alternate returns that phrase and alternate', () async { balancedAlternates.add(0, 'one', 0.85); balancedAlternates.add(0, 'an', 0.65); - final alts = balancedAlternates.getAlternates(); + final alts = balancedAlternates.getAlternates(true); expect(alts, hasLength(2)); expect(alts[0].recognizedWords, 'one'); expect(alts[0].confidence, 0.85); @@ -36,7 +38,7 @@ void main() { balancedAlternates.add(0, 'one', 0.85); balancedAlternates.add(0, 'an', 0.65); balancedAlternates.add(0, 'and', 0.55); - final alts = balancedAlternates.getAlternates(); + final alts = balancedAlternates.getAlternates(true); expect(alts, hasLength(3)); expect(alts[0].recognizedWords, 'one'); expect(alts[0].confidence, 0.85); @@ -49,7 +51,7 @@ void main() { test('two phrases, no alternates returns concatenated phrase', () async { balancedAlternates.add(0, 'one ', 0.85); balancedAlternates.add(1, 'tree', 0.95); - final alts = balancedAlternates.getAlternates(); + final alts = balancedAlternates.getAlternates(true); expect(alts, hasLength(1)); expect(alts[0].recognizedWords, 'one tree'); expect(alts[0].confidence, 0.85); @@ -59,7 +61,7 @@ void main() { balancedAlternates.add(0, 'an ', 0.65); balancedAlternates.add(1, 'tree', 0.95); balancedAlternates.add(1, 'free', 0.35); - final alts = balancedAlternates.getAlternates(); + final alts = balancedAlternates.getAlternates(true); expect(alts, hasLength(2)); expect(alts[0].recognizedWords, 'one tree'); expect(alts[0].confidence, 0.85); @@ -71,11 +73,56 @@ void main() { balancedAlternates.add(0, 'one ', 0.85); balancedAlternates.add(0, 'an ', 0.65); balancedAlternates.add(1, 'tree', 0.95); - final alts = balancedAlternates.getAlternates(); + final alts = balancedAlternates.getAlternates(true); expect(alts, hasLength(2)); expect(alts[0].recognizedWords, 'one tree'); expect(alts[0].confidence, 0.85); expect(alts[1].recognizedWords, 'an tree'); expect(alts[1].confidence, 0.65); }); + + group('Not 
aggregated', () { + test('one phrase, two alternates returns that phrase and alternates', + () async { + balancedAlternates.add(0, 'one', 0.85); + balancedAlternates.add(0, 'an', 0.65); + balancedAlternates.add(0, 'and', 0.55); + final alts = balancedAlternates.getAlternates(false); + expect(alts, hasLength(3)); + expect(alts[0].recognizedWords, 'one'); + expect(alts[0].confidence, 0.85); + expect(alts[1].recognizedWords, 'an'); + expect(alts[1].confidence, 0.65); + expect(alts[2].recognizedWords, 'and'); + expect(alts[2].confidence, 0.55); + }); + test('two phrases returns the last phrase and its alternate', () async { + balancedAlternates.add(0, 'one', 0.85); + balancedAlternates.add(1, 'one two', 0.80); + balancedAlternates.add(1, 'one to', 0.55); + final alts = balancedAlternates.getAlternates(false); + expect(alts, hasLength(2)); + expect(alts[0].recognizedWords, 'one two'); + expect(alts[0].confidence, 0.80); + expect(alts[1].recognizedWords, 'one to'); + expect(alts[1].confidence, 0.55); + }); + test('empty phrase is skipped', () async { + balancedAlternates.add(0, 'one', 0.85); + balancedAlternates.add(1, '', 0.80); + balancedAlternates.add(1, '', 0.55); + final alts = balancedAlternates.getAlternates(false); + expect(alts, hasLength(1)); + expect(alts[0].recognizedWords, 'one'); + expect(alts[0].confidence, 0.85); + }); + test('Validate config option tests', () { + List? options; + expect(BalancedAlternates.isAggregateResultsEnabled(options), isTrue); + options = []; + expect(BalancedAlternates.isAggregateResultsEnabled(options), isTrue); + options = [SpeechToText.webDoNotAggregate]; + expect(BalancedAlternates.isAggregateResultsEnabled(options), isFalse); + }); + }); }