Introduce a new enum type, mojom::SpeechSynthesisErrorCode

This CL introduces a new enum type, mojom::SpeechSynthesisErrorCode, to explain why an utterance finished. The enum values are kCancelled, kInterrupted, kErrorOccurred, and kNoError. It is passed by SpeechSynthesisClient::OnFinishedSpeaking() to blink. In particular, when an utterance is finished by cancel(), JS can now tell by checking the error codes described in [1]. This CL also adds a test for SpeechSynthesis in prerendering. According to the spec[2], the cancel() method call should have [DelayWhilePrerendering]. The test includes speak() and cancel() in prerendering and ensures that they are handled after activation. [1] https://wicg.github.io/speech-api/#speechsynthesiserrorevent-attributes [2] https://wicg.github.io/nav-speculation/prerendering.html#web-speech-patch Bug: 1365948 Change-Id: I6cdbc8c25ecea2f0b91ec154640ab235831bcc5c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3978652 Reviewed-by: Hiroki Nakagawa <nhiroki@chromium.org> Reviewed-by: Evan Liu <evliu@google.com> Commit-Queue: Julie Jeongeun Kim <jkim@igalia.com> Reviewed-by: Mike West <mkwst@chromium.org> Reviewed-by: David Tseng <dtseng@chromium.org> Cr-Commit-Position: refs/heads/main@{#1067390}
chromium · Nov 4, 2022 · f6a5e23 · f6a5e23
1 parent 71181d3
commit f6a5e23
Show file tree

Hide file tree

Showing 9 changed files with 175 additions and 20 deletions.
diff --git a/content/browser/speech/speech_synthesis_impl.cc b/content/browser/speech/speech_synthesis_impl.cc
@@ -36,10 +36,16 @@ class EventThunk : public UtteranceEventDelegate {
         client_->OnStartedSpeaking();
         break;
       case TTS_EVENT_END:
+        client_->OnFinishedSpeaking(
+            blink::mojom::SpeechSynthesisErrorCode::kNoError);
+        break;
       case TTS_EVENT_INTERRUPTED:
+        client_->OnFinishedSpeaking(
+            blink::mojom::SpeechSynthesisErrorCode::kInterrupted);
+        break;
       case TTS_EVENT_CANCELLED:
-        // The web platform API does not differentiate these events.
-        client_->OnFinishedSpeaking();
+        client_->OnFinishedSpeaking(
+            blink::mojom::SpeechSynthesisErrorCode::kCancelled);
         break;
       case TTS_EVENT_WORD:
         client_->OnEncounteredWordBoundary(char_index, char_length);

diff --git a/third_party/blink/public/mojom/speech/speech_synthesis.mojom b/third_party/blink/public/mojom/speech/speech_synthesis.mojom
@@ -26,6 +26,24 @@ struct SpeechSynthesisVoice {
   bool is_default;
 };
 
+// Reports how an utterance finished; this is passed to OnFinishedSpeaking().
+// kNoError marks a normal end; the other values are error codes based on
+// https://wicg.github.io/speech-api/#enumdef-speechsynthesiserrorcode.
+// TODO(https://crbug.com/1365948): Specify all errors mentioned in the spec.
+enum SpeechSynthesisErrorCode {
+  // A cancel method call caused the SpeechSynthesisUtterance to be removed
+  // from the queue before it had begun being spoken.
+  kCancelled,
+  // A cancel method call caused the SpeechSynthesisUtterance to be
+  // interrupted after it had begun being spoken and before it completed.
+  kInterrupted,
+  // Catch-all for every error other than the two above.
+  kErrorOccurred,
+  // No errors. If the utterance finishes without errors, this value is
+  // passed to OnFinishedSpeaking().
+  kNoError,
+};
+
 // This interface receives updates to the list of voices. See SpeechSynthesis'
 // AddVoiceListObserver method.
 interface SpeechSynthesisVoiceListObserver {
@@ -40,7 +58,7 @@ interface SpeechSynthesisClient {
   OnStartedSpeaking();
 
   // The utterance finished, and no further events will be received.
-  OnFinishedSpeaking();
+  OnFinishedSpeaking(SpeechSynthesisErrorCode error_code);
 
   // The utterance was paused.
   OnPausedSpeaking();

diff --git a/third_party/blink/renderer/modules/speech/speech_synthesis.cc b/third_party/blink/renderer/modules/speech/speech_synthesis.cc
@@ -223,13 +223,16 @@ void SpeechSynthesis::DidResumeSpeaking(SpeechSynthesisUtterance* utterance) {
   FireEvent(event_type_names::kResume, utterance, 0, 0, String());
 }
 
-void SpeechSynthesis::DidFinishSpeaking(SpeechSynthesisUtterance* utterance) {
-  HandleSpeakingCompleted(utterance, false);
+void SpeechSynthesis::DidFinishSpeaking(
+    SpeechSynthesisUtterance* utterance,
+    mojom::blink::SpeechSynthesisErrorCode error_code) {
+  HandleSpeakingCompleted(utterance, error_code);
 }
 
 void SpeechSynthesis::SpeakingErrorOccurred(
     SpeechSynthesisUtterance* utterance) {
-  HandleSpeakingCompleted(utterance, true);
+  HandleSpeakingCompleted(
+      utterance, mojom::blink::SpeechSynthesisErrorCode::kErrorOccurred);
 }
 
 void SpeechSynthesis::WordBoundaryEventOccurred(
@@ -271,7 +274,7 @@ void SpeechSynthesis::StartSpeakingImmediately() {
 
 void SpeechSynthesis::HandleSpeakingCompleted(
     SpeechSynthesisUtterance* utterance,
-    bool error_occurred) {
+    mojom::blink::SpeechSynthesisErrorCode error_code) {
   DCHECK(utterance);
 
   // Special handling for audio descriptions.
@@ -285,16 +288,31 @@ void SpeechSynthesis::HandleSpeakingCompleted(
     should_start_speaking = !utterance_queue_.empty();
   }
 
+  // https://wicg.github.io/speech-api/#speechsynthesiserrorevent-attributes
+  // The below errors are matched with SpeechSynthesisErrorCode values.
+  static constexpr char kErrorCanceled[] = "canceled";
+  static constexpr char kErrorInterrupted[] = "interrupted";
+  static constexpr char kErrorSynthesisFailed[] = "synthesis-failed";
+
   // Always fire the event, because the platform may have asynchronously
   // sent an event on an utterance before it got the message that we
   // canceled it, and we should always report to the user what actually
   // happened.
-  if (error_occurred) {
-    // TODO(csharrison): Actually pass the correct message. For now just use a
-    // generic error.
-    FireErrorEvent(utterance, 0, "synthesis-failed");
-  } else {
-    FireEvent(event_type_names::kEnd, utterance, 0, 0, String());
+  switch (error_code) {
+    case mojom::blink::SpeechSynthesisErrorCode::kInterrupted:
+      FireErrorEvent(utterance, 0, kErrorInterrupted);
+      break;
+    case mojom::blink::SpeechSynthesisErrorCode::kCancelled:
+      FireErrorEvent(utterance, 0, kErrorCanceled);
+      break;
+    case mojom::blink::SpeechSynthesisErrorCode::kErrorOccurred:
+      // TODO(csharrison): Actually pass the correct message. For now just use a
+      // generic error.
+      FireErrorEvent(utterance, 0, kErrorSynthesisFailed);
+      break;
+    case mojom::blink::SpeechSynthesisErrorCode::kNoError:
+      FireEvent(event_type_names::kEnd, utterance, 0, 0, String());
+      break;
   }
 
   // Start the next utterance if we just finished one and one was pending.

diff --git a/third_party/blink/renderer/modules/speech/speech_synthesis.h b/third_party/blink/renderer/modules/speech/speech_synthesis.h
@@ -94,7 +94,8 @@ class MODULES_EXPORT SpeechSynthesis final
   void DidStartSpeaking(SpeechSynthesisUtterance*);
   void DidPauseSpeaking(SpeechSynthesisUtterance*);
   void DidResumeSpeaking(SpeechSynthesisUtterance*);
-  void DidFinishSpeaking(SpeechSynthesisUtterance*);
+  void DidFinishSpeaking(SpeechSynthesisUtterance*,
+                         mojom::blink::SpeechSynthesisErrorCode);
   void SpeakingErrorOccurred(SpeechSynthesisUtterance*);
   void WordBoundaryEventOccurred(SpeechSynthesisUtterance*,
                                  unsigned char_index,
@@ -110,7 +111,9 @@ class MODULES_EXPORT SpeechSynthesis final
  private:
   void VoicesDidChange();
   void StartSpeakingImmediately();
-  void HandleSpeakingCompleted(SpeechSynthesisUtterance*, bool error_occurred);
+  void HandleSpeakingCompleted(
+      SpeechSynthesisUtterance*,
+      mojom::blink::SpeechSynthesisErrorCode error_code);
   void FireEvent(const AtomicString& type,
                  SpeechSynthesisUtterance*,
                  uint32_t char_index,

diff --git a/third_party/blink/renderer/modules/speech/speech_synthesis_utterance.cc b/third_party/blink/renderer/modules/speech/speech_synthesis_utterance.cc
@@ -89,10 +89,11 @@ void SpeechSynthesisUtterance::OnStartedSpeaking() {
   synthesis_->DidStartSpeaking(this);
 }
 
-void SpeechSynthesisUtterance::OnFinishedSpeaking() {
+void SpeechSynthesisUtterance::OnFinishedSpeaking(
+    mojom::blink::SpeechSynthesisErrorCode error_code) {
   DCHECK(synthesis_);
   finished_ = true;
-  synthesis_->DidFinishSpeaking(this);
+  synthesis_->DidFinishSpeaking(this, error_code);
 }
 
 void SpeechSynthesisUtterance::OnPausedSpeaking() {
@@ -154,7 +155,7 @@ void SpeechSynthesisUtterance::Start(SpeechSynthesis* synthesis) {
 void SpeechSynthesisUtterance::OnDisconnected() {
   // If the remote end disconnects, just simulate that we finished normally.
   if (!finished_)
-    OnFinishedSpeaking();
+    OnFinishedSpeaking(mojom::blink::SpeechSynthesisErrorCode::kNoError);
 }
 
 }  // namespace blink
diff --git a/third_party/blink/renderer/modules/speech/speech_synthesis_utterance.h b/third_party/blink/renderer/modules/speech/speech_synthesis_utterance.h
@@ -94,7 +94,8 @@ class SpeechSynthesisUtterance final
 
   // mojom::blink::SpeechSynthesisClient
   void OnStartedSpeaking() override;
-  void OnFinishedSpeaking() override;
+  void OnFinishedSpeaking(
+      mojom::blink::SpeechSynthesisErrorCode error_code) override;
   void OnPausedSpeaking() override;
   void OnResumedSpeaking() override;
   void OnEncounteredWordBoundary(uint32_t char_index,

diff --git a/third_party/blink/renderer/modules/speech/testing/mojom_speech_synthesis_mock.cc b/third_party/blink/renderer/modules/speech/testing/mojom_speech_synthesis_mock.cc
@@ -64,7 +64,8 @@ void MojomSpeechSynthesisMock::SpeakingErrorOccurred(TimerBase*) {
 
 void MojomSpeechSynthesisMock::SpeakingFinished(TimerBase*) {
   DCHECK(current_utterance_);
-  current_client_->OnFinishedSpeaking();
+  current_client_->OnFinishedSpeaking(
+      blink::mojom::SpeechSynthesisErrorCode::kNoError);
   SpeakNext();
 }
 

diff --git a/.../web_tests/external/wpt/speculation-rules/prerender/resources/speech-synthesis.https.html b/.../web_tests/external/wpt/speculation-rules/prerender/resources/speech-synthesis.https.html
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<script src="/resources/testharness.js"></script>
+<script src="/resources/testharnessreport.js"></script>
+<script src="/speculation-rules/prerender/resources/utils.js"></script>
+<script src="/speculation-rules/prerender/resources/deferred-promise-utils.js"></script>
+<script src="webspeech.js"></script>
+
+<script>
+const params = new URLSearchParams(location.search);
+
+// The main test page (restriction-speech-synthesis.https.html) loads the
+// initiator page, then the initiator page will prerender itself with the
+// `prerendering` parameter.
+const isPrerendering = params.has('prerendering');
+
+if (!isPrerendering) {
+  loadInitiatorPage();
+} else {
+  const method = params.get('method');
+  const prerenderEventCollector = new PrerenderEventCollector();
+  const promise = new Promise((resolve, reject) => {
+    switch(method) {
+      case 'speak': {
+        const utter = new SpeechSynthesisUtterance('1');
+        // https://wicg.github.io/speech-api/#tts-methods
+        // This tests that speak() is completed after prerendering activation.
+        utter.onend = () => { resolve(); }
+        speechSynthesis.speak(utter);
+        break;
+      }
+      case 'cancel': {
+        const utter = new SpeechSynthesisUtterance('1');
+        // https://wicg.github.io/speech-api/#speechsynthesiserrorevent-attributes
+        // cancel() causes 'canceled' or 'interrupted'; this tests that one of
+        // them fires after prerendering activation. Other errors never resolve.
+        utter.onerror = (e) => {
+          if (e.error == 'canceled' || e.error == 'interrupted')
+            resolve();
+        }
+        speechSynthesis.speak(utter);
+        speechSynthesis.cancel();
+        break;
+      }
+    }
+  });
+  prerenderEventCollector.start(promise, `speechSynthesis.${method}`);
+}
+</script>
diff --git a/...link/web_tests/external/wpt/speculation-rules/prerender/restriction-speech-synthesis.html b/...link/web_tests/external/wpt/speculation-rules/prerender/restriction-speech-synthesis.html
@@ -0,0 +1,59 @@
+<!DOCTYPE html>
+<title>Access to the speech synthesis is deferred</title>
+<meta name="timeout" content="long">
+<script src="/resources/testharness.js"></script>
+<script src="/resources/testharnessreport.js"></script>
+<script src="/common/utils.js"></script>
+<script src="/speculation-rules/prerender/resources/utils.js"></script>
+<body>
+<script>
+
+setup(() => assertSpeculationRulesIsSupported());
+
+function RunTest(method, description) {
+  promise_test(async t => {
+    const uid = token();
+    const bc = new PrerenderChannel('test-channel', uid);
+    t.add_cleanup(_ => bc.close());
+
+    const gotMessage = new Promise(resolve => {
+      bc.addEventListener('message', e => {
+        resolve(e.data);
+      }, {
+        once: true
+      });
+    });
+    const url = `resources/speech-synthesis.https.html?method=${method}&uid=${uid}`;
+    window.open(url, '_blank', 'noopener');
+
+    const result = await gotMessage;
+    const expected = [
+      {
+        event: `started waiting speechSynthesis.${method}`,
+        prerendering: true
+      },
+      {
+        event: `prerendering change`,
+        prerendering: false
+      },
+      {
+        event: `finished waiting speechSynthesis.${method}`,
+        prerendering: false
+      },
+    ];
+    assert_equals(result.length, expected.length);
+    for (let i = 0; i < result.length; i++) {
+      assert_equals(result[i].event, expected[i].event, `event${i}`);
+      assert_equals(result[i].prerendering, expected[i].prerendering,
+        `prerendering${i}`);
+    }
+
+    // Send a close signal to PrerenderEventCollector on the prerendered page.
+    new PrerenderChannel('close', uid).postMessage('');
+  }, description);
+}
+
+RunTest('speak', `speechSynthesis.speak(utterance) should be deferred until the prerendered page is activated`);
+RunTest('cancel', `speechSynthesis.cancel() should be deferred until the prerendered page is activated`);
+</script>
+</body>