Skip to content
Permalink
Browse files
feat: Support for spoken punctuation and spoken emojis (#143)
  • Loading branch information
yoshi-automation committed Apr 8, 2021
1 parent cc9cc3e commit b6bddbe46172debd962c3d8e566a7c410fb4f279
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 16 deletions.
@@ -60,6 +60,7 @@ class SpeechClient(SpeechHelpers, SpeechClient):


__all__ = (
"AdaptationClient",
"CreateCustomClassRequest",
"CreatePhraseSetRequest",
"CustomClass",
@@ -82,7 +83,6 @@ class SpeechClient(SpeechHelpers, SpeechClient):
"RecognizeResponse",
"SpeakerDiarizationConfig",
"SpeechAdaptation",
"SpeechClient",
"SpeechContext",
"SpeechRecognitionAlternative",
"SpeechRecognitionResult",
@@ -94,5 +94,5 @@ class SpeechClient(SpeechHelpers, SpeechClient):
"UpdateCustomClassRequest",
"UpdatePhraseSetRequest",
"WordInfo",
"AdaptationClient",
"SpeechClient",
)
@@ -1,3 +1,4 @@

// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,6 +25,7 @@ import "google/longrunning/operations.proto";
import "google/protobuf/any.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";

option cc_enable_arenas = true;
@@ -316,15 +318,15 @@ message RecognitionConfig {
// Speech adaptation configuration improves the accuracy of speech
// recognition. When speech adaptation is set it supersedes the
// `speech_contexts` field. For more information, see the [speech
// adaptation](https://cloud.google.com/speech-to-text/docs/context-strength)
// adaptation](https://cloud.google.com/speech-to-text/docs/adaptation)
// documentation.
SpeechAdaptation adaptation = 20;

// Array of [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
// A means to provide context to assist the speech recognition. For more
// information, see
// [speech
// adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
// adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
repeated SpeechContext speech_contexts = 6;

// If `true`, the top result includes a list of words and
@@ -344,6 +346,22 @@ message RecognitionConfig {
// The default 'false' value does not add punctuation to result hypotheses.
bool enable_automatic_punctuation = 11;

// The spoken punctuation behavior for the call.
// If not set, uses default behavior based on model of choice;
// e.g. command_and_search will enable spoken punctuation by default.
// If 'true', replaces spoken punctuation with the corresponding symbols in
// the request. For example, "how are you question mark" becomes "how are
// you?". See https://cloud.google.com/speech-to-text/docs/spoken-punctuation
// for support. If 'false', spoken punctuation is not replaced.
google.protobuf.BoolValue enable_spoken_punctuation = 22;

// The spoken emoji behavior for the call.
// If not set, uses default behavior based on model of choice.
// If 'true', adds spoken emoji formatting for the request. This will replace
// spoken emojis with the corresponding Unicode symbols in the final
// transcript. If 'false', spoken emojis are not replaced.
google.protobuf.BoolValue enable_spoken_emojis = 23;

// If 'true', enables speaker detection for each recognized word in
// the top alternative of the recognition result using a speaker_tag provided
// in the WordInfo.
@@ -674,8 +692,8 @@ message LongRunningRecognizeMetadata {
// audio, and `single_utterance` is set to false, then no messages are streamed
// back to the client.
//
// Here's an example of a series of ten `StreamingRecognizeResponse`s that might
// be returned while processing audio:
// Here's an example of a series of `StreamingRecognizeResponse`s that might be
// returned while processing audio:
//
// 1. results { alternatives { transcript: "tube" } stability: 0.01 }
//
@@ -394,7 +394,7 @@ def streaming_recognize(
single_utterance is set to false, then no messages
are streamed back to the client.
Here's an example of a series of ten
Here's an example of a series of
StreamingRecognizeResponses that might be returned
while processing audio:
@@ -584,7 +584,7 @@ def streaming_recognize(
single_utterance is set to false, then no messages
are streamed back to the client.
Here's an example of a series of ten
Here's an example of a series of
StreamingRecognizeResponses that might be returned
while processing audio:
@@ -21,6 +21,7 @@
from google.cloud.speech_v1p1beta1.types import resource
from google.protobuf import duration_pb2 as duration # type: ignore
from google.protobuf import timestamp_pb2 as timestamp # type: ignore
from google.protobuf import wrappers_pb2 as wrappers # type: ignore
from google.rpc import status_pb2 as status # type: ignore


@@ -271,14 +272,14 @@ class RecognitionConfig(proto.Message):
speech recognition. When speech adaptation is set it
supersedes the ``speech_contexts`` field. For more
information, see the `speech
adaptation <https://cloud.google.com/speech-to-text/docs/context-strength>`__
adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__
documentation.
speech_contexts (Sequence[google.cloud.speech_v1p1beta1.types.SpeechContext]):
Array of
[SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
A means to provide context to assist the speech recognition.
For more information, see `speech
adaptation <https://cloud.google.com/speech-to-text/docs/context-strength>`__.
adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__.
enable_word_time_offsets (bool):
If ``true``, the top result includes a list of words and the
start and end time offsets (timestamps) for those words. If
@@ -296,6 +297,23 @@ class RecognitionConfig(proto.Message):
requests in other languages has no effect at
all. The default 'false' value does not add
punctuation to result hypotheses.
enable_spoken_punctuation (google.protobuf.wrappers_pb2.BoolValue):
The spoken punctuation behavior for the call. If not set,
uses default behavior based on model of choice; e.g.
command_and_search will enable spoken punctuation by default.
If 'true', replaces spoken punctuation with the
corresponding symbols in the request. For example, "how are
you question mark" becomes "how are you?". See
https://cloud.google.com/speech-to-text/docs/spoken-punctuation
for support. If 'false', spoken punctuation is not replaced.
enable_spoken_emojis (google.protobuf.wrappers_pb2.BoolValue):
The spoken emoji behavior for the call.
If not set, uses default behavior based on model
of choice. If 'true', adds spoken emoji
formatting for the request. This will replace
spoken emojis with the corresponding Unicode
symbols in the final transcript. If 'false',
spoken emojis are not replaced.
enable_speaker_diarization (bool):
If 'true', enables speaker detection for each recognized
word in the top alternative of the recognition result using
@@ -436,6 +454,14 @@ class AudioEncoding(proto.Enum):

enable_automatic_punctuation = proto.Field(proto.BOOL, number=11)

enable_spoken_punctuation = proto.Field(
proto.MESSAGE, number=22, message=wrappers.BoolValue,
)

enable_spoken_emojis = proto.Field(
proto.MESSAGE, number=23, message=wrappers.BoolValue,
)

enable_speaker_diarization = proto.Field(proto.BOOL, number=16)

diarization_speaker_count = proto.Field(proto.INT32, number=17)
@@ -749,9 +775,8 @@ class StreamingRecognizeResponse(proto.Message):
client. If there is no recognizable audio, and ``single_utterance``
is set to false, then no messages are streamed back to the client.
Here's an example of a series of ten
``StreamingRecognizeResponse``\ s that might be returned while
processing audio:
Here's an example of a series of ``StreamingRecognizeResponse``\ s
that might be returned while processing audio:
1. results { alternatives { transcript: "tube" } stability: 0.01 }
@@ -4,15 +4,15 @@
"git": {
"name": ".",
"remote": "https://github.com/googleapis/python-speech.git",
"sha": "5da4b5590e092c993688f9a048efd08ff2e65407"
"sha": "cc9cc3ecdef32a8bf1198aa2ff8561398bf359f8"
}
},
{
"git": {
"name": "googleapis",
"remote": "https://github.com/googleapis/googleapis.git",
"sha": "149a3a84c29c9b8189576c7442ccb6dcf6a8f95b",
"internalRef": "364411656"
"sha": "847464c110e3cb6a5078b6b15086c73c4b622938",
"internalRef": "367346981"
}
},
{
@@ -42,6 +42,7 @@
from google.cloud.speech_v1p1beta1.types import resource
from google.longrunning import operations_pb2
from google.oauth2 import service_account
from google.protobuf import wrappers_pb2 as wrappers # type: ignore
from google.rpc import status_pb2 as status # type: ignore


0 comments on commit b6bddbe

Please sign in to comment.