docs: Add documentation for latest models to RecognitionConfig

PiperOrigin-RevId: 446200223
googleapis · May 3, 2022 · 4f586ea
1 parent 27ee094
commit 4f586ea
Show file tree

Hide file tree

Showing 2 changed files with 84 additions and 50 deletions.
diff --git a/google/cloud/speech/v1/cloud_speech.proto b/google/cloud/speech/v1/cloud_speech.proto
@@ -36,7 +36,8 @@ option objc_class_prefix = "GCS";
 // Service that implements Google Cloud Speech API.
 service Speech {
   option (google.api.default_host) = "speech.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
 
   // Performs synchronous speech recognition: receive results after all audio
   // has been sent and processed.
@@ -54,7 +55,8 @@ service Speech {
   // a `LongRunningRecognizeResponse` message.
   // For more information on asynchronous speech recognition, see the
   // [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
-  rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
+  rpc LongRunningRecognize(LongRunningRecognizeRequest)
+      returns (google.longrunning.Operation) {
     option (google.api.http) = {
       post: "/v1/speech:longrunningrecognize"
       body: "*"
@@ -68,8 +70,8 @@ service Speech {
 
   // Performs bidirectional streaming speech recognition: receive results while
   // sending audio. This method is only available via the gRPC API (not REST).
-  rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
-  }
+  rpc StreamingRecognize(stream StreamingRecognizeRequest)
+      returns (stream StreamingRecognizeResponse) {}
 }
 
 // The top-level message sent by the client for the `Recognize` method.
@@ -93,7 +95,8 @@ message LongRunningRecognizeRequest {
   RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];
 
   // Optional. Specifies an optional destination for the recognition results.
-  TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
+  TranscriptOutputConfig output_config = 4
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Specifies an optional destination for the recognition results.
@@ -193,7 +196,8 @@ message RecognitionConfig {
   // an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
   // encoding configuration must match the encoding described in the audio
   // header; otherwise the request returns an
-  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
+  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
+  // code.
   enum AudioEncoding {
     // Not specified.
     ENCODING_UNSPECIFIED = 0;
@@ -246,7 +250,8 @@ message RecognitionConfig {
 
   // Encoding of audio data sent in all `RecognitionAudio` messages.
   // This field is optional for `FLAC` and `WAV` audio files and required
-  // for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
+  // for all other audio formats. For details, see
+  // [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
   AudioEncoding encoding = 1;
 
   // Sample rate in Hertz of the audio data sent in all
@@ -255,7 +260,8 @@ message RecognitionConfig {
   // source to 16000 Hz. If that's not possible, use the native sample rate of
   // the audio source (instead of re-sampling).
   // This field is optional for FLAC and WAV audio files, but is
-  // required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
+  // required for all other audio formats. For details, see
+  // [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
   int32 sample_rate_hertz = 2;
 
   // The number of channels in the input audio data.
@@ -383,6 +389,19 @@ message RecognitionConfig {
   //     <td><b>Description</b></td>
   //   </tr>
   //   <tr>
+  //     <td><code>latest_long</code></td>
+  //     <td>Best for long form content like media or conversation.</td>
+  //   </tr>
+  //   <tr>
+  //     <td><code>latest_short</code></td>
+  //     <td>Best for short form content like commands or single shot directed
+  //     speech.</td>
+  //   </tr>
+  //   <tr>
+  //     <td><code>command_and_search</code></td>
+  //     <td>Best for short queries such as voice commands or voice search.</td>
+  //   </tr>
+  //   <tr>
   //     <td><code>command_and_search</code></td>
   //     <td>Best for short queries such as voice commands or voice search.</td>
   //   </tr>
@@ -436,10 +455,8 @@ message SpeakerDiarizationConfig {
   int32 max_speaker_count = 3;
 
   // Output only. Unused.
-  int32 speaker_tag = 5 [
-    deprecated = true,
-    (google.api.field_behavior) = OUTPUT_ONLY
-  ];
+  int32 speaker_tag = 5
+      [deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // Description of audio data to be recognized.
@@ -599,8 +616,8 @@ message SpeechContext {
 
 // Contains audio data in the encoding specified in the `RecognitionConfig`.
 // Either `content` or `uri` must be supplied. Supplying both or neither
-// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
-// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
+// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
+// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
 message RecognitionAudio {
   // The audio source, which is either inline content or a Google Cloud
   // Storage uri.
@@ -615,8 +632,9 @@ message RecognitionAudio {
     // Currently, only Google Cloud Storage URIs are
     // supported, which must be specified in the following format:
     // `gs://bucket_name/object_name` (other URI formats return
-    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
-    // [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
+    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
+    // For more information, see [Request
+    // URIs](https://cloud.google.com/storage/docs/reference-uris).
     string uri = 2;
   }
 }
@@ -667,8 +685,8 @@ message LongRunningRecognizeMetadata {
   // Time of the most recent processing update.
   google.protobuf.Timestamp last_update_time = 3;
 
-  // Output only. The URI of the audio file being transcribed. Empty if the audio was sent
-  // as byte content.
+  // Output only. The URI of the audio file being transcribed. Empty if the
+  // audio was sent as byte content.
   string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
@@ -787,9 +805,9 @@ message StreamingRecognitionResult {
   // For audio_channel_count = N, its output values can range from '1' to 'N'.
   int32 channel_tag = 5;
 
-  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
-  // of the language in this result. This language code was detected to have
-  // the most likelihood of being spoken in the audio.
+  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
+  // language tag of the language in this result. This language code was
+  // detected to have the most likelihood of being spoken in the audio.
   string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
@@ -810,9 +828,9 @@ message SpeechRecognitionResult {
   // beginning of the audio.
   google.protobuf.Duration result_end_time = 4;
 
-  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
-  // of the language in this result. This language code was detected to have
-  // the most likelihood of being spoken in the audio.
+  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
+  // language tag of the language in this result. This language code was
+  // detected to have the most likelihood of being spoken in the audio.
   string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 

diff --git a/google/cloud/speech/v1p1beta1/cloud_speech.proto b/google/cloud/speech/v1p1beta1/cloud_speech.proto
@@ -37,7 +37,8 @@ option objc_class_prefix = "GCS";
 // Service that implements Google Cloud Speech API.
 service Speech {
   option (google.api.default_host) = "speech.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
 
   // Performs synchronous speech recognition: receive results after all audio
   // has been sent and processed.
@@ -55,7 +56,8 @@ service Speech {
   // a `LongRunningRecognizeResponse` message.
   // For more information on asynchronous speech recognition, see the
   // [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
-  rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
+  rpc LongRunningRecognize(LongRunningRecognizeRequest)
+      returns (google.longrunning.Operation) {
     option (google.api.http) = {
       post: "/v1p1beta1/speech:longrunningrecognize"
       body: "*"
@@ -69,8 +71,8 @@ service Speech {
 
   // Performs bidirectional streaming speech recognition: receive results while
   // sending audio. This method is only available via the gRPC API (not REST).
-  rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
-  }
+  rpc StreamingRecognize(stream StreamingRecognizeRequest)
+      returns (stream StreamingRecognizeResponse) {}
 }
 
 // The top-level message sent by the client for the `Recognize` method.
@@ -94,7 +96,8 @@ message LongRunningRecognizeRequest {
   RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];
 
   // Optional. Specifies an optional destination for the recognition results.
-  TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
+  TranscriptOutputConfig output_config = 4
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Specifies an optional destination for the recognition results.
@@ -194,7 +197,8 @@ message RecognitionConfig {
   // an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
   // encoding configuration must match the encoding described in the audio
   // header; otherwise the request returns an
-  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
+  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
+  // code.
   enum AudioEncoding {
     // Not specified.
     ENCODING_UNSPECIFIED = 0;
@@ -253,7 +257,8 @@ message RecognitionConfig {
 
   // Encoding of audio data sent in all `RecognitionAudio` messages.
   // This field is optional for `FLAC` and `WAV` audio files and required
-  // for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
+  // for all other audio formats. For details, see
+  // [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
   AudioEncoding encoding = 1;
 
   // Sample rate in Hertz of the audio data sent in all
@@ -262,7 +267,8 @@ message RecognitionConfig {
   // source to 16000 Hz. If that's not possible, use the native sample rate of
   // the audio source (instead of re-sampling).
   // This field is optional for FLAC and WAV audio files, but is
-  // required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
+  // required for all other audio formats. For details, see
+  // [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
   int32 sample_rate_hertz = 2;
 
   // The number of channels in the input audio data.
@@ -407,6 +413,15 @@ message RecognitionConfig {
   //     <td><b>Description</b></td>
   //   </tr>
   //   <tr>
+  //     <td><code>latest_long</code></td>
+  //     <td>Best for long form content like media or conversation.</td>
+  //   </tr>
+  //   <tr>
+  //     <td><code>latest_short</code></td>
+  //     <td>Best for short form content like commands or single shot directed
+  //     speech.</td>
+  //   </tr>
+  //   <tr>
   //     <td><code>command_and_search</code></td>
   //     <td>Best for short queries such as voice commands or voice search.</td>
   //   </tr>
@@ -460,10 +475,8 @@ message SpeakerDiarizationConfig {
   int32 max_speaker_count = 3;
 
   // Output only. Unused.
-  int32 speaker_tag = 5 [
-    deprecated = true,
-    (google.api.field_behavior) = OUTPUT_ONLY
-  ];
+  int32 speaker_tag = 5
+      [deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // Description of audio data to be recognized.
@@ -627,8 +640,8 @@ message SpeechContext {
 
 // Contains audio data in the encoding specified in the `RecognitionConfig`.
 // Either `content` or `uri` must be supplied. Supplying both or neither
-// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
-// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
+// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
+// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
 message RecognitionAudio {
   // The audio source, which is either inline content or a Google Cloud
   // Storage uri.
@@ -643,8 +656,9 @@ message RecognitionAudio {
     // Currently, only Google Cloud Storage URIs are
     // supported, which must be specified in the following format:
     // `gs://bucket_name/object_name` (other URI formats return
-    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
-    // [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
+    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
+    // For more information, see [Request
+    // URIs](https://cloud.google.com/storage/docs/reference-uris).
     string uri = 2;
   }
 }
@@ -695,12 +709,14 @@ message LongRunningRecognizeMetadata {
   // Time of the most recent processing update.
   google.protobuf.Timestamp last_update_time = 3;
 
-  // Output only. The URI of the audio file being transcribed. Empty if the audio was sent
-  // as byte content.
+  // Output only. The URI of the audio file being transcribed. Empty if the
+  // audio was sent as byte content.
   string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
 
-  // Output only. A copy of the TranscriptOutputConfig if it was set in the request.
-  TranscriptOutputConfig output_config = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
+  // Output only. A copy of the TranscriptOutputConfig if it was set in the
+  // request.
+  TranscriptOutputConfig output_config = 5
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // `StreamingRecognizeResponse` is the only message returned to the client by
@@ -818,9 +834,9 @@ message StreamingRecognitionResult {
   // For audio_channel_count = N, its output values can range from '1' to 'N'.
   int32 channel_tag = 5;
 
-  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
-  // of the language in this result. This language code was detected to have
-  // the most likelihood of being spoken in the audio.
+  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
+  // language tag of the language in this result. This language code was
+  // detected to have the most likelihood of being spoken in the audio.
   string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
@@ -841,9 +857,9 @@ message SpeechRecognitionResult {
   // beginning of the audio.
   google.protobuf.Duration result_end_time = 4;
 
-  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
-  // of the language in this result. This language code was detected to have
-  // the most likelihood of being spoken in the audio.
+  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
+  // language tag of the language in this result. This language code was
+  // detected to have the most likelihood of being spoken in the audio.
   string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
 }