feat: Add adaptation proto for v1 api
PiperOrigin-RevId: 447561222
Google APIs authored and Copybara-Service committed May 9, 2022
1 parent 9f45b0d commit ec9193e
Showing 5 changed files with 423 additions and 37 deletions.
7 changes: 5 additions & 2 deletions google/cloud/speech/v1/BUILD.bazel
@@ -22,6 +22,7 @@ proto_library(
name = "speech_proto",
srcs = [
"cloud_speech.proto",
"cloud_speech_adaptation.proto",
"resource.proto",
],
deps = [
@@ -31,8 +32,9 @@
"//google/api:resource_proto",
"//google/longrunning:operations_proto",
"//google/rpc:status_proto",
"@com_google_protobuf//:any_proto",
"@com_google_protobuf//:duration_proto",
"@com_google_protobuf//:empty_proto",
"@com_google_protobuf//:field_mask_proto",
"@com_google_protobuf//:timestamp_proto",
"@com_google_protobuf//:wrappers_proto",
],
@@ -87,6 +89,7 @@ java_gapic_library(
java_gapic_test(
name = "speech_java_gapic_test_suite",
test_classes = [
"com.google.cloud.speech.v1.AdaptationClientTest",
"com.google.cloud.speech.v1.SpeechClientTest",
],
runtime_deps = [":speech_java_gapic_test"],
@@ -138,7 +141,6 @@ go_gapic_library(
"//google/longrunning:longrunning_go_proto",
"@com_google_cloud_go//longrunning:go_default_library",
"@com_google_cloud_go//longrunning/autogen:go_default_library",
"@io_bazel_rules_go//proto/wkt:any_go_proto",
"@io_bazel_rules_go//proto/wkt:duration_go_proto",
],
)
@@ -174,6 +176,7 @@ py_gapic_library(
name = "speech_py_gapic",
srcs = [":speech_proto"],
grpc_service_config = "speech_grpc_service_config.json",
service_yaml = "speech_v1.yaml",
)

# Open Source Packages
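
The BUILD.bazel changes above add cloud_speech_adaptation.proto to the speech_proto target and register the generated AdaptationClientTest alongside SpeechClientTest. As a hedged sketch of what the resulting surface could look like from a client (assuming the generated Python google-cloud-speech v1 AdaptationClient mirrors the existing v1p1beta1 one; the project ID, phrase set ID, and phrase values below are placeholders, not part of this commit), creating a phrase set might be:

# Hypothetical sketch, not part of the commit: assumes the v1 Adaptation
# client generated from cloud_speech_adaptation.proto matches the existing
# v1p1beta1 surface in google-cloud-speech.
from google.cloud import speech_v1 as speech

adaptation_client = speech.AdaptationClient()

# Placeholder project; phrase sets are created under a global location parent.
parent = "projects/my-project/locations/global"

phrase_set = adaptation_client.create_phrase_set(
    request={
        "parent": parent,
        "phrase_set_id": "my-phrase-set",  # placeholder resource ID
        "phrase_set": {
            # Bias recognition toward domain-specific terms.
            "phrases": [{"value": "Speech-to-Text", "boost": 10.0}],
        },
    }
)
print("Created:", phrase_set.name)

The returned phrase_set.name could then be referenced from recognition requests via the adaptation fields on RecognitionConfig, if and when those are exposed in the v1 client.
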
74 changes: 40 additions & 34 deletions google/cloud/speech/v1/cloud_speech.proto
@@ -1,4 +1,4 @@
// Copyright 2021 Google LLC
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -36,8 +35,7 @@ option objc_class_prefix = "GCS";
// Service that implements Google Cloud Speech API.
service Speech {
option (google.api.default_host) = "speech.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

// Performs synchronous speech recognition: receive results after all audio
// has been sent and processed.
@@ -55,8 +54,7 @@ service Speech {
// a `LongRunningRecognizeResponse` message.
// For more information on asynchronous speech recognition, see the
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
rpc LongRunningRecognize(LongRunningRecognizeRequest)
returns (google.longrunning.Operation) {
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/speech:longrunningrecognize"
body: "*"
@@ -70,8 +68,8 @@

// Performs bidirectional streaming speech recognition: receive results while
// sending audio. This method is only available via the gRPC API (not REST).
rpc StreamingRecognize(stream StreamingRecognizeRequest)
returns (stream StreamingRecognizeResponse) {}
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
}
}

// The top-level message sent by the client for the `Recognize` method.
@@ -95,8 +93,7 @@ message LongRunningRecognizeRequest {
RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];

// Optional. Specifies an optional destination for the recognition results.
TranscriptOutputConfig output_config = 4
[(google.api.field_behavior) = OPTIONAL];
TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies an optional destination for the recognition results.
@@ -196,8 +193,7 @@ message RecognitionConfig {
// an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
// encoding configuration must match the encoding described in the audio
// header; otherwise the request returns an
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
// code.
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
enum AudioEncoding {
// Not specified.
ENCODING_UNSPECIFIED = 0;
@@ -250,8 +246,7 @@ message RecognitionConfig {

// Encoding of audio data sent in all `RecognitionAudio` messages.
// This field is optional for `FLAC` and `WAV` audio files and required
// for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
AudioEncoding encoding = 1;

// Sample rate in Hertz of the audio data sent in all
@@ -260,8 +255,7 @@ message RecognitionConfig {
// source to 16000 Hz. If that's not possible, use the native sample rate of
// the audio source (instead of re-sampling).
// This field is optional for FLAC and WAV audio files, but is
// required for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
int32 sample_rate_hertz = 2;

// The number of channels in the input audio data.
@@ -402,10 +396,6 @@ message RecognitionConfig {
// <td>Best for short queries such as voice commands or voice search.</td>
// </tr>
// <tr>
// <td><code>command_and_search</code></td>
// <td>Best for short queries such as voice commands or voice search.</td>
// </tr>
// <tr>
// <td><code>phone_call</code></td>
// <td>Best for audio that originated from a phone call (typically
// recorded at an 8khz sampling rate).</td>
@@ -423,6 +413,16 @@
// For example, long-form audio. Ideally the audio is high-fidelity,
// recorded at a 16khz or greater sampling rate.</td>
// </tr>
// <tr>
// <td><code>medical_conversation</code></td>
// <td>Best for audio that originated from a conversation between a
// medical provider and patient.</td>
// </tr>
// <tr>
// <td><code>medical_dictation</code></td>
// <td>Best for audio that originated from dictation notes by a medical
// provider.</td>
// </tr>
// </table>
string model = 13;

@@ -455,12 +455,16 @@ message SpeakerDiarizationConfig {
int32 max_speaker_count = 3;

// Output only. Unused.
int32 speaker_tag = 5
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
int32 speaker_tag = 5 [
deprecated = true,
(google.api.field_behavior) = OUTPUT_ONLY
];
}

// Description of audio data to be recognized.
message RecognitionMetadata {
option deprecated = true;

// Use case categories that the audio recognition request can be described
// by.
enum InteractionType {
@@ -616,8 +620,8 @@ message SpeechContext {

// Contains audio data in the encoding specified in the `RecognitionConfig`.
// Either `content` or `uri` must be supplied. Supplying both or neither
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognitionAudio {
// The audio source, which is either inline content or a Google Cloud
// Storage uri.
@@ -632,9 +636,8 @@
// Currently, only Google Cloud Storage URIs are
// supported, which must be specified in the following format:
// `gs://bucket_name/object_name` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
// For more information, see [Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 2;
}
}
@@ -685,8 +688,8 @@ message LongRunningRecognizeMetadata {
// Time of the most recent processing update.
google.protobuf.Timestamp last_update_time = 3;

// Output only. The URI of the audio file being transcribed. Empty if the
// audio was sent as byte content.
// Output only. The URI of the audio file being transcribed. Empty if the audio was sent
// as byte content.
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

@@ -805,9 +808,9 @@ message StreamingRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 5;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

@@ -828,15 +831,18 @@ message SpeechRecognitionResult {
// beginning of the audio.
google.protobuf.Duration result_end_time = 4;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Alternative hypotheses (a.k.a. n-best list).
message SpeechRecognitionAlternative {
// Transcript text representing the words that the user spoke.
// In languages that use spaces to separate words, the transcript might have a
// leading space if it isn't the first result. You can concatenate each result
// to obtain the full transcript without using a separator.
string transcript = 1;

// The confidence estimate between 0.0 and 1.0. A higher number
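
The field documentation in the cloud_speech.proto diff above (encoding and sample_rate_hertz on RecognitionConfig, the model table, SpeechContext phrase hints, and the content/uri choice on RecognitionAudio) together describes a complete synchronous Recognize request. A minimal Python sketch of such a request, assuming the standard generated google-cloud-speech v1 client; the bucket/object name, language, model, and phrase values are illustrative placeholders only:

# Minimal sketch of a synchronous Recognize call using the fields documented
# above; resource names and values are placeholders.
from google.cloud import speech_v1 as speech

client = speech.SpeechClient()

config = speech.RecognitionConfig(
    # Optional for FLAC/WAV, required otherwise (see the AudioEncoding docs).
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    # One of the models from the table above, e.g. phone_call or video.
    model="phone_call",
    # Inline phrase hints; a phrase set created via the Adaptation client
    # could be referenced instead once the v1 adaptation surface is exposed.
    speech_contexts=[speech.SpeechContext(phrases=["Speech-to-Text"])],
)

# Either inline `content` bytes or a Cloud Storage `uri`, never both.
audio = speech.RecognitionAudio(uri="gs://bucket_name/object_name")

response = client.recognize(config=config, audio=audio)
for result in response.results:
    # Alternatives are ordered by confidence; take the top hypothesis.
    print(result.alternatives[0].transcript)

LongRunningRecognize takes the same config/audio pair and returns an Operation, while StreamingRecognize wraps the config in a streaming config and sends audio in chunks over gRPC.
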
