feat: Add adaptation proto for v1 api
PiperOrigin-RevId: 447561222
Google APIs authored and Copybara-Service committed May 9, 2022
1 parent 9f45b0d commit ec9193e
Showing 5 changed files with 423 additions and 37 deletions.
7 changes: 5 additions & 2 deletions google/cloud/speech/v1/BUILD.bazel
@@ -22,6 +22,7 @@ proto_library(
name = "speech_proto",
srcs = [
"cloud_speech.proto",
"cloud_speech_adaptation.proto",
"resource.proto",
],
deps = [
@@ -31,8 +32,9 @@
"//google/api:resource_proto",
"//google/longrunning:operations_proto",
"//google/rpc:status_proto",
"@com_google_protobuf//:any_proto",
"@com_google_protobuf//:duration_proto",
"@com_google_protobuf//:empty_proto",
"@com_google_protobuf//:field_mask_proto",
"@com_google_protobuf//:timestamp_proto",
"@com_google_protobuf//:wrappers_proto",
],
@@ -87,6 +89,7 @@ java_gapic_library(
java_gapic_test(
name = "speech_java_gapic_test_suite",
test_classes = [
"com.google.cloud.speech.v1.AdaptationClientTest",
"com.google.cloud.speech.v1.SpeechClientTest",
],
runtime_deps = [":speech_java_gapic_test"],
@@ -138,7 +141,6 @@ go_gapic_library(
"//google/longrunning:longrunning_go_proto",
"@com_google_cloud_go//longrunning:go_default_library",
"@com_google_cloud_go//longrunning/autogen:go_default_library",
"@io_bazel_rules_go//proto/wkt:any_go_proto",
"@io_bazel_rules_go//proto/wkt:duration_go_proto",
],
)
@@ -174,6 +176,7 @@ py_gapic_library(
name = "speech_py_gapic",
srcs = [":speech_proto"],
grpc_service_config = "speech_grpc_service_config.json",
service_yaml = "speech_v1.yaml",
)

# Open Source Packages
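
The BUILD.bazel changes above add cloud_speech_adaptation.proto to the speech_proto target and register the generated AdaptationClientTest alongside SpeechClientTest. As a hedged sketch of what the resulting surface could look like from a client (assuming the generated Python google-cloud-speech v1 AdaptationClient mirrors the existing v1p1beta1 one; the project ID, phrase set ID, and phrase values below are placeholders, not part of this commit), creating a phrase set might be:

# Hypothetical sketch, not part of the commit: assumes the v1 Adaptation
# client generated from cloud_speech_adaptation.proto matches the existing
# v1p1beta1 surface in google-cloud-speech.
from google.cloud import speech_v1 as speech

adaptation_client = speech.AdaptationClient()

# Placeholder project; phrase sets are created under a global location parent.
parent = "projects/my-project/locations/global"

phrase_set = adaptation_client.create_phrase_set(
    request={
        "parent": parent,
        "phrase_set_id": "my-phrase-set",  # placeholder resource ID
        "phrase_set": {
            # Bias recognition toward domain-specific terms.
            "phrases": [{"value": "Speech-to-Text", "boost": 10.0}],
        },
    }
)
print("Created:", phrase_set.name)

The returned phrase_set.name could then be referenced from recognition requests via the adaptation fields on RecognitionConfig, if and when those are exposed in the v1 client.
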
74 changes: 40 additions & 34 deletions google/cloud/speech/v1/cloud_speech.proto
@@ -1,4 +1,4 @@
// Copyright 2021 Google LLC
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -36,8 +35,7 @@ option objc_class_prefix = "GCS";
// Service that implements Google Cloud Speech API.
service Speech {
option (google.api.default_host) = "speech.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

// Performs synchronous speech recognition: receive results after all audio
// has been sent and processed.
@@ -55,8 +54,7 @@ service Speech {
// a `LongRunningRecognizeResponse` message.
// For more information on asynchronous speech recognition, see the
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
rpc LongRunningRecognize(LongRunningRecognizeRequest)
returns (google.longrunning.Operation) {
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/speech:longrunningrecognize"
body: "*"
@@ -70,8 +68,8 @@

// Performs bidirectional streaming speech recognition: receive results while
// sending audio. This method is only available via the gRPC API (not REST).
rpc StreamingRecognize(stream StreamingRecognizeRequest)
returns (stream StreamingRecognizeResponse) {}
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
}
}

// The top-level message sent by the client for the `Recognize` method.
@@ -95,8 +93,7 @@ message LongRunningRecognizeRequest {
RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];

// Optional. Specifies an optional destination for the recognition results.
TranscriptOutputConfig output_config = 4
[(google.api.field_behavior) = OPTIONAL];
TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies an optional destination for the recognition results.
@@ -196,8 +193,7 @@ message RecognitionConfig {
// an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
// encoding configuration must match the encoding described in the audio
// header; otherwise the request returns an
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
// code.
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
enum AudioEncoding {
// Not specified.
ENCODING_UNSPECIFIED = 0;
@@ -250,8 +246,7 @@ message RecognitionConfig {

// Encoding of audio data sent in all `RecognitionAudio` messages.
// This field is optional for `FLAC` and `WAV` audio files and required
// for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
AudioEncoding encoding = 1;

// Sample rate in Hertz of the audio data sent in all
@@ -260,8 +255,7 @@ message RecognitionConfig {
// source to 16000 Hz. If that's not possible, use the native sample rate of
// the audio source (instead of re-sampling).
// This field is optional for FLAC and WAV audio files, but is
// required for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
int32 sample_rate_hertz = 2;

// The number of channels in the input audio data.
@@ -402,10 +396,6 @@ message RecognitionConfig {
// <td>Best for short queries such as voice commands or voice search.</td>
// </tr>
// <tr>
// <td><code>command_and_search</code></td>
// <td>Best for short queries such as voice commands or voice search.</td>
// </tr>
// <tr>
// <td><code>phone_call</code></td>
// <td>Best for audio that originated from a phone call (typically
// recorded at an 8khz sampling rate).</td>
@@ -423,6 +413,16 @@
// For example, long-form audio. Ideally the audio is high-fidelity,
// recorded at a 16khz or greater sampling rate.</td>
// </tr>
// <tr>
// <td><code>medical_conversation</code></td>
// <td>Best for audio that originated from a conversation between a
// medical provider and patient.</td>
// </tr>
// <tr>
// <td><code>medical_dictation</code></td>
// <td>Best for audio that originated from dictation notes by a medical
// provider.</td>
// </tr>
// </table>
string model = 13;

@@ -455,12 +455,16 @@ message SpeakerDiarizationConfig {
int32 max_speaker_count = 3;

// Output only. Unused.
int32 speaker_tag = 5
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
int32 speaker_tag = 5 [
deprecated = true,
(google.api.field_behavior) = OUTPUT_ONLY
];
}

// Description of audio data to be recognized.
message RecognitionMetadata {
option deprecated = true;

// Use case categories that the audio recognition request can be described
// by.
enum InteractionType {
@@ -616,8 +620,8 @@ message SpeechContext {

// Contains audio data in the encoding specified in the `RecognitionConfig`.
// Either `content` or `uri` must be supplied. Supplying both or neither
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognitionAudio {
// The audio source, which is either inline content or a Google Cloud
// Storage uri.
@@ -632,9 +636,8 @@
// Currently, only Google Cloud Storage URIs are
// supported, which must be specified in the following format:
// `gs://bucket_name/object_name` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
// For more information, see [Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 2;
}
}
@@ -685,8 +688,8 @@ message LongRunningRecognizeMetadata {
// Time of the most recent processing update.
google.protobuf.Timestamp last_update_time = 3;

// Output only. The URI of the audio file being transcribed. Empty if the
// audio was sent as byte content.
// Output only. The URI of the audio file being transcribed. Empty if the audio was sent
// as byte content.
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

@@ -805,9 +808,9 @@ message StreamingRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 5;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

@@ -828,15 +831,18 @@ message SpeechRecognitionResult {
// beginning of the audio.
google.protobuf.Duration result_end_time = 4;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Alternative hypotheses (a.k.a. n-best list).
message SpeechRecognitionAlternative {
// Transcript text representing the words that the user spoke.
// In languages that use spaces to separate words, the transcript might have a
// leading space if it isn't the first result. You can concatenate each result
// to obtain the full transcript without using a separator.
string transcript = 1;

// The confidence estimate between 0.0 and 1.0. A higher number
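
The field documentation in the cloud_speech.proto diff above (encoding and sample_rate_hertz on RecognitionConfig, the model table, SpeechContext phrase hints, and the content/uri choice on RecognitionAudio) together describes a complete synchronous Recognize request. A minimal Python sketch of such a request, assuming the standard generated google-cloud-speech v1 client; the bucket/object name, language, model, and phrase values are illustrative placeholders only:

# Minimal sketch of a synchronous Recognize call using the fields documented
# above; resource names and values are placeholders.
from google.cloud import speech_v1 as speech

client = speech.SpeechClient()

config = speech.RecognitionConfig(
    # Optional for FLAC/WAV, required otherwise (see the AudioEncoding docs).
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    # One of the models from the table above, e.g. phone_call or video.
    model="phone_call",
    # Inline phrase hints; a phrase set created via the Adaptation client
    # could be referenced instead once the v1 adaptation surface is exposed.
    speech_contexts=[speech.SpeechContext(phrases=["Speech-to-Text"])],
)

# Either inline `content` bytes or a Cloud Storage `uri`, never both.
audio = speech.RecognitionAudio(uri="gs://bucket_name/object_name")

response = client.recognize(config=config, audio=audio)
for result in response.results:
    # Alternatives are ordered by confidence; take the top hypothesis.
    print(result.alternatives[0].transcript)

LongRunningRecognize takes the same config/audio pair and returns an Operation, while StreamingRecognize wraps the config in a streaming config and sends audio in chunks over gRPC.
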
