Skip to content

Commit

Permalink
feat: add processing strategy to batch recognition requests (#4268)
Browse files Browse the repository at this point in the history
- [ ] Regenerate this pull request now.

PiperOrigin-RevId: 530882015

Source-Link: https://togithub.com/googleapis/googleapis/commit/189bdfa76e674f4067e3c183c067aa353fb3aca3

Source-Link: https://togithub.com/googleapis/googleapis-gen/commit/64c7a843afdda8c651ffca6e1a374ac7801d3728
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXNwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiNjRjN2E4NDNhZmRkYThjNjUxZmZjYTZlMWEzNzRhYzc4MDFkMzcyOCJ9
  • Loading branch information
gcf-owl-bot[bot] committed May 10, 2023
1 parent 07bb06c commit 8b5caeb
Show file tree
Hide file tree
Showing 14 changed files with 206 additions and 155 deletions.
105 changes: 52 additions & 53 deletions packages/google-cloud-speech/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,8 @@ message ListRecognizersRequest {
];

// The maximum number of Recognizers to return. The service may return fewer
// than this value. If unspecified, at most 20 Recognizers will be returned.
// The maximum value is 20; values above 20 will be coerced to 20.
// than this value. If unspecified, at most 5 Recognizers will be returned.
// The maximum value is 100; values above 100 will be coerced to 100.
int32 page_size = 2;

// A page token, received from a previous
Expand Down Expand Up @@ -588,56 +588,19 @@ message Recognizer {
// Required. Which model to use for recognition requests. Select the model
// best suited to your domain to get best results.
//
// Supported models:
//
// - `latest_long`
//
// Best for long form content like media or conversation.
//
// - `latest_short`
//
// Best for short form content like commands or single shot directed speech.
// When using this model, the service will stop transcribing audio after the
// first utterance is detected and completed.
//
// When using this model,
// [SEPARATE_RECOGNITION_PER_CHANNEL][google.cloud.speech.v2.RecognitionFeatures.MultiChannelMode.SEPARATE_RECOGNITION_PER_CHANNEL]
// is not supported; multi-channel audio is accepted, but only the first
// channel will be processed and transcribed.
//
// - `telephony`
//
// Best for audio that originated from a phone call (typically recorded at
// an 8khz sampling rate).
//
// - `medical_conversation`
//
// For conversations between a medical provider—for example, a doctor or
// nurse—and a patient. Use this model when both a provider and a patient
// are speaking. Words uttered by each speaker are automatically detected
// and labeled in the returned transcript.
//
// For supported features please see [medical models
// documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
//
// - `medical_dictation`
//
// For dictated notes spoken by a single medical provider—for example, a
// doctor dictating notes about a patient's blood test results.
//
// For supported features please see [medical models
// documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
//
// - `usm`
//
// The next generation of Speech-to-Text models from Google.
// Guidance for choosing which model to use can be found in the [Transcription
// Models
// Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
// and the models supported in each region can be found in the [Table Of
// Supported
// Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
string model = 4 [(google.api.field_behavior) = REQUIRED];

// Required. The language of the supplied audio as a
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
//
// Supported languages for each model are listed at:
// https://cloud.google.com/speech-to-text/docs/languages
// Supported languages for each model are listed in the [Table of Supported
// Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
//
// If additional languages are provided, recognition result will contain
// recognition in the most likely language detected. The recognition result
Expand Down Expand Up @@ -1178,6 +1141,17 @@ message StreamingRecognizeRequest {
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
// method.
message BatchRecognizeRequest {
// Possible processing strategies for batch requests.
enum ProcessingStrategy {
// Default value for the processing strategy. The request is processed as
// soon as it's received.
PROCESSING_STRATEGY_UNSPECIFIED = 0;

// If selected, processes the request during lower utilization periods for a
// price discount. The request is fulfilled within 24 hours.
DYNAMIC_BATCHING = 1;
}

// Required. Resource name of the recognizer to be used for ASR.
string recognizer = 1 [
(google.api.field_behavior) = REQUIRED,
Expand Down Expand Up @@ -1215,6 +1189,9 @@ message BatchRecognizeRequest {

// Configuration options for where to output the transcripts of each file.
RecognitionOutputConfig recognition_output_config = 6;

// Processing strategy to use for this request.
ProcessingStrategy processing_strategy = 7;
}

// Output configurations for Cloud Storage.
Expand Down Expand Up @@ -1793,10 +1770,10 @@ message ListCustomClassesRequest {
}
];

// Number of results per requests. A valid page_size ranges from 0 to 20
// Number of results per request. A valid page_size ranges from 0 to 100
// inclusive. If the page_size is zero or unspecified, a page size of 5 will
// be chosen. If the page size exceeds 20, it will be coerced down to 20. Note
// that a call might return fewer results than the requested page size.
// be chosen. If the page size exceeds 100, it will be coerced down to 100.
// Note that a call might return fewer results than the requested page size.
int32 page_size = 2;

// A page token, received from a previous
Expand Down Expand Up @@ -1948,8 +1925,8 @@ message ListPhraseSetsRequest {
];

// The maximum number of PhraseSets to return. The service may return fewer
// than this value. If unspecified, at most 20 PhraseSets will be returned.
// The maximum value is 20; values above 20 will be coerced to 20.
// than this value. If unspecified, at most 5 PhraseSets will be returned.
// The maximum value is 100; values above 100 will be coerced to 100.
int32 page_size = 2;

// A page token, received from a previous
Expand Down
15 changes: 15 additions & 0 deletions packages/google-cloud-speech/protos/protos.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 56 additions & 0 deletions packages/google-cloud-speech/protos/protos.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions packages/google-cloud-speech/protos/protos.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 0 additions & 18 deletions packages/google-cloud-speech/samples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
* [Speech.update_phrase_set](#speech.update_phrase_set)
* [Speech.update_recognizer](#speech.update_recognizer)
* [Quickstart](#quickstart)
* [Quickstart.test](#quickstart.test)

## Before you begin

Expand Down Expand Up @@ -924,23 +923,6 @@ __Usage:__
`node packages/google-cloud-speech/samples/quickstart.js`


-----




### Quickstart.test

View the [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-cloud-speech/samples/test/quickstart.test.js).

[![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-cloud-speech/samples/test/quickstart.test.js,samples/README.md)

__Usage:__


`node packages/google-cloud-speech/samples/test/quickstart.test.js`





Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"clientLibrary": {
"name": "nodejs-speech",
"version": "5.4.0",
"version": "5.4.1",
"language": "TYPESCRIPT",
"apis": [
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"clientLibrary": {
"name": "nodejs-speech",
"version": "5.4.0",
"version": "5.4.1",
"language": "TYPESCRIPT",
"apis": [
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"clientLibrary": {
"name": "nodejs-speech",
"version": "5.4.0",
"version": "5.4.1",
"language": "TYPESCRIPT",
"apis": [
{
Expand Down Expand Up @@ -418,7 +418,7 @@
"segments": [
{
"start": 25,
"end": 88,
"end": 92,
"type": "FULL"
}
],
Expand Down Expand Up @@ -446,6 +446,10 @@
{
"name": "recognition_output_config",
"type": ".google.cloud.speech.v2.RecognitionOutputConfig"
},
{
"name": "processing_strategy",
"type": ".google.cloud.speech.v2.BatchRecognizeRequest.ProcessingStrategy"
}
],
"resultType": ".google.longrunning.Operation",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ function main(recognizer) {
* Configuration options for where to output the transcripts of each file.
*/
// const recognitionOutputConfig = {}
/**
* Processing strategy to use for this request.
*/
// const processingStrategy = {}

// Imports the Speech library
const {SpeechClient} = require('@google-cloud/speech').v2;
Expand Down

0 comments on commit 8b5caeb

Please sign in to comment.