Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
fix: allow calls with no request, add JSON proto
Browse files Browse the repository at this point in the history
  • Loading branch information
yoshi-automation authored and alexander-fenster committed Aug 2, 2019
1 parent 678e134 commit 61349c0
Show file tree
Hide file tree
Showing 7 changed files with 1,802 additions and 19 deletions.
44 changes: 36 additions & 8 deletions protos/google/cloud/speech/v1p1beta1/cloud_speech.proto
Original file line number Diff line number Diff line change
Expand Up @@ -306,19 +306,24 @@ message RecognitionConfig {
// *Optional* If 'true', enables speaker detection for each recognized word in
// the top alternative of the recognition result using a speaker_tag provided
// in the WordInfo.
// Note: When this is true, we send all the words from the beginning of the
// Note: Use diarization_config instead.
bool enable_speaker_diarization = 16 [deprecated = true];

// *Optional*
// If set, specifies the estimated number of speakers in the conversation.
// Defaults to '2'. Ignored unless enable_speaker_diarization is set to true.
// Note: Use diarization_config instead.
int32 diarization_speaker_count = 17 [deprecated = true];

// *Optional* Config to enable speaker diarization and set additional
// parameters to make diarization better suited for your application.
// Note: When this is enabled, we send all the words from the beginning of the
// audio for the top alternative in every consecutive STREAMING response.
// This is done in order to improve our speaker tags as our models learn to
// identify the speakers in the conversation over time.
// For non-streaming requests, the diarization results will be provided only
// in the top alternative of the FINAL SpeechRecognitionResult.
bool enable_speaker_diarization = 16;

// *Optional*
// If set, specifies the estimated number of speakers in the conversation.
// If not set, defaults to '2'.
// Ignored unless enable_speaker_diarization is set to true.
int32 diarization_speaker_count = 17;
SpeakerDiarizationConfig diarization_config = 19;

// *Optional* Metadata regarding this request.
RecognitionMetadata metadata = 9;
Expand Down Expand Up @@ -368,6 +373,29 @@ message RecognitionConfig {
bool use_enhanced = 14;
}

// *Optional* Config to enable speaker diarization and to bound the number of
// speakers to detect. Referenced by the diarization_config field of
// RecognitionConfig.
message SpeakerDiarizationConfig {
// *Optional* If 'true', enables speaker detection for each recognized word in
// the top alternative of the recognition result using a speaker_tag provided
// in the WordInfo.
bool enable_speaker_diarization = 1;

// Note: min_speaker_count and max_speaker_count together describe a range.
// Set min_speaker_count = max_speaker_count to fix the number of
// speakers to be detected in the audio.

// *Optional*
// Minimum number of speakers in the conversation. Together with
// max_speaker_count, this defines a range that gives you more flexibility by
// allowing the system to automatically determine the correct number of
// speakers. Set this equal to max_speaker_count to fix the number of speakers
// to be detected. If not set, the default value is 2.
int32 min_speaker_count = 2;

// *Optional*
// Maximum number of speakers in the conversation. Together with
// min_speaker_count, this defines a range that gives you more flexibility by
// allowing the system to automatically determine the correct number of
// speakers. Set this equal to min_speaker_count to fix the number of speakers
// to be detected. If not set, the default value is 6.
int32 max_speaker_count = 3;
}

// Description of audio data to be recognized.
message RecognitionMetadata {
// Use case categories that the audio recognition request can be described
Expand Down
Loading

0 comments on commit 61349c0

Please sign in to comment.