Skip to content
This repository has been archived by the owner on Jul 20, 2023. It is now read-only.

Commit

Permalink
feat: Added StreamingAnalyzeContent API (#1004)
Browse files Browse the repository at this point in the history
- [ ] Regenerate this pull request now.

feat: Added obfuscated_external_user_id to Participant
feat: Can directly set Cloud Speech model on the SpeechToTextConfig

PiperOrigin-RevId: 483696090

Source-Link: https://togithub.com/googleapis/googleapis/commit/3772bf3656425cb32ed3525894f8b1a2a5dfa789

Source-Link: https://togithub.com/googleapis/googleapis-gen/commit/e358d269b1f0eb9964189d18768823bc9e8ab41c
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiZTM1OGQyNjliMWYwZWI5OTY0MTg5ZDE4NzY4ODIzYmM5ZThhYjQxYyJ9

BEGIN_NESTED_COMMIT
feat: Can directly set Cloud Speech model on the SpeechToTextConfig
PiperOrigin-RevId: 482665674

Source-Link: https://togithub.com/googleapis/googleapis/commit/64926d52febbf298cb82a8f472ade4a3969ba922

Source-Link: https://togithub.com/googleapis/googleapis-gen/commit/351722be163def7666f13cb76b2a295d9f2a8450
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMzUxNzIyYmUxNjNkZWY3NjY2ZjEzY2I3NmIyYTI5NWQ5ZjJhODQ1MCJ9
END_NESTED_COMMIT
  • Loading branch information
gcf-owl-bot[bot] committed Nov 11, 2022
1 parent 730d834 commit 2c98a57
Show file tree
Hide file tree
Showing 50 changed files with 3,244 additions and 10,890 deletions.
70 changes: 70 additions & 0 deletions protos/google/cloud/dialogflow/v2/audio_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,67 @@ message OutputAudioConfig {
SynthesizeSpeechConfig synthesize_speech_config = 3;
}

// [DTMF](https://en.wikipedia.org/wiki/Dual-tone_multi-frequency_signaling)
// digit in Telephony Gateway.
//
// NOTE(review): values are not prefixed with the enum name (style guidance
// would prefer e.g. `TELEPHONY_DTMF_ONE`), but this is a published API and
// renaming enum values would break generated code for existing clients.
enum TelephonyDtmf {
// Not specified. This value may be used to indicate an absent digit.
TELEPHONY_DTMF_UNSPECIFIED = 0;

// Number: '1'.
DTMF_ONE = 1;

// Number: '2'.
DTMF_TWO = 2;

// Number: '3'.
DTMF_THREE = 3;

// Number: '4'.
DTMF_FOUR = 4;

// Number: '5'.
DTMF_FIVE = 5;

// Number: '6'.
DTMF_SIX = 6;

// Number: '7'.
DTMF_SEVEN = 7;

// Number: '8'.
DTMF_EIGHT = 8;

// Number: '9'.
DTMF_NINE = 9;

// Number: '0'. (Note: '0' is wire value 10, not 0; 0 is reserved for
// the UNSPECIFIED sentinel above.)
DTMF_ZERO = 10;

// Letter: 'A'.
DTMF_A = 11;

// Letter: 'B'.
DTMF_B = 12;

// Letter: 'C'.
DTMF_C = 13;

// Letter: 'D'.
DTMF_D = 14;

// Asterisk/star: '*'.
DTMF_STAR = 15;

// Pound/diamond/hash/square/gate/octothorpe: '#'.
DTMF_POUND = 16;
}

// A wrapper of repeated TelephonyDtmf digits.
message TelephonyDtmfEvents {
// A sequence of TelephonyDtmf digits, in the order they were entered
// (repeated fields preserve order on the wire).
repeated TelephonyDtmf dtmf_events = 1;
}

// Configures speech transcription for [ConversationProfile][google.cloud.dialogflow.v2.ConversationProfile].
message SpeechToTextConfig {
// The speech model used in speech to text.
Expand All @@ -379,4 +440,13 @@ message SpeechToTextConfig {
// version of the specified model for the language does not exist, then it
// would emit an error.
SpeechModelVariant speech_model_variant = 1;

// Which Speech model to select. Select the model best suited to your domain
// to get best results. If a model is not explicitly specified, then a default
// model is used.
// Refer to
// [Cloud Speech API
// documentation](https://cloud.google.com/speech-to-text/docs/basics#select-model)
// for more details.
string model = 2;
}
221 changes: 221 additions & 0 deletions protos/google/cloud/dialogflow/v2/participant.proto
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,24 @@ service Participants {
option (google.api.method_signature) = "participant,event_input";
}

// Adds a text (chat, for example), or audio (phone recording, for example)
// message from a participant into the conversation.
// Note: This method is only available through the gRPC API (not REST).
//
// The top-level message sent to the client by the server is
// `StreamingAnalyzeContentResponse`. Multiple response messages can be
// returned in order. The first one or more messages contain the
// `recognition_result` field. Each result represents a more complete
// transcript of what the user said. The next message contains the
// `reply_text` field and potentially the `reply_audio` field. The message can
// also contain the `automated_agent_reply` field.
//
// Note: Always use agent versions for production traffic
// sent to virtual agents. See [Versions and
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
//
// (No `google.api.http` binding is declared here, consistent with the
// gRPC-only note above: bidirectional streaming is not expressible in REST.)
rpc StreamingAnalyzeContent(stream StreamingAnalyzeContentRequest) returns (stream StreamingAnalyzeContentResponse) {
}

// Gets suggested articles for a participant based on specific historical
// messages.
rpc SuggestArticles(SuggestArticlesRequest) returns (SuggestArticlesResponse) {
Expand Down Expand Up @@ -190,6 +208,36 @@ message Participant {
// media stream to this participant. This field can be updated.
string sip_recording_media_label = 6 [(google.api.field_behavior) = OPTIONAL];

// Optional. Obfuscated user id that should be associated with the created participant.
//
// You can specify a user id as follows:
//
// 1. If you set this field in
// [CreateParticipantRequest][google.cloud.dialogflow.v2.CreateParticipantRequest.participant] or
// [UpdateParticipantRequest][google.cloud.dialogflow.v2.UpdateParticipantRequest.participant],
// Dialogflow adds the obfuscated user id with the participant.
//
// 2. If you set this field in
// [AnalyzeContent][google.cloud.dialogflow.v2.AnalyzeContentRequest.obfuscated_external_user_id] or
// [StreamingAnalyzeContent][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.obfuscated_external_user_id],
// Dialogflow will update [Participant.obfuscated_external_user_id][google.cloud.dialogflow.v2.Participant.obfuscated_external_user_id].
//
// Dialogflow returns an error if you try to add a user id for a
// non-[END_USER][google.cloud.dialogflow.v2.Participant.Role.END_USER] participant.
//
// Dialogflow uses this user id for billing and measurement purposes. For
// example, Dialogflow determines whether a user in one conversation returned
// in a later conversation.
//
// Note:
//
// * Please never pass raw user ids to Dialogflow. Always obfuscate your user
// id first.
// * Dialogflow only accepts a UTF-8 encoded string, e.g., a hex digest of a
// hash function like SHA-512.
// * The length of the user id must be <= 256 characters.
string obfuscated_external_user_id = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. Key-value filters on the metadata of documents returned by article
// suggestion. If specified, article suggestion only returns suggested
// documents that match all filters in their [Document.metadata][google.cloud.dialogflow.v2.Document.metadata]. Multiple
Expand Down Expand Up @@ -425,6 +473,171 @@ message AnalyzeContentResponse {
DtmfParameters dtmf_parameters = 9;
}

// The top-level message sent by the client to the
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent] method.
//
// Multiple request messages should be sent in order:
//
// 1. The first message must contain
// [participant][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.participant],
// [config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] and optionally
// [query_params][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.query_params]. If you want
// to receive an audio response, it should also contain
// [reply_audio_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.reply_audio_config].
// The message must not contain
// [input][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input].
//
// 2. If [config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] in the first message
// was set to [audio_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.audio_config],
// all subsequent messages must contain
// [input_audio][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input_audio] to continue
// with Speech recognition.
// However, note that:
//
// * Dialogflow will bill you for the audio so far.
// * Dialogflow discards all Speech recognition results in favor of the
// text input.
//
// 3. If [StreamingAnalyzeContentRequest.config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] in the first message was set
// to [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.text_config], then the second message
// must contain only [input_text][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input_text].
// Moreover, you must not send more than two messages.
//
// After you have sent all input, you must half-close or abort the request
// stream.
message StreamingAnalyzeContentRequest {
// Required. The name of the participant this text comes from.
// Format: `projects/<Project ID>/locations/<Location
// ID>/conversations/<Conversation ID>/participants/<Participant ID>`.
string participant = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "dialogflow.googleapis.com/Participant"
}
];

// The input config. Exactly one of the members may be set; it must be
// chosen in the first request message (see the ordering rules above).
oneof config {
// Instructs the speech recognizer how to process the speech audio.
InputAudioConfig audio_config = 2;

// The natural language text to be processed.
InputTextConfig text_config = 3;
}

// Speech synthesis configuration.
// The speech synthesis settings for a virtual agent that may be configured
// for the associated conversation profile are not used when calling
// StreamingAnalyzeContent. If this configuration is not supplied, speech
// synthesis is disabled.
OutputAudioConfig reply_audio_config = 4;

// The input. Must not be set in the first request message; which member is
// valid depends on the `config` chosen in the first message.
oneof input {
// The input audio content to be recognized. Must be sent if `audio_config`
// is set in the first message. The complete audio over all streaming
// messages must not exceed 1 minute.
bytes input_audio = 5;

// The UTF-8 encoded natural language text to be processed. Must be sent if
// `text_config` is set in the first message. Text length must not exceed
// 256 bytes for virtual agent interactions. The `input_text` field can be
// only sent once.
string input_text = 6;

// The DTMF digits used to invoke intent and fill in parameter value.
//
// This input is ignored if the previous response indicated that DTMF input
// is not accepted.
TelephonyDtmfEvents input_dtmf = 9;
}

// Parameters for a Dialogflow virtual-agent query.
QueryParameters query_params = 7;

// Parameters for a human assist query.
AssistQueryParameters assist_query_params = 8;

// Additional parameters to be put into Dialogflow CX session parameters. To
// remove a parameter from the session, clients should explicitly set the
// parameter value to null.
//
// Note: this field should only be used if you are connecting to a Dialogflow
// CX agent.
google.protobuf.Struct cx_parameters = 13;

// Enable partial virtual agent responses. If this flag is not enabled,
// response stream still contains only one final response even if some
// `Fulfillment`s in Dialogflow virtual agent have been configured to return
// partial responses.
bool enable_partial_automated_agent_reply = 12;
}

// The top-level message returned from the `StreamingAnalyzeContent` method.
//
// Multiple response messages can be returned in order:
//
// 1. If the input was set to streaming audio, the first one or more messages
// contain `recognition_result`. Each `recognition_result` represents a more
// complete transcript of what the user said. The last `recognition_result`
// has `is_final` set to `true`.
//
// 2. In virtual agent stage: if `enable_partial_automated_agent_reply` is
// true, the following N (currently 1 <= N <= 4) messages
// contain `automated_agent_reply` and optionally `reply_audio`
// returned by the virtual agent. The first (N-1)
// `automated_agent_reply`s will have `automated_agent_reply_type` set to
// `PARTIAL`. The last `automated_agent_reply` has
// `automated_agent_reply_type` set to `FINAL`.
// If `enable_partial_automated_agent_reply` is not enabled, response stream
// only contains the final reply.
//
// In human assist stage: the following N (N >= 1) messages contain
// `human_agent_suggestion_results`, `end_user_suggestion_results` or
// `message`.
message StreamingAnalyzeContentResponse {
// The result of speech recognition.
StreamingRecognitionResult recognition_result = 1;

// The output text content.
// This field is set if an automated agent responded with a text for the user.
string reply_text = 2;

// The audio data bytes encoded as specified in the request.
// This field is set if:
//
// - The `reply_audio_config` field is specified in the request.
// - The automated agent, which this output comes from, responded with audio.
// In such case, the `reply_audio.config` field contains settings used to
// synthesize the speech.
//
// In some scenarios, multiple output audio fields may be present in the
// response structure. In these cases, only the top-most-level audio output
// has content.
OutputAudio reply_audio = 3;

// Only set if a Dialogflow automated agent has responded.
// Note that: [AutomatedAgentReply.detect_intent_response.output_audio][]
// and [AutomatedAgentReply.detect_intent_response.output_audio_config][]
// are always empty, use [reply_audio][google.cloud.dialogflow.v2.StreamingAnalyzeContentResponse.reply_audio] instead.
AutomatedAgentReply automated_agent_reply = 4;

// Message analyzed by CCAI (Contact Center AI).
Message message = 6;

// The suggestions for most recent human agent. The order is the same as
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
// [HumanAgentAssistantConfig.human_agent_suggestion_config][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.human_agent_suggestion_config].
repeated SuggestionResult human_agent_suggestion_results = 7;

// The suggestions for end user. The order is the same as
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
// [HumanAgentAssistantConfig.end_user_suggestion_config][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.end_user_suggestion_config].
repeated SuggestionResult end_user_suggestion_results = 8;

// Indicates the parameters of DTMF.
DtmfParameters dtmf_parameters = 10;
}

// The request message for [Participants.SuggestArticles][google.cloud.dialogflow.v2.Participants.SuggestArticles].
message SuggestArticlesRequest {
// Required. The name of the participant to fetch suggestion for.
Expand Down Expand Up @@ -720,6 +933,14 @@ message SuggestionResult {
}
}

// Defines the language used in the input text.
// Used as [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.text_config].
message InputTextConfig {
// Required. The language of this conversational query. See [Language
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
// for a list of the currently supported language codes.
string language_code = 1 [(google.api.field_behavior) = REQUIRED];
}

// Represents a part of a message possibly annotated with an entity. The part
// can be an entity or purely a part of the message between two entities or
// message start/end.
Expand Down

0 comments on commit 2c98a57

Please sign in to comment.