Skip to content
This repository has been archived by the owner on Jul 20, 2023. It is now read-only.

Commit

Permalink
feat: add the API of StreamingAnalyzeContent (#952)
Browse files Browse the repository at this point in the history
docs: add the fields for setting CX virtual agent session parameters
PiperOrigin-RevId: 446825520
Source-Link: googleapis/googleapis@f6bb255
Source-Link: googleapis/googleapis-gen@cd1450b
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiY2QxNDUwYjQwM2I1YTNmOTI2NzZkOGEzNGYwY2ZjMDdmYzc2N2I0MCJ9
See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
feat: add the API of StreamingAnalyzeContent
PiperOrigin-RevId: 446850583
Source-Link: googleapis/googleapis@b9927eb
Source-Link: googleapis/googleapis-gen@d442854
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiZDQ0Mjg1NDk5YTBlNzU3ZTY4ZDM5ZGMxOGY5MWQyODAwNjk0YWQ5MCJ9
See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
gcf-owl-bot[bot] and gcf-owl-bot[bot] committed May 11, 2022
1 parent f2ca3fa commit ca02d94
Show file tree
Hide file tree
Showing 19 changed files with 3,138 additions and 6 deletions.
3 changes: 3 additions & 0 deletions protos/google/cloud/dialogflow/v2/fulfillment.proto
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@ message Fulfillment {

// Represents configuration for a generic web service.
// Dialogflow supports two mechanisms for authentication:
//
// - Basic authentication with username and password.
// - Authentication with additional authentication headers.
//
// More information can be found at:
// https://cloud.google.com/dialogflow/docs/fulfillment-configure.
message GenericWebService {
Expand Down Expand Up @@ -127,6 +129,7 @@ message Fulfillment {

// Required. The unique identifier of the fulfillment.
// Supported formats:
//
// - `projects/<Project ID>/agent/fulfillment`
// - `projects/<Project ID>/locations/<Location ID>/agent/fulfillment`
//
Expand Down
8 changes: 8 additions & 0 deletions protos/google/cloud/dialogflow/v2/participant.proto
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,14 @@ message AnalyzeContentRequest {
// Parameters for a human assist query.
AssistQueryParameters assist_query_params = 14;

// Additional parameters to be put into Dialogflow CX session parameters. To
// remove a parameter from the session, clients should explicitly set the
// parameter value to null.
//
// Note: this field should only be used if you are connecting to a Dialogflow
// CX agent.
google.protobuf.Struct cx_parameters = 18;

// A unique identifier for this request. Restricted to 36 ASCII characters.
// A random UUID is recommended.
// This request is only idempotent if a `request_id` is provided.
Expand Down
4 changes: 4 additions & 0 deletions protos/google/cloud/dialogflow/v2beta1/audio_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ option java_multiple_files = true;
option java_outer_classname = "AudioConfigProto";
option java_package = "com.google.cloud.dialogflow.v2beta1";
option objc_class_prefix = "DF";
option (google.api.resource_definition) = {
type: "automl.googleapis.com/Model"
pattern: "projects/{project}/locations/{location}/models/{model}"
};

// Audio encoding of the audio content sent in the conversational query request.
// Refer to the
Expand Down
4 changes: 4 additions & 0 deletions protos/google/cloud/dialogflow/v2beta1/fulfillment.proto
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@ message Fulfillment {

// Represents configuration for a generic web service.
// Dialogflow supports two mechanisms for authentication:
//
// - Basic authentication with username and password.
// - Authentication with additional authentication headers.
//
// More information can be found at:
// https://cloud.google.com/dialogflow/docs/fulfillment-configure.
message GenericWebService {
Expand Down Expand Up @@ -127,6 +129,7 @@ message Fulfillment {

// Required. The unique identifier of the fulfillment.
// Supported formats:
//
// - `projects/<Project ID>/agent/fulfillment`
// - `projects/<Project ID>/locations/<Location ID>/agent/fulfillment`
//
Expand Down Expand Up @@ -155,6 +158,7 @@ message Fulfillment {
message GetFulfillmentRequest {
// Required. The name of the fulfillment.
// Supported formats:
//
// - `projects/<Project ID>/agent/fulfillment`
// - `projects/<Project ID>/locations/<Location ID>/agent/fulfillment`
string name = 1 [
Expand Down
233 changes: 233 additions & 0 deletions protos/google/cloud/dialogflow/v2beta1/participant.proto
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,24 @@ service Participants {
option (google.api.method_signature) = "participant,event_input";
}

// Adds a text (e.g., chat) or audio (e.g., phone recording) message from a
// participant into the conversation.
// Note: This method is only available through the gRPC API (not REST).
//
// The top-level message sent to the client by the server is
// `StreamingAnalyzeContentResponse`. Multiple response messages can be
// returned in order. The first one or more messages contain the
// `recognition_result` field. Each result represents a more complete
// transcript of what the user said. The next message contains the
// `reply_text` field, and potentially the `reply_audio` and/or the
// `automated_agent_reply` fields.
//
// Note: Always use agent versions for production traffic
// sent to virtual agents. See [Versions and
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
rpc StreamingAnalyzeContent(stream StreamingAnalyzeContentRequest) returns (stream StreamingAnalyzeContentResponse) {
}

// Gets suggested articles for a participant based on specific historical
// messages.
//
Expand Down Expand Up @@ -543,6 +561,14 @@ message AnalyzeContentRequest {
// Parameters for a human assist query.
AssistQueryParameters assist_query_params = 14;

// Additional parameters to be put into Dialogflow CX session parameters. To
// remove a parameter from the session, clients should explicitly set the
// parameter value to null.
//
// Note: this field should only be used if you are connecting to a Dialogflow
// CX agent.
google.protobuf.Struct cx_parameters = 18;

// Optional. The send time of the message from end user or human agent's
// perspective. It is used for identifying the same message under one
// participant.
Expand Down Expand Up @@ -624,6 +650,182 @@ message AnalyzeContentResponse {
DtmfParameters dtmf_parameters = 9;
}

// Defines the language used in the input text.
//
// Sent as the `text_config` member of the `config` oneof in the first
// `StreamingAnalyzeContentRequest` when the client streams text input.
message InputTextConfig {
  // Required. The language of this conversational query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes.
  // Annotated REQUIRED for consistency with other required fields in this
  // file (e.g. `StreamingAnalyzeContentRequest.participant`); annotation-only
  // change, wire-compatible.
  string language_code = 1 [(google.api.field_behavior) = REQUIRED];
}

// The top-level message sent by the client to the
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent] method.
//
// Multiple request messages should be sent in order:
//
// 1. The first message must contain
// [participant][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.participant],
// [config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] and optionally
// [query_params][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.query_params]. If you want
// to receive an audio response, it should also contain
// [reply_audio_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.reply_audio_config].
// The message must not contain
// [input][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input].
//
// 2. If [config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] in the first message
// was set to [audio_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.audio_config],
// all subsequent messages must contain
// [input_audio][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_audio] to continue
// with Speech recognition.
// If you decide to rather analyze text input after you already started
// Speech recognition, please send a message with
// [StreamingAnalyzeContentRequest.input_text][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_text].
//
// However, note that:
//
// * Dialogflow will bill you for the audio so far.
// * Dialogflow discards all Speech recognition results in favor of the
// text input.
//
// 3. If [StreamingAnalyzeContentRequest.config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] in the first message was set
// to [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.text_config], then the second message
// must contain only [input_text][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_text].
// Moreover, you must not send more than two messages.
//
// After you have sent all input, you must half-close or abort the request
// stream.
message StreamingAnalyzeContentRequest {
// Required. The name of the participant this text comes from.
// Format: `projects/<Project ID>/locations/<Location
// ID>/conversations/<Conversation ID>/participants/<Participant ID>`.
// Must be set in the first request message only (see the message-level
// comment above).
string participant = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "dialogflow.googleapis.com/Participant"
}
];

// Required. The input config. Exactly one of `audio_config` or
// `text_config` is set; the choice determines which `input` member
// subsequent messages must carry.
oneof config {
// Instructs the speech recognizer how to process the speech audio.
InputAudioConfig audio_config = 2;

// The natural language text to be processed.
InputTextConfig text_config = 3;
}

// Speech synthesis configuration.
// The speech synthesis settings for a virtual agent that may be configured
// for the associated conversation profile are not used when calling
// StreamingAnalyzeContent. If this configuration is not supplied, speech
// synthesis is disabled.
OutputAudioConfig reply_audio_config = 4;

// Required. The input. Sent in the second and subsequent messages; must
// match the `config` chosen in the first message.
oneof input {
// The input audio content to be recognized. Must be sent if `audio_config`
// is set in the first message. The complete audio over all streaming
// messages must not exceed 1 minute.
bytes input_audio = 5;

// The UTF-8 encoded natural language text to be processed. Must be sent if
// `text_config` is set in the first message. Text length must not exceed
// 256 bytes. The `input_text` field can be only sent once.
string input_text = 6;

// The DTMF digits used to invoke intent and fill in parameter value.
//
// This input is ignored if the previous response indicated that DTMF input
// is not accepted.
TelephonyDtmfEvents input_dtmf = 9;
}

// Parameters for a Dialogflow virtual-agent query.
QueryParameters query_params = 7;

// Parameters for a human assist query.
AssistQueryParameters assist_query_params = 8;

// Additional parameters to be put into Dialogflow CX session parameters. To
// remove a parameter from the session, clients should explicitly set the
// parameter value to null.
//
// Note: this field should only be used if you are connecting to a Dialogflow
// CX agent.
google.protobuf.Struct cx_parameters = 13;

// Enable partial virtual agent responses. If this flag is not enabled,
// response stream still contains only one final response even if some
// `Fulfillment`s in Dialogflow virtual agent have been configured to return
// partial responses.
bool enable_partial_automated_agent_reply = 12;

// NOTE(review): field numbers 10 and 11 are not used in this message —
// presumably held for fields in other versions or in-flight features;
// confirm upstream before assigning them.
}

// The top-level message returned from the `StreamingAnalyzeContent` method.
//
// Multiple response messages can be returned in order:
//
// 1. If the input was set to streaming audio, the first one or more messages
// contain `recognition_result`. Each `recognition_result` represents a more
// complete transcript of what the user said. The last `recognition_result`
// has `is_final` set to `true`.
//
// 2. In virtual agent stage: if `enable_partial_automated_agent_reply` is
// true, the following N (currently 1 <= N <= 4) messages
// contain `automated_agent_reply` and optionally `reply_audio`
// returned by the virtual agent. The first (N-1)
// `automated_agent_reply`s will have `automated_agent_reply_type` set to
// `PARTIAL`. The last `automated_agent_reply` has
// `automated_agent_reply_type` set to `FINAL`.
// If `enable_partial_automated_agent_reply` is not enabled, response stream
// only contains the final reply.
//
// In human assist stage: the following N (N >= 1) messages contain
// `human_agent_suggestion_results`, `end_user_suggestion_results` or
// `message`.
message StreamingAnalyzeContentResponse {
// The result of speech recognition.
StreamingRecognitionResult recognition_result = 1;

// Optional. The output text content.
// This field is set if an automated agent responded with a text for the user.
string reply_text = 2;

// Optional. The audio data bytes encoded as specified in the request.
// This field is set if:
//
// - The `reply_audio_config` field is specified in the request.
// - The automated agent, which this output comes from, responded with audio.
// In such case, the `reply_audio.config` field contains settings used to
// synthesize the speech.
//
// In some scenarios, multiple output audio fields may be present in the
// response structure. In these cases, only the top-most-level audio output
// has content.
OutputAudio reply_audio = 3;

// Optional. Only set if a Dialogflow automated agent has responded.
// Note that: [AutomatedAgentReply.detect_intent_response.output_audio][]
// and [AutomatedAgentReply.detect_intent_response.output_audio_config][]
// are always empty, use [reply_audio][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentResponse.reply_audio] instead.
AutomatedAgentReply automated_agent_reply = 4;

// Output only. Message analyzed by CCAI.
Message message = 6;

// The suggestions for most recent human agent. The order is the same as
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
// [HumanAgentAssistantConfig.human_agent_suggestion_config][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.human_agent_suggestion_config].
repeated SuggestionResult human_agent_suggestion_results = 7;

// The suggestions for end user. The order is the same as
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
// [HumanAgentAssistantConfig.end_user_suggestion_config][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.end_user_suggestion_config].
repeated SuggestionResult end_user_suggestion_results = 8;

// Indicates the parameters of DTMF.
DtmfParameters dtmf_parameters = 10;

// NOTE(review): field numbers 5 and 9 are not used in this message —
// presumably held for fields in other versions; confirm upstream before
// assigning them.
}

// Represents a part of a message possibly annotated with an entity. The part
// can be an entity or purely a part of the message between two entities or
// message start/end.
Expand Down Expand Up @@ -1103,6 +1305,33 @@ message ResponseMessage {

}

// Represents an audio message that is composed of both segments
// synthesized from the Dialogflow agent prompts and ones hosted externally
// at the specified URIs.
message MixedAudio {
// Represents one segment of audio.
message Segment {
// Content of the segment. Exactly one of `audio` or `uri` is set per
// segment (oneof semantics).
oneof content {
// Raw audio synthesized from the Dialogflow agent's response using
// the output config specified in the request.
bytes audio = 1;

// Client-specific URI that points to an audio clip accessible to the
// client.
string uri = 2;
}

// Whether the playback of this segment can be interrupted by the end
// user's speech and the client should then start the next Dialogflow
// request.
bool allow_playback_interruption = 3;
}

// Segments this audio response is composed of.
repeated Segment segments = 1;
}

// Represents the signal that tells the client to transfer the phone call
// connected to the agent to a third-party endpoint.
message TelephonyTransferCall {
Expand Down Expand Up @@ -1132,6 +1361,10 @@ message ResponseMessage {
// ended.
EndInteraction end_interaction = 4;

// An audio response message composed of both the synthesized Dialogflow
// agent responses and the audios hosted in places known to the client.
MixedAudio mixed_audio = 5;

// A signal that the client should transfer the phone call connected to
// this agent to a third-party endpoint.
TelephonyTransferCall telephony_transfer_call = 6;
Expand Down

0 comments on commit ca02d94

Please sign in to comment.