Skip to content
This repository has been archived by the owner on Jul 20, 2023. It is now read-only.

Commit

Permalink
feat: Added StreamingAnalyzeContent API (#1004)
Browse files Browse the repository at this point in the history
- [ ] Regenerate this pull request now.

feat: Added obfuscated_external_user_id to Participant
feat: Can directly set Cloud Speech model on the SpeechToTextConfig

PiperOrigin-RevId: 483696090

Source-Link: https://togithub.com/googleapis/googleapis/commit/3772bf3656425cb32ed3525894f8b1a2a5dfa789

Source-Link: https://togithub.com/googleapis/googleapis-gen/commit/e358d269b1f0eb9964189d18768823bc9e8ab41c
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiZTM1OGQyNjliMWYwZWI5OTY0MTg5ZDE4NzY4ODIzYmM5ZThhYjQxYyJ9

BEGIN_NESTED_COMMIT
feat: Can directly set Cloud Speech model on the SpeechToTextConfig
PiperOrigin-RevId: 482665674

Source-Link: https://togithub.com/googleapis/googleapis/commit/64926d52febbf298cb82a8f472ade4a3969ba922

Source-Link: https://togithub.com/googleapis/googleapis-gen/commit/351722be163def7666f13cb76b2a295d9f2a8450
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMzUxNzIyYmUxNjNkZWY3NjY2ZjEzY2I3NmIyYTI5NWQ5ZjJhODQ1MCJ9
END_NESTED_COMMIT
  • Loading branch information
gcf-owl-bot[bot] committed Nov 11, 2022
1 parent 730d834 commit 2c98a57
Show file tree
Hide file tree
Showing 50 changed files with 3,244 additions and 10,890 deletions.
70 changes: 70 additions & 0 deletions protos/google/cloud/dialogflow/v2/audio_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,67 @@ message OutputAudioConfig {
SynthesizeSpeechConfig synthesize_speech_config = 3;
}

// [DTMF](https://en.wikipedia.org/wiki/Dual-tone_multi-frequency_signaling)
// digit in Telephony Gateway.
//
// NOTE(review): values are not prefixed with the enum name (style guidance
// would prefer e.g. `TELEPHONY_DTMF_ONE`), but this is a published API and
// renaming enum values would break generated code for existing clients.
enum TelephonyDtmf {
// Not specified. This value may be used to indicate an absent digit.
TELEPHONY_DTMF_UNSPECIFIED = 0;

// Number: '1'.
DTMF_ONE = 1;

// Number: '2'.
DTMF_TWO = 2;

// Number: '3'.
DTMF_THREE = 3;

// Number: '4'.
DTMF_FOUR = 4;

// Number: '5'.
DTMF_FIVE = 5;

// Number: '6'.
DTMF_SIX = 6;

// Number: '7'.
DTMF_SEVEN = 7;

// Number: '8'.
DTMF_EIGHT = 8;

// Number: '9'.
DTMF_NINE = 9;

// Number: '0'. (Note: '0' is wire value 10, not 0; 0 is reserved for
// the UNSPECIFIED sentinel above.)
DTMF_ZERO = 10;

// Letter: 'A'.
DTMF_A = 11;

// Letter: 'B'.
DTMF_B = 12;

// Letter: 'C'.
DTMF_C = 13;

// Letter: 'D'.
DTMF_D = 14;

// Asterisk/star: '*'.
DTMF_STAR = 15;

// Pound/diamond/hash/square/gate/octothorpe: '#'.
DTMF_POUND = 16;
}

// A wrapper of repeated TelephonyDtmf digits.
message TelephonyDtmfEvents {
// A sequence of TelephonyDtmf digits, in the order they were entered
// (repeated fields preserve order on the wire).
repeated TelephonyDtmf dtmf_events = 1;
}

// Configures speech transcription for [ConversationProfile][google.cloud.dialogflow.v2.ConversationProfile].
message SpeechToTextConfig {
// The speech model used in speech to text.
Expand All @@ -379,4 +440,13 @@ message SpeechToTextConfig {
// version of the specified model for the language does not exist, then it
// would emit an error.
SpeechModelVariant speech_model_variant = 1;

// Which Speech model to select. Select the model best suited to your domain
// to get best results. If a model is not explicitly specified, then a default
// model is used.
// Refer to
// [Cloud Speech API
// documentation](https://cloud.google.com/speech-to-text/docs/basics#select-model)
// for more details.
string model = 2;
}
221 changes: 221 additions & 0 deletions protos/google/cloud/dialogflow/v2/participant.proto
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,24 @@ service Participants {
option (google.api.method_signature) = "participant,event_input";
}

// Adds a text (chat, for example), or audio (phone recording, for example)
// message from a participant into the conversation.
// Note: This method is only available through the gRPC API (not REST).
//
// The top-level message sent to the client by the server is
// `StreamingAnalyzeContentResponse`. Multiple response messages can be
// returned in order. The first one or more messages contain the
// `recognition_result` field. Each result represents a more complete
// transcript of what the user said. The next message contains the
// `reply_text` field and potentially the `reply_audio` field. The message can
// also contain the `automated_agent_reply` field.
//
// Note: Always use agent versions for production traffic
// sent to virtual agents. See [Versions and
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
//
// (No `google.api.http` binding is declared here, consistent with the
// gRPC-only note above: bidirectional streaming is not expressible in REST.)
rpc StreamingAnalyzeContent(stream StreamingAnalyzeContentRequest) returns (stream StreamingAnalyzeContentResponse) {
}

// Gets suggested articles for a participant based on specific historical
// messages.
rpc SuggestArticles(SuggestArticlesRequest) returns (SuggestArticlesResponse) {
Expand Down Expand Up @@ -190,6 +208,36 @@ message Participant {
// media stream to this participant. This field can be updated.
string sip_recording_media_label = 6 [(google.api.field_behavior) = OPTIONAL];

// Optional. Obfuscated user id that should be associated with the created participant.
//
// You can specify a user id as follows:
//
// 1. If you set this field in
// [CreateParticipantRequest][google.cloud.dialogflow.v2.CreateParticipantRequest.participant] or
// [UpdateParticipantRequest][google.cloud.dialogflow.v2.UpdateParticipantRequest.participant],
// Dialogflow adds the obfuscated user id with the participant.
//
// 2. If you set this field in
// [AnalyzeContent][google.cloud.dialogflow.v2.AnalyzeContentRequest.obfuscated_external_user_id] or
// [StreamingAnalyzeContent][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.obfuscated_external_user_id],
// Dialogflow will update [Participant.obfuscated_external_user_id][google.cloud.dialogflow.v2.Participant.obfuscated_external_user_id].
//
// Dialogflow returns an error if you try to add a user id for a
// non-[END_USER][google.cloud.dialogflow.v2.Participant.Role.END_USER] participant.
//
// Dialogflow uses this user id for billing and measurement purposes. For
// example, Dialogflow determines whether a user in one conversation returned
// in a later conversation.
//
// Note:
//
// * Please never pass raw user ids to Dialogflow. Always obfuscate your user
// id first.
// * Dialogflow only accepts a UTF-8 encoded string, e.g., a hex digest of a
// hash function like SHA-512.
// * The length of the user id must be <= 256 characters.
string obfuscated_external_user_id = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. Key-value filters on the metadata of documents returned by article
// suggestion. If specified, article suggestion only returns suggested
// documents that match all filters in their [Document.metadata][google.cloud.dialogflow.v2.Document.metadata]. Multiple
Expand Down Expand Up @@ -425,6 +473,171 @@ message AnalyzeContentResponse {
DtmfParameters dtmf_parameters = 9;
}

// The top-level message sent by the client to the
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent] method.
//
// Multiple request messages should be sent in order:
//
// 1. The first message must contain
// [participant][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.participant],
// [config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] and optionally
// [query_params][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.query_params]. If you want
// to receive an audio response, it should also contain
// [reply_audio_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.reply_audio_config].
// The message must not contain
// [input][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input].
//
// 2. If [config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] in the first message
// was set to [audio_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.audio_config],
// all subsequent messages must contain
// [input_audio][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input_audio] to continue
// with Speech recognition.
// However, note that:
//
// * Dialogflow will bill you for the audio so far.
// * Dialogflow discards all Speech recognition results in favor of the
// text input.
//
// 3. If [StreamingAnalyzeContentRequest.config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] in the first message was set
// to [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.text_config], then the second message
// must contain only [input_text][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input_text].
// Moreover, you must not send more than two messages.
//
// After you have sent all input, you must half-close or abort the request
// stream.
message StreamingAnalyzeContentRequest {
// Required. The name of the participant this text comes from.
// Format: `projects/<Project ID>/locations/<Location
// ID>/conversations/<Conversation ID>/participants/<Participant ID>`.
string participant = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "dialogflow.googleapis.com/Participant"
}
];

// The input config. Exactly one of the members may be set; it must be
// chosen in the first request message (see the ordering rules above).
oneof config {
// Instructs the speech recognizer how to process the speech audio.
InputAudioConfig audio_config = 2;

// The natural language text to be processed.
InputTextConfig text_config = 3;
}

// Speech synthesis configuration.
// The speech synthesis settings for a virtual agent that may be configured
// for the associated conversation profile are not used when calling
// StreamingAnalyzeContent. If this configuration is not supplied, speech
// synthesis is disabled.
OutputAudioConfig reply_audio_config = 4;

// The input. Must not be set in the first request message; which member is
// valid depends on the `config` chosen in the first message.
oneof input {
// The input audio content to be recognized. Must be sent if `audio_config`
// is set in the first message. The complete audio over all streaming
// messages must not exceed 1 minute.
bytes input_audio = 5;

// The UTF-8 encoded natural language text to be processed. Must be sent if
// `text_config` is set in the first message. Text length must not exceed
// 256 bytes for virtual agent interactions. The `input_text` field can be
// only sent once.
string input_text = 6;

// The DTMF digits used to invoke intent and fill in parameter value.
//
// This input is ignored if the previous response indicated that DTMF input
// is not accepted.
TelephonyDtmfEvents input_dtmf = 9;
}

// Parameters for a Dialogflow virtual-agent query.
QueryParameters query_params = 7;

// Parameters for a human assist query.
AssistQueryParameters assist_query_params = 8;

// Additional parameters to be put into Dialogflow CX session parameters. To
// remove a parameter from the session, clients should explicitly set the
// parameter value to null.
//
// Note: this field should only be used if you are connecting to a Dialogflow
// CX agent.
google.protobuf.Struct cx_parameters = 13;

// Enable partial virtual agent responses. If this flag is not enabled,
// response stream still contains only one final response even if some
// `Fulfillment`s in Dialogflow virtual agent have been configured to return
// partial responses.
bool enable_partial_automated_agent_reply = 12;
}

// The top-level message returned from the `StreamingAnalyzeContent` method.
//
// Multiple response messages can be returned in order:
//
// 1. If the input was set to streaming audio, the first one or more messages
// contain `recognition_result`. Each `recognition_result` represents a more
// complete transcript of what the user said. The last `recognition_result`
// has `is_final` set to `true`.
//
// 2. In virtual agent stage: if `enable_partial_automated_agent_reply` is
// true, the following N (currently 1 <= N <= 4) messages
// contain `automated_agent_reply` and optionally `reply_audio`
// returned by the virtual agent. The first (N-1)
// `automated_agent_reply`s will have `automated_agent_reply_type` set to
// `PARTIAL`. The last `automated_agent_reply` has
// `automated_agent_reply_type` set to `FINAL`.
// If `enable_partial_automated_agent_reply` is not enabled, response stream
// only contains the final reply.
//
// In human assist stage: the following N (N >= 1) messages contain
// `human_agent_suggestion_results`, `end_user_suggestion_results` or
// `message`.
message StreamingAnalyzeContentResponse {
// The result of speech recognition.
StreamingRecognitionResult recognition_result = 1;

// The output text content.
// This field is set if an automated agent responded with a text for the user.
string reply_text = 2;

// The audio data bytes encoded as specified in the request.
// This field is set if:
//
// - The `reply_audio_config` field is specified in the request.
// - The automated agent, which this output comes from, responded with audio.
// In such case, the `reply_audio.config` field contains settings used to
// synthesize the speech.
//
// In some scenarios, multiple output audio fields may be present in the
// response structure. In these cases, only the top-most-level audio output
// has content.
OutputAudio reply_audio = 3;

// Only set if a Dialogflow automated agent has responded.
// Note that: [AutomatedAgentReply.detect_intent_response.output_audio][]
// and [AutomatedAgentReply.detect_intent_response.output_audio_config][]
// are always empty, use [reply_audio][google.cloud.dialogflow.v2.StreamingAnalyzeContentResponse.reply_audio] instead.
AutomatedAgentReply automated_agent_reply = 4;

// Message analyzed by CCAI (Contact Center AI).
Message message = 6;

// The suggestions for most recent human agent. The order is the same as
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
// [HumanAgentAssistantConfig.human_agent_suggestion_config][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.human_agent_suggestion_config].
repeated SuggestionResult human_agent_suggestion_results = 7;

// The suggestions for end user. The order is the same as
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
// [HumanAgentAssistantConfig.end_user_suggestion_config][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.end_user_suggestion_config].
repeated SuggestionResult end_user_suggestion_results = 8;

// Indicates the parameters of DTMF.
DtmfParameters dtmf_parameters = 10;
}

// The request message for [Participants.SuggestArticles][google.cloud.dialogflow.v2.Participants.SuggestArticles].
message SuggestArticlesRequest {
// Required. The name of the participant to fetch suggestion for.
Expand Down Expand Up @@ -720,6 +933,14 @@ message SuggestionResult {
}
}

// Defines the language used in the input text.
// Used as [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.text_config].
message InputTextConfig {
// Required. The language of this conversational query. See [Language
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
// for a list of the currently supported language codes.
string language_code = 1 [(google.api.field_behavior) = REQUIRED];
}

// Represents a part of a message possibly annotated with an entity. The part
// can be an entity or purely a part of the message between two entities or
// message start/end.
Expand Down

0 comments on commit 2c98a57

Please sign in to comment.