diff --git a/google/cloud/dialogflow/v2/agent.proto b/google/cloud/dialogflow/v2/agent.proto
index 804e30bac4925..8f7a4bbb03218 100644
--- a/google/cloud/dialogflow/v2/agent.proto
+++ b/google/cloud/dialogflow/v2/agent.proto
@@ -293,14 +293,15 @@ message Agent {
     // Not specified. This value should never be used.
     TIER_UNSPECIFIED = 0;
 
-    // Standard tier.
+    // Trial Edition, previously known as Standard Edition.
     TIER_STANDARD = 1;
 
-    // Enterprise tier (Essentials).
+    // Essentials Edition, previously known as Enterprise Essential Edition.
     TIER_ENTERPRISE = 2;
 
-    // Enterprise tier (Plus).
-    TIER_ENTERPRISE_PLUS = 3;
+    // Essentials Edition (same as TIER_ENTERPRISE), previously known as
+    // Enterprise Plus Edition.
+    TIER_ENTERPRISE_PLUS = 3 [deprecated = true];
   }
 
   // Required. The project of this agent.
diff --git a/google/cloud/dialogflow/v2/audio_config.proto b/google/cloud/dialogflow/v2/audio_config.proto
index 232b222a45992..b27e86104a79e 100644
--- a/google/cloud/dialogflow/v2/audio_config.proto
+++ b/google/cloud/dialogflow/v2/audio_config.proto
@@ -30,36 +30,6 @@ option java_outer_classname = "AudioConfigProto";
 option java_package = "com.google.cloud.dialogflow.v2";
 option objc_class_prefix = "DF";
 
-// Hints for the speech recognizer to help with recognition in a specific
-// conversation state.
-message SpeechContext {
-  // Optional. A list of strings containing words and phrases that the speech
-  // recognizer should recognize with higher likelihood.
-  //
-  // This list can be used to:
-  //
-  // * improve accuracy for words and phrases you expect the user to say,
-  // e.g. typical commands for your Dialogflow agent
-  // * add additional words to the speech recognizer vocabulary
-  // * ...
-  //
-  // See the [Cloud Speech
-  // documentation](https://cloud.google.com/speech-to-text/quotas) for usage
-  // limits.
-  repeated string phrases = 1;
-
-  // Optional. Boost for this context compared to other contexts:
-  //
-  // * If the boost is positive, Dialogflow will increase the probability that
-  // the phrases in this context are recognized over similar sounding phrases.
-  // * If the boost is unspecified or non-positive, Dialogflow will not apply
-  // any boost.
-  //
-  // Dialogflow recommends that you use boosts in the range (0, 20] and that you
-  // find a value that fits your use case with binary search.
-  float boost = 2;
-}
-
 // Audio encoding of the audio content sent in the conversational query request.
 // Refer to the
 // [Cloud Speech API
@@ -109,6 +79,36 @@ enum AudioEncoding {
   AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7;
 }
 
+// Hints for the speech recognizer to help with recognition in a specific
+// conversation state.
+message SpeechContext {
+  // Optional. A list of strings containing words and phrases that the speech
+  // recognizer should recognize with higher likelihood.
+  //
+  // This list can be used to:
+  //
+  // * improve accuracy for words and phrases you expect the user to say,
+  // e.g. typical commands for your Dialogflow agent
+  // * add additional words to the speech recognizer vocabulary
+  // * ...
+  //
+  // See the [Cloud Speech
+  // documentation](https://cloud.google.com/speech-to-text/quotas) for usage
+  // limits.
+  repeated string phrases = 1;
+
+  // Optional. Boost for this context compared to other contexts:
+  //
+  // * If the boost is positive, Dialogflow will increase the probability that
+  // the phrases in this context are recognized over similar sounding phrases.
+  // * If the boost is unspecified or non-positive, Dialogflow will not apply
+  // any boost.
+  //
+  // Dialogflow recommends that you use boosts in the range (0, 20] and that you
+  // find a value that fits your use case with binary search.
+  float boost = 2;
+}
+
 // Information for a word recognized by the speech recognizer.
 message SpeechWordInfo {
   // The word this info is for.
diff --git a/google/cloud/dialogflow/v2/session.proto b/google/cloud/dialogflow/v2/session.proto
index 1b14baf9f70f9..7f83322111763 100644
--- a/google/cloud/dialogflow/v2/session.proto
+++ b/google/cloud/dialogflow/v2/session.proto
@@ -479,10 +479,11 @@ message StreamingDetectIntentRequest {
 //
 // Multiple response messages can be returned in order:
 //
-// 1. If the input was set to streaming audio, the first one or more messages
-// contain `recognition_result`. Each `recognition_result` represents a more
-// complete transcript of what the user said. The last `recognition_result`
-// has `is_final` set to `true`.
+// 1. If the `StreamingDetectIntentRequest.input_audio` field was
+// set, the `recognition_result` field is populated for one
+// or more messages.
+// See the [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult] message for details
+// about the result message sequence.
 //
 // 2. The next message contains `response_id`, `query_result`
 // and optionally `webhook_status` if a WebHook was called.
@@ -520,35 +521,39 @@ message StreamingDetectIntentResponse {
 // that is currently being processed or an indication that this is the end
 // of the single requested utterance.
 //
-// Example:
+// While end-user audio is being processed, Dialogflow sends a series of
+// results. Each result may contain a `transcript` value. A transcript
+// represents a portion of the utterance. While the recognizer is processing
+// audio, transcript values may be interim values or finalized values.
+// Once a transcript is finalized, the `is_final` value is set to true and
+// processing continues for the next transcript.
 //
-// 1. transcript: "tube"
+// If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
+// was true, and the recognizer has completed processing audio,
+// the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
+// following (last) result contains the last finalized transcript.
 //
-// 2. transcript: "to be a"
+// The complete end-user utterance is determined by concatenating the
+// finalized transcript values received for the series of results.
 //
-// 3. transcript: "to be"
+// In the following example, single utterance is enabled. In the case where
+// single utterance is not enabled, result 7 would not occur.
 //
-// 4. transcript: "to be or not to be"
-// is_final: true
+// ```
+// Num | transcript              | message_type            | is_final
+// --- | ----------------------- | ----------------------- | --------
+// 1   | "tube"                  | TRANSCRIPT              | false
+// 2   | "to be a"               | TRANSCRIPT              | false
+// 3   | "to be"                 | TRANSCRIPT              | false
+// 4   | "to be or not to be"    | TRANSCRIPT              | true
+// 5   | "that's"                | TRANSCRIPT              | false
+// 6   | "that is"               | TRANSCRIPT              | false
+// 7   | unset                   | END_OF_SINGLE_UTTERANCE | unset
+// 8   | " that is the question" | TRANSCRIPT              | true
+// ```
 //
-// 5. transcript: " that's"
-//
-// 6. transcript: " that is"
-//
-// 7. message_type: `END_OF_SINGLE_UTTERANCE`
-//
-// 8. transcript: " that is the question"
-// is_final: true
-//
-// Only two of the responses contain final results (#4 and #8 indicated by
-// `is_final: true`). Concatenating these generates the full transcript: "to be
-// or not to be that is the question".
-//
-// In each response we populate:
-//
-// * for `TRANSCRIPT`: `transcript` and possibly `is_final`.
-//
-// * for `END_OF_SINGLE_UTTERANCE`: only `message_type`.
+// Concatenating the finalized transcripts with `is_final` set to true yields
+// the complete utterance: "to be or not to be that is the question".
 message StreamingRecognitionResult {
   // Type of the response message.
   enum MessageType {
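The result sequence documented above maps directly onto client code. The sketch below shows one way a caller might consume it with the 2.x `google-cloud-dialogflow` Python client, keeping only the finalized transcripts; it is an illustration under stated assumptions rather than part of the patch, and `project_id`, `session_id`, and the `audio_chunks` iterator are hypothetical placeholders.

```python
# Illustrative sketch (not part of the patch): consumes the documented
# StreamingRecognitionResult sequence with the google-cloud-dialogflow client.
# project_id, session_id, and audio_chunks are placeholders.
from google.cloud import dialogflow_v2 as dialogflow


def stream_and_collect_transcript(project_id, session_id, audio_chunks):
    client = dialogflow.SessionsClient()
    session = client.session_path(project_id, session_id)

    audio_config = dialogflow.InputAudioConfig(
        audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
        sample_rate_hertz=16000,
        language_code="en-US",
        single_utterance=True,  # enables the END_OF_SINGLE_UTTERANCE result (row 7)
    )

    def requests():
        # First request: session and audio configuration only.
        yield dialogflow.StreamingDetectIntentRequest(
            session=session,
            query_input=dialogflow.QueryInput(audio_config=audio_config),
        )
        # Subsequent requests: raw audio bytes.
        for chunk in audio_chunks:
            yield dialogflow.StreamingDetectIntentRequest(input_audio=chunk)

    end_of_utterance = (
        dialogflow.StreamingRecognitionResult.MessageType.END_OF_SINGLE_UTTERANCE
    )
    finalized = []
    for response in client.streaming_detect_intent(requests=requests()):
        result = response.recognition_result
        if result.message_type == end_of_utterance:
            # Recognition is done; a real client would stop sending audio here.
            continue
        if result.is_final:
            # Only finalized transcripts contribute to the complete utterance.
            finalized.append(result.transcript)

    # e.g. "to be or not to be" + " that is the question"
    return "".join(finalized)
```

Joining the finalized transcripts without a separator reproduces the concatenation shown in the table, since a finalized transcript can carry its own leading space (row 8).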