docs: clarified meaning of the legacy editions
docs: clarified semantics of the streaming APIs

PiperOrigin-RevId: 404815104
Google APIs authored and Copybara-Service committed Oct 21, 2021
1 parent f034e5f commit bb1f3e9
Showing 3 changed files with 68 additions and 62 deletions.
9 changes: 5 additions & 4 deletions google/cloud/dialogflow/v2/agent.proto
@@ -293,14 +293,15 @@ message Agent {
     // Not specified. This value should never be used.
     TIER_UNSPECIFIED = 0;
 
-    // Standard tier.
+    // Trial Edition, previously known as Standard Edition.
     TIER_STANDARD = 1;
 
-    // Enterprise tier (Essentials).
+    // Essentials Edition, previously known as Enterprise Essential Edition.
     TIER_ENTERPRISE = 2;
 
-    // Enterprise tier (Plus).
-    TIER_ENTERPRISE_PLUS = 3;
+    // Essentials Edition (same as TIER_ENTERPRISE), previously known as
+    // Enterprise Plus Edition.
+    TIER_ENTERPRISE_PLUS = 3 [deprecated = true];
   }
 
   // Required. The project of this agent.
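In client code the renamed editions surface as these enum values. A minimal sketch of pinning an agent to the Essentials Edition, assuming the google-cloud-dialogflow Python client; the project ID and agent settings are placeholders:

```python
# Sketch: set an agent's tier via the v2 Python client (assumed API surface).
from google.cloud import dialogflow_v2

client = dialogflow_v2.AgentsClient()

agent = dialogflow_v2.Agent(
    parent="projects/my-project",  # hypothetical project
    display_name="my-agent",
    default_language_code="en",
    time_zone="America/Los_Angeles",
    # TIER_ENTERPRISE_PLUS is deprecated and now means the same as
    # TIER_ENTERPRISE, so new code should set TIER_ENTERPRISE directly.
    tier=dialogflow_v2.Agent.Tier.TIER_ENTERPRISE,
)

client.set_agent(request={"agent": agent})
```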
60 changes: 30 additions & 30 deletions google/cloud/dialogflow/v2/audio_config.proto
@@ -30,36 +30,6 @@ option java_outer_classname = "AudioConfigProto";
 option java_package = "com.google.cloud.dialogflow.v2";
 option objc_class_prefix = "DF";
 
-// Hints for the speech recognizer to help with recognition in a specific
-// conversation state.
-message SpeechContext {
-  // Optional. A list of strings containing words and phrases that the speech
-  // recognizer should recognize with higher likelihood.
-  //
-  // This list can be used to:
-  //
-  // * improve accuracy for words and phrases you expect the user to say,
-  //   e.g. typical commands for your Dialogflow agent
-  // * add additional words to the speech recognizer vocabulary
-  // * ...
-  //
-  // See the [Cloud Speech
-  // documentation](https://cloud.google.com/speech-to-text/quotas) for usage
-  // limits.
-  repeated string phrases = 1;
-
-  // Optional. Boost for this context compared to other contexts:
-  //
-  // * If the boost is positive, Dialogflow will increase the probability that
-  //   the phrases in this context are recognized over similar sounding phrases.
-  // * If the boost is unspecified or non-positive, Dialogflow will not apply
-  //   any boost.
-  //
-  // Dialogflow recommends that you use boosts in the range (0, 20] and that you
-  // find a value that fits your use case with binary search.
-  float boost = 2;
-}
-
 // Audio encoding of the audio content sent in the conversational query request.
 // Refer to the
 // [Cloud Speech API
@@ -109,6 +79,36 @@ enum AudioEncoding {
   AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7;
 }
 
+// Hints for the speech recognizer to help with recognition in a specific
+// conversation state.
+message SpeechContext {
+  // Optional. A list of strings containing words and phrases that the speech
+  // recognizer should recognize with higher likelihood.
+  //
+  // This list can be used to:
+  //
+  // * improve accuracy for words and phrases you expect the user to say,
+  //   e.g. typical commands for your Dialogflow agent
+  // * add additional words to the speech recognizer vocabulary
+  // * ...
+  //
+  // See the [Cloud Speech
+  // documentation](https://cloud.google.com/speech-to-text/quotas) for usage
+  // limits.
+  repeated string phrases = 1;
+
+  // Optional. Boost for this context compared to other contexts:
+  //
+  // * If the boost is positive, Dialogflow will increase the probability that
+  //   the phrases in this context are recognized over similar sounding phrases.
+  // * If the boost is unspecified or non-positive, Dialogflow will not apply
+  //   any boost.
+  //
+  // Dialogflow recommends that you use boosts in the range (0, 20] and that you
+  // find a value that fits your use case with binary search.
+  float boost = 2;
+}
+
 // Information for a word recognized by the speech recognizer.
 message SpeechWordInfo {
   // The word this info is for.
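The `phrases` and `boost` fields translate directly into query configuration. A minimal sketch, assuming the google-cloud-dialogflow Python client; the phrase list and boost value are illustrative only:

```python
# Sketch: pass speech hints with a boosted SpeechContext (assumed client API).
from google.cloud import dialogflow_v2

speech_context = dialogflow_v2.SpeechContext(
    # Phrases the recognizer should favor, e.g. commands your agent expects.
    phrases=["book a table", "cancel my reservation"],
    # Dialogflow recommends boosts in (0, 20]; tune by binary search,
    # e.g. start at 10, then try 5 or 15 depending on recognition quality.
    boost=10.0,
)

audio_config = dialogflow_v2.InputAudioConfig(
    audio_encoding=dialogflow_v2.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
    sample_rate_hertz=16000,
    language_code="en-US",
    speech_contexts=[speech_context],
)
```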
61 changes: 33 additions & 28 deletions google/cloud/dialogflow/v2/session.proto
@@ -479,10 +479,11 @@ message StreamingDetectIntentRequest {
 //
 // Multiple response messages can be returned in order:
 //
-// 1.  If the input was set to streaming audio, the first one or more messages
-//     contain `recognition_result`. Each `recognition_result` represents a more
-//     complete transcript of what the user said. The last `recognition_result`
-//     has `is_final` set to `true`.
+// 1.  If the `StreamingDetectIntentRequest.input_audio` field was
+//     set, the `recognition_result` field is populated for one
+//     or more messages.
+//     See the [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult] message for details
+//     about the result message sequence.
 //
 // 2.  The next message contains `response_id`, `query_result`
 //     and optionally `webhook_status` if a WebHook was called.
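This ordering is what a client observes when iterating the stream. A minimal sketch of consuming it, assuming the google-cloud-dialogflow Python client and a `request_stream` generator (a placeholder name) that yields the config-bearing `StreamingDetectIntentRequest` first and audio chunks after:

```python
# Sketch: read streaming detect-intent responses in arrival order.
from google.cloud import dialogflow_v2

client = dialogflow_v2.SessionsClient()
responses = client.streaming_detect_intent(requests=request_stream)

for response in responses:
    # With streamed audio, earlier messages carry interim transcripts.
    print("transcript:", response.recognition_result.transcript)

# The last message carries the detect-intent result itself.
print("intent:", response.query_result.intent.display_name)
```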
@@ -520,35 +521,39 @@ message StreamingDetectIntentResponse {
 // that is currently being processed or an indication that this is the end
 // of the single requested utterance.
 //
-// Example:
+// While end-user audio is being processed, Dialogflow sends a series of
+// results. Each result may contain a `transcript` value. A transcript
+// represents a portion of the utterance. While the recognizer is processing
+// audio, transcript values may be interim values or finalized values.
+// Once a transcript is finalized, the `is_final` value is set to true and
+// processing continues for the next transcript.
 //
-// 1. transcript: "tube"
+// If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
+// was true, and the recognizer has completed processing audio,
+// the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
+// following (last) result contains the last finalized transcript.
 //
-// 2. transcript: "to be a"
+// The complete end-user utterance is determined by concatenating the
+// finalized transcript values received for the series of results.
 //
-// 3. transcript: "to be"
+// In the following example, single utterance is enabled. In the case where
+// single utterance is not enabled, result 7 would not occur.
 //
-// 4. transcript: "to be or not to be"
-//    is_final: true
+// ```
+// Num | transcript              | message_type            | is_final
+// --- | ----------------------- | ----------------------- | --------
+// 1   | "tube"                  | TRANSCRIPT              | false
+// 2   | "to be a"               | TRANSCRIPT              | false
+// 3   | "to be"                 | TRANSCRIPT              | false
+// 4   | "to be or not to be"    | TRANSCRIPT              | true
+// 5   | "that's"                | TRANSCRIPT              | false
+// 6   | "that is"               | TRANSCRIPT              | false
+// 7   | unset                   | END_OF_SINGLE_UTTERANCE | unset
+// 8   | " that is the question" | TRANSCRIPT              | true
+// ```
 //
-// 5. transcript: " that's"
-//
-// 6. transcript: " that is"
-//
-// 7. message_type: `END_OF_SINGLE_UTTERANCE`
-//
-// 8. transcript: " that is the question"
-//    is_final: true
-//
-// Only two of the responses contain final results (#4 and #8 indicated by
-// `is_final: true`). Concatenating these generates the full transcript: "to be
-// or not to be that is the question".
-//
-// In each response we populate:
-//
-// * for `TRANSCRIPT`: `transcript` and possibly `is_final`.
-//
-// * for `END_OF_SINGLE_UTTERANCE`: only `message_type`.
+// Concatenating the finalized transcripts with `is_final` set to true,
+// the complete utterance becomes "to be or not to be that is the question".
 message StreamingRecognitionResult {
   // Type of the response message.
   enum MessageType {
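The concatenation rule in the example table reduces to filtering on `is_final`. A minimal sketch in Python, where `results` is a placeholder for the collected `StreamingRecognitionResult` messages:

```python
# Sketch: assemble the full utterance from streaming recognition results by
# keeping only finalized transcripts and concatenating them in order.
def full_transcript(results):
    return "".join(r.transcript for r in results if r.is_final)

# With results 4 ("to be or not to be") and 8 (" that is the question")
# finalized, this yields "to be or not to be that is the question".
```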
