// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dialogflow.v2;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dialogflow/v2/audio_config.proto";
import "google/cloud/dialogflow/v2/context.proto";
import "google/cloud/dialogflow/v2/intent.proto";
import "google/cloud/dialogflow/v2/session_entity_type.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/rpc/status.proto";
import "google/type/latlng.proto";
option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.V2";
option go_package = "cloud.google.com/go/dialogflow/apiv2/dialogflowpb;dialogflowpb";
option java_multiple_files = true;
option java_outer_classname = "SessionProto";
option java_package = "com.google.cloud.dialogflow.v2";
option objc_class_prefix = "DF";
option (google.api.resource_definition) = {
type: "dialogflow.googleapis.com/Session"
pattern: "projects/{project}/agent/sessions/{session}"
pattern: "projects/{project}/agent/environments/{environment}/users/{user}/sessions/{session}"
pattern: "projects/{project}/locations/{location}/agent/sessions/{session}"
pattern: "projects/{project}/locations/{location}/agent/environments/{environment}/users/{user}/sessions/{session}"
};
// A service used for session interactions.
//
// For more information, see the [API interactions
// guide](https://cloud.google.com/dialogflow/docs/api-overview).
service Sessions {
option (google.api.default_host) = "dialogflow.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform,"
"https://www.googleapis.com/auth/dialogflow";
// Processes a natural language query and returns structured, actionable data
// as a result. This method is not idempotent, because it may cause contexts
// and session entity types to be updated, which in turn might affect
// results of future queries.
//
// If you might use
// [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
// or other CCAI products now or in the future, consider using
// [AnalyzeContent][google.cloud.dialogflow.v2.Participants.AnalyzeContent]
// instead of `DetectIntent`. `AnalyzeContent` has additional
// functionality for Agent Assist and other CCAI products.
//
// Note: Always use agent versions for production traffic.
// See [Versions and
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
option (google.api.http) = {
post: "/v2/{session=projects/*/agent/sessions/*}:detectIntent"
body: "*"
additional_bindings {
post: "/v2/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent"
body: "*"
}
additional_bindings {
post: "/v2/{session=projects/*/locations/*/agent/sessions/*}:detectIntent"
body: "*"
}
additional_bindings {
post: "/v2/{session=projects/*/locations/*/agent/environments/*/users/*/sessions/*}:detectIntent"
body: "*"
}
};
option (google.api.method_signature) = "session,query_input";
}
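// Example: a minimal Python sketch of a text DetectIntent call, assuming the
// `google-cloud-dialogflow` client library; the project and session IDs are
// placeholders.
//
// ```
// from google.cloud import dialogflow_v2 as dialogflow
//
// client = dialogflow.SessionsClient()
// # Builds "projects/<Project ID>/agent/sessions/<Session ID>", matching the
// # session resource patterns declared above.
// session = client.session_path("my-project", "my-session-id")
//
// query_input = dialogflow.QueryInput(
//     text=dialogflow.TextInput(text="book a flight", language_code="en-US")
// )
// response = client.detect_intent(
//     request={"session": session, "query_input": query_input}
// )
// result = response.query_result
// print(result.query_text, result.intent.display_name,
//       result.intent_detection_confidence)
// ```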
// Processes a natural language query in audio format in a streaming fashion
// and returns structured, actionable data as a result. This method is only
// available via the gRPC API (not REST).
//
// If you might use
// [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
// or other CCAI products now or in the future, consider using
// [StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent]
// instead of `StreamingDetectIntent`. `StreamingAnalyzeContent` has
// additional functionality for Agent Assist and other CCAI products.
//
// Note: Always use agent versions for production traffic.
// See [Versions and
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
rpc StreamingDetectIntent(stream StreamingDetectIntentRequest)
returns (stream StreamingDetectIntentResponse) {}
}
// The request to detect the user's intent.
message DetectIntentRequest {
// Required. The name of the session this query is sent to. Format:
// `projects/<Project ID>/agent/sessions/<Session ID>`, or
// `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
// ID>/sessions/<Session ID>`. If `Environment ID` is not specified, the
// default 'draft' environment is assumed (`Environment ID` may also be
// referred to as the environment name in some places). If `User ID` is not
// specified, "-" is used. It's up to the API caller to choose an appropriate
// `Session ID` and `User ID`. They can be a random number or some type of
// user and session
// identifiers (preferably hashed). The length of the `Session ID` and
// `User ID` must not exceed 36 characters.
//
// For more information, see the [API interactions
// guide](https://cloud.google.com/dialogflow/docs/api-overview).
//
// Note: Always use agent versions for production traffic.
// See [Versions and
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
string session = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "dialogflow.googleapis.com/Session"
}
];
// The parameters of this query.
QueryParameters query_params = 2;
// Required. The input specification. It can be set to:
//
// 1. an audio config which instructs the speech recognizer how to process
// the speech audio,
//
// 2. a conversational query in the form of text, or
//
// 3. an event that specifies which intent to trigger.
QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];
// Instructs the speech synthesizer how to generate the output
// audio. If this field is not set and agent-level speech synthesizer is not
// configured, no output audio is generated.
OutputAudioConfig output_audio_config = 4;
// Mask for
// [output_audio_config][google.cloud.dialogflow.v2.DetectIntentRequest.output_audio_config]
// indicating which settings in this request-level config should override
// speech synthesizer settings defined at agent-level.
//
// If unspecified or empty,
// [output_audio_config][google.cloud.dialogflow.v2.DetectIntentRequest.output_audio_config]
// replaces the agent-level config in its entirety.
google.protobuf.FieldMask output_audio_config_mask = 7;
// The natural language speech audio to be processed. This field
// should be populated if and only if `query_input` is set to an input audio
// config. A single request can contain up to 1 minute of speech audio data.
bytes input_audio = 5;
}
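// As noted above, `input_audio` is populated if and only if `query_input`
// carries an audio config. A hedged Python sketch (same client-library
// assumption as the earlier example; the audio file path is a placeholder):
//
// ```
// from google.cloud import dialogflow_v2 as dialogflow
//
// client = dialogflow.SessionsClient()
// session = client.session_path("my-project", "my-session-id")
//
// audio_config = dialogflow.InputAudioConfig(
//     audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
//     sample_rate_hertz=16000,
//     language_code="en-US",
// )
// with open("utterance.wav", "rb") as f:  # at most 1 minute of audio
//     audio_bytes = f.read()
//
// response = client.detect_intent(
//     request={
//         "session": session,
//         "query_input": dialogflow.QueryInput(audio_config=audio_config),
//         "input_audio": audio_bytes,
//     }
// )
// ```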
// The message returned from the DetectIntent method.
message DetectIntentResponse {
// The unique identifier of the response. It can be used to
// locate a response in the training example set or for reporting issues.
string response_id = 1;
// The selected results of the conversational query or event processing.
// See `alternative_query_results` for additional potential results.
QueryResult query_result = 2;
// Specifies the status of the webhook request.
google.rpc.Status webhook_status = 3;
// The audio data bytes encoded as specified in the request.
// Note: The output audio is generated based on the values of default platform
// text responses found in the `query_result.fulfillment_messages` field. If
// multiple default text responses exist, they will be concatenated when
// generating audio. If no default platform text responses exist, the
// generated audio content will be empty.
//
// In some scenarios, multiple output audio fields may be present in the
// response structure. In these cases, only the top-most-level audio output
// has content.
bytes output_audio = 4;
// The config used by the speech synthesizer to generate the output audio.
OutputAudioConfig output_audio_config = 6;
}
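// A Python sketch of requesting synthesized output audio and saving the
// returned bytes, continuing the earlier DetectIntent example (the encoding
// choice is illustrative):
//
// ```
// output_audio_config = dialogflow.OutputAudioConfig(
//     audio_encoding=dialogflow.OutputAudioEncoding.OUTPUT_AUDIO_ENCODING_LINEAR_16
// )
// response = client.detect_intent(
//     request={"session": session, "query_input": query_input,
//              "output_audio_config": output_audio_config}
// )
// # Empty if no default platform text responses exist (see the field docs).
// with open("reply.wav", "wb") as f:
//     f.write(response.output_audio)
// ```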
// Represents the parameters of the conversational query.
message QueryParameters {
// The time zone of this conversational query from the
// [time zone database](https://www.iana.org/time-zones), e.g.,
// America/New_York, Europe/Paris. If not provided, the time zone specified in
// agent settings is used.
string time_zone = 1;
// The geo location of this conversational query.
google.type.LatLng geo_location = 2;
// The collection of contexts to be activated before this query is
// executed.
repeated Context contexts = 3;
// Specifies whether to delete all contexts in the current session
// before the new ones are activated.
bool reset_contexts = 4;
// Additional session entity types to replace or extend developer
// entity types with. The entity synonyms apply to all languages and persist
// for the session of this query.
repeated SessionEntityType session_entity_types = 5;
// This field can be used to pass custom data to your webhook.
// Arbitrary JSON objects are supported.
// If supplied, the value is used to populate the
// `WebhookRequest.original_detect_intent_request.payload`
// field sent to your webhook.
google.protobuf.Struct payload = 6;
// Configures the type of sentiment analysis to perform. If not
// provided, sentiment analysis is not performed.
SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10;
// This field can be used to pass HTTP headers for a webhook
// call. These headers will be sent to the webhook along with the headers
// that have been configured through the Dialogflow web console. The headers
// defined within this field will overwrite the headers configured through
// the Dialogflow console if there is a conflict. Header names are
// case-insensitive. Headers reserved by Google are not allowed, including
// "Host", "Content-Length", "Connection", "From", "User-Agent",
// "Accept-Encoding", "If-Modified-Since", "If-None-Match", "X-Forwarded-For",
// etc.
map<string, string> webhook_headers = 14;
// The platform of the virtual agent response messages.
//
// If not empty, only emits messages from this platform in the response.
// Valid values are the enum names of
// [platform][google.cloud.dialogflow.v2.Intent.Message.platform].
string platform = 18;
}
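// For illustration, a Python sketch that passes request-level parameters,
// continuing the earlier DetectIntent example. The payload key and header
// name are hypothetical; the plain dicts are assumed to be marshaled into
// the `payload` Struct and the `webhook_headers` map by the client library.
//
// ```
// query_params = dialogflow.QueryParameters(
//     time_zone="America/New_York",
//     reset_contexts=False,
//     payload={"user_tier": "gold"},                 # sent to your webhook
//     webhook_headers={"X-Custom-Header": "value"},  # hypothetical header
// )
// response = client.detect_intent(
//     request={"session": session, "query_input": query_input,
//              "query_params": query_params}
// )
// ```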
// Represents the query input. It can contain one of the following:
//
// 1. An audio config which instructs the speech recognizer how to process the
// speech audio.
//
// 2. A conversational query in the form of text.
//
// 3. An event that specifies which intent to trigger.
message QueryInput {
// Required. The input specification.
oneof input {
// Instructs the speech recognizer how to process the speech audio.
InputAudioConfig audio_config = 1;
// The natural language text to be processed. Text length must not exceed
// 256 characters for virtual agent interactions.
TextInput text = 2;
// The event to be processed.
EventInput event = 3;
}
}
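// Because `input` is a oneof, exactly one variant is set per `QueryInput`.
// A Python sketch of the three forms (same client-library assumption as the
// earlier examples):
//
// ```
// # 1. Audio: recognizer settings; the audio bytes travel separately in the
// #    request's `input_audio` field.
// qi_audio = dialogflow.QueryInput(audio_config=dialogflow.InputAudioConfig(
//     audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
//     sample_rate_hertz=16000,
//     language_code="en-US",
// ))
// # 2. Text.
// qi_text = dialogflow.QueryInput(
//     text=dialogflow.TextInput(text="hi", language_code="en-US"))
// # 3. Event.
// qi_event = dialogflow.QueryInput(
//     event=dialogflow.EventInput(name="welcome_event", language_code="en-US"))
// ```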
// Represents the result of a conversational query or event processing.
message QueryResult {
// The original conversational query text:
//
// - If natural language text was provided as input, `query_text` contains
// a copy of the input.
// - If natural language speech audio was provided as input, `query_text`
// contains the speech recognition result. If the speech recognizer produced
// multiple alternatives, a particular one is picked.
// - If automatic spell correction is enabled, `query_text` will contain the
// corrected user input.
string query_text = 1;
// The language that was triggered during intent detection.
// See [Language
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
// for a list of the currently supported language codes.
string language_code = 15;
// The Speech recognition confidence between 0.0 and 1.0. A higher number
// indicates an estimated greater likelihood that the recognized words are
// correct. The default of 0.0 is a sentinel value indicating that confidence
// was not set.
//
// This field is not guaranteed to be accurate or set. In particular this
// field isn't set for StreamingDetectIntent since the streaming endpoint has
// separate confidence estimates per portion of the audio in
// StreamingRecognitionResult.
float speech_recognition_confidence = 2;
// The action name from the matched intent.
string action = 3;
// The collection of extracted parameters.
//
// Depending on your protocol or client library language, this is a
// map, associative array, symbol table, dictionary, or JSON object
// composed of a collection of (MapKey, MapValue) pairs:
//
// * MapKey type: string
// * MapKey value: parameter name
// * MapValue type: If parameter's entity type is a composite entity then use
// map, otherwise, depending on the parameter value type, it could be one of
// string, number, boolean, null, list or map.
// * MapValue value: If parameter's entity type is a composite entity then use
// map from composite entity property names to property values, otherwise,
// use parameter value.
google.protobuf.Struct parameters = 4;
// This field is set to:
//
// - `false` if the matched intent has required parameters and not all of
// the required parameter values have been collected.
// - `true` if all required parameter values have been collected, or if the
// matched intent doesn't contain any required parameters.
bool all_required_params_present = 5;
// Indicates whether the conversational query triggers a cancellation for slot
// filling. For more information, see the [cancel slot filling
// documentation](https://cloud.google.com/dialogflow/es/docs/intents-actions-parameters#cancel).
bool cancels_slot_filling = 21;
// The text to be pronounced to the user or shown on the screen.
// Note: This is a legacy field; prefer `fulfillment_messages`.
string fulfillment_text = 6;
// The collection of rich messages to present to the user.
repeated Intent.Message fulfillment_messages = 7;
// If the query was fulfilled by a webhook call, this field is set to the
// value of the `source` field returned in the webhook response.
string webhook_source = 8;
// If the query was fulfilled by a webhook call, this field is set to the
// value of the `payload` field returned in the webhook response.
google.protobuf.Struct webhook_payload = 9;
// The collection of output contexts. If applicable,
// `output_contexts.parameters` contains entries with name
// `<parameter name>.original` containing the original parameter values
// before the query.
repeated Context output_contexts = 10;
// The intent that matched the conversational query. Only some
// fields are filled in this message, including but not limited to:
// `name`, `display_name`, `end_interaction` and `is_fallback`.
Intent intent = 11;
// The intent detection confidence. Values range from 0.0
// (completely uncertain) to 1.0 (completely certain).
// This value is for informational purpose only and is only used to
// help match the best intent within the classification threshold.
// This value may change for the same end-user expression at any time due to a
// model retraining or change in implementation.
// If there are multiple `knowledge_answers` messages, this value is set to
// the greatest `knowledgeAnswers.match_confidence` value in the list.
float intent_detection_confidence = 12;
// Free-form diagnostic information for the associated detect intent request.
// The fields of this data can change without notice, so you should not write
// code that depends on its structure.
// The data may contain:
//
// - webhook call latency
// - webhook errors
google.protobuf.Struct diagnostic_info = 14;
// The sentiment analysis result, which depends on the
// `sentiment_analysis_request_config` specified in the request.
SentimentAnalysisResult sentiment_analysis_result = 17;
}
// The top-level message sent by the client to the
// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2.Sessions.StreamingDetectIntent]
// method.
//
// Multiple request messages should be sent in order:
//
// 1. The first message must contain
// [session][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.session],
// [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input]
// plus optionally
// [query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params].
// If the client wants to receive an audio response, it should also contain
// [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config].
// The message must not contain
// [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio].
// 2. If
// [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input]
// was set to
// [query_input.audio_config][google.cloud.dialogflow.v2.InputAudioConfig],
// all subsequent messages must contain
// [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio]
// to continue with Speech recognition. If you decide instead to detect an
// intent from text input after you have already started Speech recognition,
// send a message with
// [query_input.text][google.cloud.dialogflow.v2.QueryInput.text].
//
// However, note that:
//
// * Dialogflow will bill you for the audio duration so far.
// * Dialogflow discards all Speech recognition results in favor of the
// input text.
// * Dialogflow will use the language code from the first message.
//
// After you have sent all input, you must half-close or abort the request
// stream. (A client sketch follows this message definition below.)
message StreamingDetectIntentRequest {
// Required. The name of the session the query is sent to.
// Format of the session name:
// `projects/<Project ID>/agent/sessions/<Session ID>`, or
// `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
// ID>/sessions/<Session ID>`. If `Environment ID` is not specified, the
// default 'draft' environment is assumed. If `User ID` is not specified,
// "-" is used. It's up to the API caller to choose an appropriate
// `Session ID` and `User ID`. They can be a random number or some type of
// user and session
// identifiers (preferably hashed). The length of the `Session ID` and
// `User ID` must not exceed 36 characters.
//
// For more information, see the [API interactions
// guide](https://cloud.google.com/dialogflow/docs/api-overview).
//
// Note: Always use agent versions for production traffic.
// See [Versions and
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
string session = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "dialogflow.googleapis.com/Session"
}
];
// The parameters of this query.
QueryParameters query_params = 2;
// Required. The input specification. It can be set to:
//
// 1. an audio config which instructs the speech recognizer how to process
// the speech audio,
//
// 2. a conversational query in the form of text, or
//
// 3. an event that specifies which intent to trigger.
QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];
// Please use
// [InputAudioConfig.single_utterance][google.cloud.dialogflow.v2.InputAudioConfig.single_utterance]
// instead. If `false` (default), recognition does not cease until the client
// closes the stream. If `true`, the recognizer will detect a single spoken
// utterance in input audio. Recognition ceases when it detects the audio's
// voice has stopped or paused. In this case, once a detected intent is
// received, the client should close the stream and start a new request with a
// new stream as needed. This setting is ignored when `query_input` is a piece
// of text or an event.
bool single_utterance = 4 [deprecated = true];
// Instructs the speech synthesizer how to generate the output
// audio. If this field is not set and agent-level speech synthesizer is not
// configured, no output audio is generated.
OutputAudioConfig output_audio_config = 5;
// Mask for
// [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]
// indicating which settings in this request-level config should override
// speech synthesizer settings defined at agent-level.
//
// If unspecified or empty,
// [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]
// replaces the agent-level config in its entirety.
google.protobuf.FieldMask output_audio_config_mask = 7;
// The input audio content to be recognized. Must be sent if
// `query_input` was set to a streaming input audio config. The complete audio
// over all streaming messages must not exceed 1 minute.
bytes input_audio = 6;
// If true, `StreamingDetectIntentResponse.debugging_info` will be populated.
bool enable_debugging_info = 8;
}
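// A hedged Python sketch of the request ordering described above: a first
// message carrying the session and query input, then audio-only messages
// (`my_audio_chunks` is a placeholder iterator of audio byte chunks):
//
// ```
// from google.cloud import dialogflow_v2 as dialogflow
//
// client = dialogflow.SessionsClient()
// session = client.session_path("my-project", "my-session-id")
//
// def requests(audio_chunks):
//     # 1. Config message: session + query_input, no input_audio.
//     yield dialogflow.StreamingDetectIntentRequest(
//         session=session,
//         query_input=dialogflow.QueryInput(
//             audio_config=dialogflow.InputAudioConfig(
//                 audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
//                 sample_rate_hertz=16000,
//                 language_code="en-US",
//             )
//         ),
//     )
//     # 2. Audio messages: input_audio only, at most 1 minute in total.
//     for chunk in audio_chunks:
//         yield dialogflow.StreamingDetectIntentRequest(input_audio=chunk)
//
// for response in client.streaming_detect_intent(requests=requests(my_audio_chunks)):
//     if response.recognition_result.transcript:
//         print("interim:", response.recognition_result.transcript)
//     if response.query_result.intent.display_name:
//         print("matched:", response.query_result.intent.display_name)
// ```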
// Cloud conversation info for easier debugging.
// It will get populated in `StreamingDetectIntentResponse` or
// `StreamingAnalyzeContentResponse` when the flag `enable_debugging_info` is
// set to true in corresponding requests.
message CloudConversationDebuggingInfo {
// Number of input audio data chunks in streaming requests.
int32 audio_data_chunks = 1;
// Time offset of the end of speech utterance relative to the
// beginning of the first audio chunk.
google.protobuf.Duration result_end_time_offset = 2;
// Duration of first audio chunk.
google.protobuf.Duration first_audio_duration = 3;
// Whether client used single utterance mode.
bool single_utterance = 5;
// Time offsets of the speech partial results relative to the beginning of
// the stream.
repeated google.protobuf.Duration speech_partial_results_end_times = 6;
// Time offsets of the speech final results (is_final=true) relative to the
// beginning of the stream.
repeated google.protobuf.Duration speech_final_results_end_times = 7;
// Total number of partial responses.
int32 partial_responses = 8;
// Time offset of Speaker ID stream close time relative to the Speech stream
// close time in milliseconds. Only meaningful for conversations involving
// passive verification.
int32 speaker_id_passive_latency_ms_offset = 9;
// Whether a barge-in event is triggered in this request.
bool bargein_event_triggered = 10;
// Whether speech uses single utterance mode.
bool speech_single_utterance = 11;
// Time offsets of the DTMF partial results relative to the beginning of
// the stream.
repeated google.protobuf.Duration dtmf_partial_results_times = 12;
// Time offsets of the DTMF final results relative to the beginning of
// the stream.
repeated google.protobuf.Duration dtmf_final_results_times = 13;
// Time offset of the end-of-single-utterance signal relative to the
// beginning of the stream.
google.protobuf.Duration single_utterance_end_time_offset = 14;
// No speech timeout settings observed at runtime.
google.protobuf.Duration no_speech_timeout = 15;
// Whether the streaming terminates with an injected text query.
bool is_input_text = 16;
// Client half close time in terms of input audio duration.
google.protobuf.Duration client_half_close_time_offset = 17;
// Client half close time in terms of API streaming duration.
google.protobuf.Duration client_half_close_streaming_time_offset = 18;
}
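// To have this message populated, set `enable_debugging_info` on the first
// streaming request. A sketch reusing the placeholder generator pattern from
// the previous example (`session`, `qi_audio`, and `my_audio_chunks` are
// assumed to be built as shown earlier):
//
// ```
// def requests_with_debugging(audio_chunks):
//     yield dialogflow.StreamingDetectIntentRequest(
//         session=session,
//         query_input=qi_audio,          # QueryInput with an audio config
//         enable_debugging_info=True,
//     )
//     for chunk in audio_chunks:
//         yield dialogflow.StreamingDetectIntentRequest(input_audio=chunk)
//
// for response in client.streaming_detect_intent(
//         requests=requests_with_debugging(my_audio_chunks)):
//     if response.debugging_info.audio_data_chunks:  # populated near the end
//         print(response.debugging_info)
// ```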
// The top-level message returned from the
// `StreamingDetectIntent` method.
//
// Multiple response messages can be returned in order:
//
// 1. If the `StreamingDetectIntentRequest.input_audio` field was
// set, the `recognition_result` field is populated for one
// or more messages.
// See the
// [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult]
// message for details about the result message sequence.
//
// 2. The next message contains `response_id`, `query_result`
// and optionally `webhook_status` if a WebHook was called.
message StreamingDetectIntentResponse {
// The unique identifier of the response. It can be used to
// locate a response in the training example set or for reporting issues.
string response_id = 1;
// The result of speech recognition.
StreamingRecognitionResult recognition_result = 2;
// The result of the conversational query or event processing.
QueryResult query_result = 3;
// Specifies the status of the webhook request.
google.rpc.Status webhook_status = 4;
// The audio data bytes encoded as specified in the request.
// Note: The output audio is generated based on the values of default platform
// text responses found in the `query_result.fulfillment_messages` field. If
// multiple default text responses exist, they will be concatenated when
// generating audio. If no default platform text responses exist, the
// generated audio content will be empty.
//
// In some scenarios, multiple output audio fields may be present in the
// response structure. In these cases, only the top-most-level audio output
// has content.
bytes output_audio = 5;
// The config used by the speech synthesizer to generate the output audio.
OutputAudioConfig output_audio_config = 6;
// Debugging info that would get populated when
// `StreamingDetectIntentRequest.enable_debugging_info` is set to true.
CloudConversationDebuggingInfo debugging_info = 8;
}
// Contains a speech recognition result corresponding to a portion of the audio
// that is currently being processed or an indication that this is the end
// of the single requested utterance.
//
// While end-user audio is being processed, Dialogflow sends a series of
// results. Each result may contain a `transcript` value. A transcript
// represents a portion of the utterance. While the recognizer is processing
// audio, transcript values may be interim values or finalized values.
// Once a transcript is finalized, the `is_final` value is set to true and
// processing continues for the next transcript.
//
// If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
// was true, and the recognizer has completed processing audio,
// the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
// following (last) result contains the last finalized transcript.
//
// The complete end-user utterance is determined by concatenating the
// finalized transcript values received for the series of results.
//
// In the following example, single utterance is enabled. In the case where
// single utterance is not enabled, result 7 would not occur.
//
// ```
// Num | transcript              | message_type            | is_final
// --- | ----------------------- | ----------------------- | --------
//  1  | "tube"                  | TRANSCRIPT              | false
//  2  | "to be a"               | TRANSCRIPT              | false
//  3  | "to be"                 | TRANSCRIPT              | false
//  4  | "to be or not to be"    | TRANSCRIPT              | true
//  5  | "that's"                | TRANSCRIPT              | false
//  6  | "that is"               | TRANSCRIPT              | false
//  7  | unset                   | END_OF_SINGLE_UTTERANCE | unset
//  8  | " that is the question" | TRANSCRIPT              | true
// ```
//
// Concatenating the finalized transcripts with `is_final` set to true,
// the complete utterance becomes "to be or not to be that is the question".
message StreamingRecognitionResult {
// Type of the response message.
enum MessageType {
// Not specified. Should never be used.
MESSAGE_TYPE_UNSPECIFIED = 0;
// Message contains a (possibly partial) transcript.
TRANSCRIPT = 1;
// Event indicates that the server has detected the end of the user's speech
// utterance and expects no additional inputs.
// Therefore, the server will not process additional audio (although it may
// subsequently return additional results). The client should stop sending
// additional audio data, half-close the gRPC connection, and wait for any
// additional results until the server closes the gRPC connection. This
// message is only sent if `single_utterance` was set to `true`, and is not
// used otherwise.
END_OF_SINGLE_UTTERANCE = 2;
}
// Type of the result message.
MessageType message_type = 1;
// Transcript text representing the words that the user spoke.
// Populated if and only if `message_type` = `TRANSCRIPT`.
string transcript = 2;
// If `false`, the `StreamingRecognitionResult` represents an
// interim result that may change. If `true`, the recognizer will not return
// any further hypotheses about this piece of the audio. May only be populated
// for `message_type` = `TRANSCRIPT`.
bool is_final = 3;
// The Speech confidence between 0.0 and 1.0 for the current portion of audio.
// A higher number indicates an estimated greater likelihood that the
// recognized words are correct. The default of 0.0 is a sentinel value
// indicating that confidence was not set.
//
// This field is typically only provided if `is_final` is true and you should
// not rely on it being accurate or even set.
float confidence = 4;
// Word-specific information for the words recognized by Speech in
// [transcript][google.cloud.dialogflow.v2.StreamingRecognitionResult.transcript].
// Populated if and only if `message_type` = `TRANSCRIPT` and
// [InputAudioConfig.enable_word_info] is set.
repeated SpeechWordInfo speech_word_info = 7;
// Time offset of the end of this Speech recognition result relative to the
// beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`.
google.protobuf.Duration speech_end_offset = 8;
// Detected language code for the transcript.
string language_code = 10;
}
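// Following the transcript sequence above, a Python sketch that concatenates
// finalized transcripts and reacts to `END_OF_SINGLE_UTTERANCE` (`responses`
// is the stream returned by `streaming_detect_intent` in the earlier sketch;
// `stop_sending_audio` is a hypothetical hook into the request generator):
//
// ```
// MessageType = dialogflow.StreamingRecognitionResult.MessageType
//
// finalized = []
// for response in responses:
//     result = response.recognition_result
//     if result.message_type == MessageType.END_OF_SINGLE_UTTERANCE:
//         # Stop sending audio and half-close, then keep reading until the
//         # server closes the stream.
//         stop_sending_audio()
//     elif result.is_final:
//         finalized.append(result.transcript)
// # e.g. "to be or not to be" + " that is the question"
// print("utterance:", "".join(finalized))
// ```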
// Auxiliary proto messages.
//
// Represents the natural language text to be processed.
message TextInput {
// Required. The UTF-8 encoded natural language text to be processed.
// Text length must not exceed 256 characters for virtual agent interactions.
string text = 1 [(google.api.field_behavior) = REQUIRED];
// Required. The language of this conversational query. See [Language
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
// for a list of the currently supported language codes. Note that queries in
// the same session do not necessarily need to specify the same language.
string language_code = 2 [(google.api.field_behavior) = REQUIRED];
}
// Events allow for matching intents by event name instead of the natural
// language input. For instance, input `<event: { name: "welcome_event",
// parameters: { name: "Sam" } }>` can trigger a personalized welcome response.
// The parameter `name` may be used by the agent in the response:
// `"Hello #welcome_event.name! What can I do for you today?"`.
message EventInput {
// Required. The unique identifier of the event.
string name = 1 [(google.api.field_behavior) = REQUIRED];
// The collection of parameters associated with the event.
//
// Depending on your protocol or client library language, this is a
// map, associative array, symbol table, dictionary, or JSON object
// composed of a collection of (MapKey, MapValue) pairs:
//
// * MapKey type: string
// * MapKey value: parameter name
// * MapValue type: If parameter's entity type is a composite entity then use
// map, otherwise, depending on the parameter value type, it could be one of
// string, number, boolean, null, list or map.
// * MapValue value: If parameter's entity type is a composite entity then use
// map from composite entity property names to property values, otherwise,
// use parameter value.
google.protobuf.Struct parameters = 2;
// Required. The language of this query. See [Language
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
// for a list of the currently supported language codes. Note that queries in
// the same session do not necessarily need to specify the same language.
//
// This field is ignored when used in the context of a
// [WebhookResponse.followup_event_input][google.cloud.dialogflow.v2.WebhookResponse.followup_event_input]
// field, because the language was already defined in the originating detect
// intent request.
string language_code = 3 [(google.api.field_behavior) = REQUIRED];
}
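// A Python sketch of triggering the `welcome_event` described above; the
// plain dict is assumed to be marshaled into the `parameters` Struct by the
// client library (composite-entity values would be nested dicts), and
// `client` and `session` are built as in the earlier examples:
//
// ```
// event_input = dialogflow.EventInput(
//     name="welcome_event",
//     parameters={"name": "Sam"},
//     language_code="en-US",
// )
// response = client.detect_intent(
//     request={"session": session,
//              "query_input": dialogflow.QueryInput(event=event_input)}
// )
// print(response.query_result.fulfillment_text)
// ```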
// Configures the types of sentiment analysis to perform.
message SentimentAnalysisRequestConfig {
// Instructs the service to perform sentiment analysis on
// `query_text`. If not provided, sentiment analysis is not performed on
// `query_text`.
bool analyze_query_text_sentiment = 1;
}
// The result of sentiment analysis. Sentiment analysis inspects user input
// and identifies the prevailing subjective opinion, especially to determine a
// user's attitude as positive, negative, or neutral.
// For [Participants.DetectIntent][], it needs to be configured in
// [DetectIntentRequest.query_params][google.cloud.dialogflow.v2.DetectIntentRequest.query_params].
// For [Participants.StreamingDetectIntent][], it needs to be configured in
// [StreamingDetectIntentRequest.query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params].
// And for
// [Participants.AnalyzeContent][google.cloud.dialogflow.v2.Participants.AnalyzeContent]
// and
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent],
// it needs to be configured in
// [ConversationProfile.human_agent_assistant_config][google.cloud.dialogflow.v2.ConversationProfile.human_agent_assistant_config].
message SentimentAnalysisResult {
// The sentiment analysis result for `query_text`.
Sentiment query_text_sentiment = 1;
}
// The sentiment, such as positive/negative feeling or association, for a unit
// of analysis, such as the query text. See:
// https://cloud.google.com/natural-language/docs/basics#interpreting_sentiment_analysis_values
// for how to interpret the result.
message Sentiment {
// Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
// sentiment).
float score = 1;
// A non-negative number in the [0, +inf) range, which represents the absolute
// magnitude of sentiment, regardless of score (positive or negative).
float magnitude = 2;
}
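// A Python sketch of requesting and reading sentiment for DetectIntent,
// continuing the earlier examples:
//
// ```
// query_params = dialogflow.QueryParameters(
//     sentiment_analysis_request_config=dialogflow.SentimentAnalysisRequestConfig(
//         analyze_query_text_sentiment=True
//     )
// )
// response = client.detect_intent(
//     request={"session": session, "query_input": query_input,
//              "query_params": query_params}
// )
// sentiment = response.query_result.sentiment_analysis_result.query_text_sentiment
// print(sentiment.score, sentiment.magnitude)  # score in [-1, 1]; magnitude >= 0
// ```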