/
resolver_expr.cc
8743 lines (8031 loc) · 385 KB
/
resolver_expr.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file contains the implementation of expression-related resolver methods
// from resolver.h.
#include <stddef.h>
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstring>
#include <deque>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "zetasql/base/logging.h"
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/descriptor.h"
#include "zetasql/analyzer/analytic_function_resolver.h"
#include "zetasql/analyzer/column_cycle_detector.h"
#include "zetasql/analyzer/expr_matching_helpers.h"
#include "zetasql/analyzer/expr_resolver_helper.h"
#include "zetasql/analyzer/filter_fields_path_validator.h"
#include "zetasql/analyzer/function_resolver.h"
#include "zetasql/analyzer/input_argument_type_resolver_helper.h"
#include "zetasql/analyzer/lambda_util.h"
#include "zetasql/analyzer/name_scope.h"
#include "zetasql/analyzer/query_resolver_helper.h"
#include "zetasql/analyzer/resolver.h"
#include "zetasql/analyzer/resolver_common_inl.h"
#include "zetasql/common/errors.h"
#include "zetasql/common/internal_analyzer_options.h"
#include "zetasql/parser/ast_node.h"
#include "zetasql/parser/ast_node_kind.h"
#include "zetasql/parser/parse_tree.h"
#include "zetasql/parser/parse_tree_errors.h"
#include "zetasql/public/analyzer_options.h"
#include "zetasql/public/analyzer_output_properties.h"
#include "zetasql/public/annotation/collation.h"
#include "zetasql/public/builtin_function.pb.h"
#include "zetasql/public/cast.h"
#include "zetasql/public/catalog.h"
#include "zetasql/public/catalog_helper.h"
#include "zetasql/public/civil_time.h"
#include "zetasql/public/coercer.h"
#include "zetasql/public/constant.h"
#include "zetasql/public/cycle_detector.h"
#include "zetasql/public/deprecation_warning.pb.h"
#include "zetasql/public/function.h"
#include "zetasql/public/function.pb.h"
#include "zetasql/public/functions/convert_string.h"
#include "zetasql/public/functions/date_time_util.h"
#include "zetasql/public/functions/datetime.pb.h"
#include "zetasql/public/functions/normalize_mode.pb.h"
#include "zetasql/public/functions/range.h"
#include "zetasql/public/id_string.h"
#include "zetasql/public/input_argument_type.h"
#include "zetasql/public/interval_value.h"
#include "zetasql/public/json_value.h"
#include "zetasql/public/language_options.h"
#include "zetasql/public/numeric_value.h"
#include "zetasql/public/options.pb.h"
#include "zetasql/public/parse_location.h"
#include "zetasql/public/proto/type_annotation.pb.h"
#include "zetasql/public/proto_util.h"
#include "zetasql/public/select_with_mode.h"
#include "zetasql/public/signature_match_result.h"
#include "zetasql/public/simple_catalog.h"
#include "zetasql/public/sql_function.h"
#include "zetasql/public/strings.h"
#include "zetasql/public/templated_sql_function.h"
#include "zetasql/public/type.h"
#include "zetasql/public/type.pb.h"
#include "zetasql/public/types/annotation.h"
#include "zetasql/public/types/array_type.h"
#include "zetasql/public/types/enum_type.h"
#include "zetasql/public/types/proto_type.h"
#include "zetasql/public/types/struct_type.h"
#include "zetasql/public/types/type_factory.h"
#include "zetasql/public/types/type_parameters.h"
#include "zetasql/public/value.h"
#include "zetasql/resolved_ast/resolved_ast.h"
#include "zetasql/resolved_ast/resolved_ast_builder.h"
#include "zetasql/resolved_ast/resolved_column.h"
#include "zetasql/resolved_ast/resolved_node.h"
#include "zetasql/resolved_ast/resolved_node_kind.pb.h"
#include "zetasql/base/case.h"
#include "zetasql/base/string_numbers.h"
#include "absl/cleanup/cleanup.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/flags/flag.h"
#include "zetasql/base/check.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "absl/strings/substitute.h"
#include "absl/time/time.h"
#include "absl/types/optional.h"
#include "absl/types/span.h"
#include "absl/types/variant.h"
#include "zetasql/base/general_trie.h"
#include "zetasql/base/map_util.h"
#include "zetasql/base/ret_check.h"
#include "zetasql/base/status.h"
#include "zetasql/base/status_macros.h"
// Escape hatch for a behavior change to bare array subscripts on proto maps.
//
// The behavior of bare array access on proto maps is being changed even though
// the previous behavior is potentially already live. Because it is considered
// unlikely that anyone is using it, the change takes effect immediately; this
// flag (default: false) exists only so that the resulting error can be
// suppressed if needed.
//
// The flag will be removed in 2023Q2, assuming no reports arrive of it needing
// to be used.
ABSL_FLAG(bool, zetasql_suppress_proto_map_bare_array_subscript_error, false,
"If true, the error for accessing a protocol buffer map using a bare "
"array subscript operation is suppressed.");
namespace zetasql {
// These are constant identifiers used mostly for generated column or table
// names. A single IdString is defined for each one so that these strings never
// have to be allocated or copied again.
STATIC_IDSTRING(kAggregateId, "$aggregate");
STATIC_IDSTRING(kGroupingId, "$grouping_call");
STATIC_IDSTRING(kExprSubqueryId, "$expr_subquery");
STATIC_IDSTRING(kOrderById, "$orderby");
STATIC_IDSTRING(kInSubqueryCastId, "$in_subquery_cast");
// NOTE(review): the identifiers below do not look like generated names; they
// are presumably the wrapper names accepted inside a subscript (e.g.
// OFFSET(...), SAFE_KEY(...)) -- confirm against their uses later in this
// file.
STATIC_IDSTRING(kKey, "KEY");
STATIC_IDSTRING(kOffset, "OFFSET");
STATIC_IDSTRING(kOrdinal, "ORDINAL");
STATIC_IDSTRING(kSafeKey, "SAFE_KEY");
STATIC_IDSTRING(kSafeOffset, "SAFE_OFFSET");
STATIC_IDSTRING(kSafeOrdinal, "SAFE_ORDINAL");
namespace {
// Checks that the message extended by 'field_descriptor' (its
// containing_type()) has the same full name as 'descriptor'. Returns
// 'field_descriptor' unchanged when the names match, and otherwise a SQL error
// located at 'ast_node'.
absl::StatusOr<const google::protobuf::FieldDescriptor*> VerifyFieldExtendsMessage(
    const ASTNode* ast_node, const google::protobuf::FieldDescriptor* field_descriptor,
    const google::protobuf::Descriptor* descriptor) {
  const google::protobuf::Descriptor* extended_message =
      field_descriptor->containing_type();

  // The messages are compared by full_name rather than by pointer so that
  // extensions coming from a different DescriptorPool are still accepted. This
  // is tested in the ExternalExtension test in analyzer_test.cc.
  if (descriptor->full_name() == extended_message->full_name()) {
    return field_descriptor;
  }

  return MakeSqlErrorAt(ast_node)
         << "Proto extension " << field_descriptor->full_name()
         << " extends message " << extended_message->full_name()
         << " so cannot be used on an expression with message type "
         << descriptor->full_name();
}
// Unwraps one level of array: yields the element type when 'type' is an
// array, and 'type' itself for every other kind of type.
const Type* ArrayElementTypeOrType(const Type* type) {
  return type->IsArray() ? type->AsArray()->element_type() : type;
}
// Appends 'expr' to the get_field_list of 'flatten' and sets the type of
// 'flatten' to match: the type of 'expr' when that is already an array,
// otherwise an array of that type created through 'type_factory'.
//
// Returns the error from TypeFactory::MakeArrayType if the array type cannot
// be created; in that case 'flatten' is left unmodified.
absl::Status AddGetFieldToFlatten(std::unique_ptr<const ResolvedExpr> expr,
                                  TypeFactory* type_factory,
                                  ResolvedFlatten* flatten) {
  const Type* const field_type = expr->type();
  const Type* flatten_type = field_type;
  if (!field_type->IsArray()) {
    // The result of a flatten is always an array, so wrap non-array types.
    ZETASQL_RETURN_IF_ERROR(
        type_factory->MakeArrayType(field_type, &flatten_type));
  }

  flatten->set_type(flatten_type);
  flatten->add_get_field_list(std::move(expr));
  return absl::OkStatus();
}
// Returns, upper-cased, the run of alphabetic characters in 'sql' that begins
// at the start byte offset of 'leaf_node'. The result is empty when that
// offset is outside 'sql' or the character there is not alphabetic.
//
// NOTE(review): judging by the name, this is meant to extract the type keyword
// that prefixes a literal (e.g. the DATE in DATE '...') -- confirm at the call
// sites.
static std::string GetTypeNameForPrefixedLiteral(absl::string_view sql,
                                                 const ASTLeaf& leaf_node) {
  // Offsets are kept as size_t so that the comparison with sql.size() and the
  // substring arguments below involve no implicit signed/unsigned conversion.
  const size_t start_offset = static_cast<size_t>(
      leaf_node.GetParseLocationRange().start().GetByteOffset());
  size_t end_offset = start_offset;
  // std::isalpha has undefined behavior for negative arguments other than
  // EOF, which is what a byte >= 0x80 becomes when plain char is signed. The
  // cast to unsigned char makes the call well-defined for every byte.
  while (end_offset < sql.size() &&
         std::isalpha(static_cast<unsigned char>(sql[end_offset]))) {
    ++end_offset;
  }
  return absl::AsciiStrToUpper(
      absl::ClippedSubstr(sql, start_offset, end_offset - start_offset));
}
inline std::unique_ptr<ResolvedCast> MakeResolvedCast(
const Type* type, std::unique_ptr<const ResolvedExpr> expr,
std::unique_ptr<const ResolvedExpr> format,
std::unique_ptr<const ResolvedExpr> time_zone,
const TypeModifiers& type_modifiers, bool return_null_on_error,
const ExtendedCompositeCastEvaluator& extended_conversion_evaluator) {
auto result = MakeResolvedCast(type, std::move(expr), return_null_on_error,
/*extended_cast=*/nullptr, std::move(format),
std::move(time_zone), type_modifiers);
if (extended_conversion_evaluator.is_valid()) {
std::vector<std::unique_ptr<const ResolvedExtendedCastElement>>
conversion_list;
for (const ConversionEvaluator& evaluator :
extended_conversion_evaluator.evaluators()) {
conversion_list.push_back(MakeResolvedExtendedCastElement(
evaluator.from_type(), evaluator.to_type(), evaluator.function()));
}
result->set_extended_cast(
MakeResolvedExtendedCast(std::move(conversion_list)));
}
return result;
}
// Returns true when 'function_sql_name' equals "FILTER_FIELDS" according to
// zetasql_base::CaseEqual.
bool IsFilterFields(absl::string_view function_sql_name) {
  static constexpr char kFilterFieldsSqlName[] = "FILTER_FIELDS";
  return zetasql_base::CaseEqual(function_sql_name, kFilterFieldsSqlName);
}
// Returns the CatalogNamePath() of 'type' when it is a PROTO or an ENUM type,
// and an empty span for every other kind of type.
absl::Span<const std::string> GetTypeCatalogNamePath(const Type* type) {
  absl::Span<const std::string> name_path;  // Empty unless PROTO or ENUM.
  if (type->IsProto()) {
    name_path = type->AsProto()->CatalogNamePath();
  } else if (type->IsEnum()) {
    name_path = type->AsEnum()->CatalogNamePath();
  }
  return name_path;
}
// Produces the error for a GROUPING function call that appears in a query
// using a SELECT WITH mode in which it is not supported.
//
// For ANONYMIZATION, DIFFERENTIAL_PRIVACY and AGGREGATION_THRESHOLD the result
// is a SQL error located at 'func_call' that names the kind of query. For
// NONE, and for any mode not listed here, the result is OK.
absl::Status MakeUnsupportedGroupingFunctionError(
    const ASTFunctionCall* func_call, SelectWithMode mode) {
  ABSL_DCHECK(func_call != nullptr);

  absl::string_view mode_description;
  switch (mode) {
    case SelectWithMode::ANONYMIZATION:
      mode_description = "anonymization";
      break;
    case SelectWithMode::DIFFERENTIAL_PRIVACY:
      mode_description = "differential privacy";
      break;
    case SelectWithMode::AGGREGATION_THRESHOLD:
      mode_description = "aggregation threshold";
      break;
    case SelectWithMode::NONE:
    default:
      // Nothing to report for these modes.
      return absl::OkStatus();
  }

  return MakeSqlErrorAt(func_call) << absl::StrFormat(
             "GROUPING function is not supported in %s queries",
             mode_description);
}
} // namespace
// Builds a ResolvedMakeProto of 'proto_type' from 'arguments' and stores it in
// '*output'. A parse location for 'ast_type_location' may be recorded on the
// new node (see MaybeRecordParseLocation).
//
// For each argument, the target field and its SQL type are taken from the
// argument when both are already set, and are otherwise looked up with
// FindFieldDescriptor and FindProtoFieldType. The argument's expression is
// moved out of 'arguments' and then cast (or, for literals, converted) to the
// field's type.
//
// Returns a SQL error when:
//  - two arguments target the same tag number,
//  - an argument's value cannot be cast to the type of its field, or
//  - a required field of the proto is not provided by any aliased argument.
//
// 'argument_description' and 'query_description' are only used to word error
// messages. 'input_scan' is forwarded to AddCastOrConvertLiteral.
absl::Status Resolver::ResolveBuildProto(
    const ASTNode* ast_type_location, const ProtoType* proto_type,
    const ResolvedScan* input_scan, absl::string_view argument_description,
    absl::string_view query_description,
    std::vector<ResolvedBuildProtoArg>* arguments,
    std::unique_ptr<const ResolvedExpr>* output) {
  const google::protobuf::Descriptor* descriptor = proto_type->descriptor();

  // Required fields that no argument has provided so far.
  absl::flat_hash_set<const google::protobuf::FieldDescriptor*> unset_required_fields;
  for (int field_idx = 0; field_idx < descriptor->field_count(); ++field_idx) {
    const google::protobuf::FieldDescriptor* candidate = descriptor->field(field_idx);
    if (candidate->is_required()) {
      unset_required_fields.insert(candidate);
    }
  }

  // Tag number -> field for every field assigned so far; used to detect an
  // attempt to set the same tag twice.
  std::map<int, const google::protobuf::FieldDescriptor*> fields_by_tag;
  std::vector<std::unique_ptr<const ResolvedMakeProtoField>> make_fields;

  for (int arg_idx = 0; arg_idx < arguments->size(); ++arg_idx) {
    ResolvedBuildProtoArg& argument = (*arguments)[arg_idx];
    const AliasOrASTPathExpression& name_or_path =
        *argument.alias_or_ast_path_expr;

    // Use the field and type carried by the argument when both are present;
    // otherwise look both of them up.
    const google::protobuf::FieldDescriptor* field = argument.field_descriptor;
    const Type* proto_field_type = argument.proto_field_type;
    if (field == nullptr || proto_field_type == nullptr) {
      ZETASQL_ASSIGN_OR_RETURN(
          field, FindFieldDescriptor(descriptor, name_or_path,
                                     argument.ast_location, arg_idx,
                                     argument_description));
      ZETASQL_ASSIGN_OR_RETURN(
          proto_field_type,
          FindProtoFieldType(field, argument.ast_location,
                             proto_type->CatalogNamePath()));
    }

    IdString field_alias;  // Stays empty when a path expression is used.
    if (name_or_path.kind() == AliasOrASTPathExpression::ALIAS) {
      field_alias = name_or_path.alias();
      if (field->is_required()) {
        // A required field may be listed twice, in which case this erase is a
        // no-op; the duplicate is reported by the tag-number check below.
        unset_required_fields.erase(field);
      }
    }

    const auto [existing_entry, inserted] =
        fields_by_tag.emplace(field->number(), field);
    if (!inserted) {
      // The same tag number cannot be set twice. Simple messages are produced
      // for the common cases.
      const google::protobuf::FieldDescriptor* previous_field = existing_entry->second;
      if (previous_field->full_name() != field->full_name()) {
        // Very strange case (tag number accessed twice, somehow with
        // different names).
        return MakeSqlErrorAt(argument.ast_location)
               << query_description << " refers to duplicate tag number "
               << field->number() << " with proto field names "
               << previous_field->full_name() << " and " << field->full_name();
      }
      if (field_alias.empty()) {
        // Less common case (field accessed twice, the second time as an
        // extension field).
        return MakeSqlErrorAt(argument.ast_location)
               << query_description << " has duplicate extension field "
               << field->full_name();
      }
      // Very common case (regular field accessed twice) or a very weird case
      // (regular field accessed sometime after accessing an extension field
      // with the same tag number and name).
      return MakeSqlErrorAt(argument.ast_location)
             << query_description << " has duplicate column name "
             << ToIdentifierLiteral(field_alias)
             << " so constructing proto field " << field->full_name()
             << " is ambiguous";
    }

    std::unique_ptr<const ResolvedExpr> expr = std::move(argument.expr);

    // Coerce the value to the field type if necessary.
    // TODO: Remove input_scan arg and convert to CoerceExprToType
    //     This will be a separate change because it churns test files.
    // TODO: Check coercer->AssignableTo
    const absl::Status cast_status =
        function_resolver_->AddCastOrConvertLiteral(
            argument.ast_location, proto_field_type, /*format=*/nullptr,
            /*time_zone=*/nullptr, TypeParameters(), input_scan,
            /*set_has_explicit_type=*/false,
            /*return_null_on_error=*/false, &expr);
    if (cast_status.code() == absl::StatusCode::kResourceExhausted) {
      // "Out of stack space" errors are propagated unchanged.
      return cast_status;
    }
    if (!cast_status.ok()) {
      // Any other failure is replaced by a message naming both types.
      return MakeSqlErrorAt(argument.ast_location)
             << "Could not store value with type "
             << GetInputArgumentTypeForExpr(
                    expr.get(),
                    /*pick_default_type_for_untyped_expr=*/false)
                    .UserFacingName(product_mode())
             << " into proto field " << field->full_name()
             << " which has SQL type "
             << proto_field_type->ShortTypeName(product_mode());
    }

    make_fields.push_back(MakeResolvedMakeProtoField(
        field, ProtoType::GetFormatAnnotation(field), std::move(expr)));
  }

  if (!unset_required_fields.empty()) {
    // std::set keeps the names sorted so the message is deterministic.
    std::set<std::string> sorted_names;
    for (const google::protobuf::FieldDescriptor* unset_field : unset_required_fields) {
      sorted_names.insert(unset_field->name());
    }
    const bool plural = unset_required_fields.size() > 1;
    return MakeSqlErrorAt(ast_type_location)
           << "Cannot construct proto " << descriptor->full_name()
           << " because required field" << (plural ? "s" : "") << " "
           << absl::StrJoin(sorted_names, ",") << " "
           << (plural ? "are" : "is") << " missing";
  }

  auto make_proto =
      MakeResolvedMakeProto(proto_type, std::move(make_fields));
  MaybeRecordParseLocation(ast_type_location, make_proto.get());
  *output = std::move(make_proto);
  return absl::OkStatus();
}
// Finds the field of 'descriptor' that 'alias_or_ast_path_expr' refers to.
//
//  - For an alias, the field is looked up by the lower-cased alias via
//    FindFieldByLowercaseName. The alias must be non-empty and must not be an
//    internal alias (both enforced with ZETASQL_RET_CHECK).
//  - For a path expression, the lookup is delegated to
//    FindExtensionFieldDescriptor, whose errors are returned as-is.
//
// If no field is found, returns a SQL error at 'ast_location' that refers to
// the argument by 'argument_description' and its 1-based position
// ('field_index' + 1).
absl::StatusOr<const google::protobuf::FieldDescriptor*> Resolver::FindFieldDescriptor(
    const google::protobuf::Descriptor* descriptor,
    const AliasOrASTPathExpression& alias_or_ast_path_expr,
    const ASTNode* ast_location, int field_index,
    absl::string_view argument_description) {
  const google::protobuf::FieldDescriptor* found_field = nullptr;

  const auto name_kind = alias_or_ast_path_expr.kind();
  if (name_kind == AliasOrASTPathExpression::ALIAS) {
    const IdString field_alias = alias_or_ast_path_expr.alias();
    ZETASQL_RET_CHECK(!field_alias.empty());
    ZETASQL_RET_CHECK(!IsInternalAlias(field_alias));
    found_field = descriptor->FindFieldByLowercaseName(
        absl::AsciiStrToLower(field_alias.ToStringView()));
  } else if (name_kind == AliasOrASTPathExpression::AST_PATH_EXPRESSION) {
    ZETASQL_ASSIGN_OR_RETURN(
        found_field,
        FindExtensionFieldDescriptor(alias_or_ast_path_expr.ast_path_expr(),
                                     descriptor));
  }

  if (found_field != nullptr) {
    return found_field;
  }
  return MakeSqlErrorAt(ast_location)
         << argument_description << " " << (field_index + 1) << " has name "
         << ToIdentifierLiteral(alias_or_ast_path_expr.alias())
         << " which is not a field in proto " << descriptor->full_name();
}
// Returns the SQL type of the proto field 'field_descriptor', computed by
// GetProtoFieldTypeAndDefault with 'catalog_name_path' and type_factory_.
//
// Errors are reported as SQL errors at 'ast_location': either the failure of
// GetProtoFieldTypeAndDefault, or an "unsupported type" error when the
// computed type is not supported under language().
absl::StatusOr<const Type*> Resolver::FindProtoFieldType(
    const google::protobuf::FieldDescriptor* field_descriptor,
    const ASTNode* ast_location,
    absl::Span<const std::string> catalog_name_path) {
  // The default value itself is not needed here, but it has to be requested
  // because otherwise default validation does not take place. Ideally this
  // would be refactored so that the validation is done separately.
  Value ignored_default_value;
  const Type* field_type = nullptr;
  const auto default_options = ProtoFieldDefaultOptions::FromFieldAndLanguage(
      field_descriptor, language());
  RETURN_SQL_ERROR_AT_IF_ERROR(
      ast_location,
      GetProtoFieldTypeAndDefault(default_options, field_descriptor,
                                  catalog_name_path, type_factory_,
                                  &field_type, &ignored_default_value));

  if (field_type->IsSupportedType(language())) {
    return field_type;
  }
  return MakeSqlErrorAt(ast_location)
         << "Proto field " << field_descriptor->full_name()
         << " has unsupported type "
         << field_type->TypeName(language().product_mode());
}
// Resolves the extension named by 'ast_path_expr' for a proto whose message
// type is 'descriptor'.
//
// The extension name can be written in any of these forms:
//   (1) package.ProtoName.field_name
//   (2) catalog.package.ProtoName.field_name
//   (3) `package.ProtoName`.field_name
//   (4) catalog.`package.ProtoName`.field_name
//   (5) package.field_name
//
// The package and catalog names are optional, and could also be multi-part.
// The field_name is always written as the last identifier and cannot be part
// of a quoted name. (The Catalog lookup interface can only find message names,
// not extension field names.)
//
// Resolution order:
//
// First, the full path is tried as a fully qualified extension name by
// looking it up in the DescriptorPool of the relevant proto. This resolves
// scoped extensions written in form (1) as well as top-level extensions
// written in form (5).
//
// If a resolution of extension with form (5) fails and weak field fallback
// lookup (FEATURE_WEAK_FIELD_FALLBACK_LOOKUP) is enabled then we attempt to
// find a weak field by field_name such that if a weak field was converted to
// extension it would have matched form (5) exactly.
//
// If the extension cannot be found this way, the first N-1 names are resolved
// as a type name, which must come out to a ProtoType, and the last name is
// then looked up as an extension field name in that message. This finds
// extensions written in form (2), (3) or (4).
//
// The message name is looked up first in the DescriptorPool attached to the
// proto being read from. This lets some cases work where the proto does not
// exist or has an unexpected name in the catalog, like a global_proto_db
// qualified name.
//
// Every successful lookup is passed through VerifyFieldExtendsMessage. If
// nothing is found, a SQL error is returned; this function does not return a
// null descriptor.
absl::StatusOr<const google::protobuf::FieldDescriptor*>
Resolver::FindExtensionFieldDescriptor(const ASTPathExpression* ast_path_expr,
                                       const google::protobuf::Descriptor* descriptor) {
  // Step 1: look for the extension in the DescriptorPool of the relevant proto
  // using the fully qualified name specified in the path.
  // TODO Ideally we should do this lookup using the Catalog, which will
  // enable finding top-level extensions that are inside a Catalog.
  const std::vector<std::string> path_names =
      ast_path_expr->ToIdentifierVector();
  const std::string qualified_name =
      Catalog::ConvertPathToProtoName(path_names);
  const google::protobuf::DescriptorPool* pool = descriptor->file()->pool();
  if (!qualified_name.empty()) {
    if (const google::protobuf::FieldDescriptor* by_full_name =
            pool->FindExtensionByName(qualified_name);
        by_full_name != nullptr) {
      return VerifyFieldExtendsMessage(ast_path_expr, by_full_name,
                                       descriptor);
    }
  }

  // Step 2: look for the extension inside a message, remembering that message
  // in 'scope_message'. This needs at least two identifiers in the path
  // (e.g., path.to.message.extension).
  const google::protobuf::Descriptor* scope_message = nullptr;
  if (path_names.size() >= 2) {
    // The message path is the full extension path without its last element
    // (which is the field name).
    const std::vector<std::string> message_path(path_names.begin(),
                                                path_names.end() - 1);
    ZETASQL_ASSIGN_OR_RETURN(scope_message,
                     FindMessageTypeForExtension(
                         ast_path_expr, message_path, pool,
                         /*return_error_for_non_message=*/true));
    if (scope_message != nullptr) {
      const google::protobuf::FieldDescriptor* scoped_extension =
          scope_message->FindExtensionByName(
              ast_path_expr->last_name()->GetAsString());
      if (scoped_extension != nullptr) {
        return VerifyFieldExtendsMessage(ast_path_expr, scoped_extension,
                                         descriptor);
      }
    } else {
      // The scope message was not found. If the extension was written as just
      // (package.ProtoName), without specifying a field, a more helpful error
      // can be given.
      ZETASQL_ASSIGN_OR_RETURN(scope_message,
                       FindMessageTypeForExtension(
                           ast_path_expr, path_names, pool,
                           /*return_error_for_non_message=*/false));
      if (scope_message != nullptr) {
        return MakeSqlErrorAt(ast_path_expr)
               << "Expected extension name of the form "
               << "(MessageName.extension_field_name), but "
               << ast_path_expr->ToIdentifierPathString()
               << " is a full message name. Add the extension field name.";
      }
    }
  }

  // Step 3: nothing was found, so pick the most specific error.

  // If the extension was written as a single quoted identifier containing the
  // fully qualified extension name and it resolves that way, say so
  // explicitly.
  if (path_names.size() == 1 && qualified_name.empty() &&
      pool->FindExtensionByName(ast_path_expr->last_name()->GetAsString()) !=
          nullptr) {
    return MakeSqlErrorAt(ast_path_expr)
           << "Specifying the fully qualified extension name as a quoted "
           << "identifier is disallowed: "
           << ast_path_expr->ToIdentifierPathString();
  }

  // If the scope message was found, mention it. Otherwise it is unknown
  // whether the user was looking for an extension scoped within a message or a
  // top-level extension, so the generic not-found message is used.
  if (scope_message != nullptr) {
    return MakeSqlErrorAt(ast_path_expr->last_name())
           << "Extension "
           << ToIdentifierLiteral(ast_path_expr->last_name()->GetAsIdString())
           << " not found in proto message " << scope_message->full_name();
  }
  return MakeSqlErrorAt(ast_path_expr)
         << "Extension " << ast_path_expr->ToIdentifierPathString()
         << " not found";
}
// Finds the message descriptor named by 'type_name_path'.
//
// The name is looked up first in 'descriptor_pool' (when the path converts to
// a non-empty proto name) and then as a type in catalog_.
//
// Returns:
//  - the descriptor, when the pool has the message or the catalog type is a
//    PROTO;
//  - nullptr, when the catalog reports kNotFound (the caller is expected to
//    produce the error), or when the type is not a PROTO and
//    'return_error_for_non_message' is false;
//  - a SQL error at 'ast_path_expr', when the type is not a PROTO and
//    'return_error_for_non_message' is true;
//  - any other error returned by the catalog lookup, unchanged.
absl::StatusOr<const google::protobuf::Descriptor*> Resolver::FindMessageTypeForExtension(
    const ASTPathExpression* ast_path_expr,
    const std::vector<std::string>& type_name_path,
    const google::protobuf::DescriptorPool* descriptor_pool,
    bool return_error_for_non_message) {
  // First choice: the DescriptorPool that was passed in.
  if (const std::string proto_name =
          Catalog::ConvertPathToProtoName(type_name_path);
      !proto_name.empty()) {
    const google::protobuf::Descriptor* pool_message =
        descriptor_pool->FindMessageTypeByName(proto_name);
    if (pool_message != nullptr) {
      ZETASQL_VLOG(2) << "Found message in proto's DescriptorPool: "
              << pool_message->DebugString();
      return pool_message;
    }
  }

  // Second choice: the catalog.
  const Type* found_type = nullptr;
  const absl::Status find_type_status = catalog_->FindType(
      type_name_path, &found_type, analyzer_options_.find_options());
  if (find_type_status.code() == absl::StatusCode::kNotFound) {
    // Not finding the type is not an error here. The caller reports it so
    // that it has a chance to generate a better error.
    return nullptr;
  }
  ZETASQL_RETURN_IF_ERROR(find_type_status);
  ZETASQL_RET_CHECK(found_type != nullptr);

  if (found_type->IsProto()) {
    return found_type->AsProto()->descriptor();
  }
  if (!return_error_for_non_message) {
    return nullptr;
  }
  return MakeSqlErrorAt(ast_path_expr)
         << "Path "
         << ast_path_expr->ToIdentifierPathString(
                /*max_prefix_size=*/type_name_path.size())
         << " resolves to type " << found_type->ShortTypeName(product_mode())
         << " but a PROTO type was expected for reading an extension field";
}
// Resolves a typed DATE, TIMESTAMP, TIME or DATETIME literal whose string
// contents are 'literal_string_value' into a ResolvedLiteral of 'type_kind'
// with an explicit type, stored in '*resolved_expr_out'.
//
// Returns a SQL error at 'ast_expr' when:
//  - 'type_kind' is a simple type that is not supported under language()
//    ("Type not found: ..."), or
//  - the string cannot be converted to a valid value of 'type_kind', or
//    'type_kind' is not one of the four kinds above ("Invalid <TYPE> literal").
//
// TIMESTAMP, TIME and DATETIME strings are parsed at nanosecond precision when
// FEATURE_TIMESTAMP_NANOS is enabled and at microsecond precision otherwise.
// TIMESTAMP strings are interpreted using default_time_zone().
//
// Conversion failures all fall through to the generic "Invalid ... literal"
// error at the end; the converter's own status message is not surfaced.
absl::Status Resolver::MakeResolvedDateOrTimeLiteral(
    const ASTExpression* ast_expr, const TypeKind type_kind,
    absl::string_view literal_string_value,
    std::unique_ptr<const ResolvedExpr>* resolved_expr_out) {
  if (Type::IsSimpleType(type_kind) &&
      !Type::IsSupportedSimpleTypeKind(type_kind, language())) {
    return MakeSqlErrorAt(ast_expr)
           << "Type not found: "
           << Type::TypeKindToString(type_kind, language().product_mode());
  }

  switch (type_kind) {
    case TYPE_DATE: {
      int32_t date;
      if (functions::ConvertStringToDate(literal_string_value, &date).ok()) {
        *resolved_expr_out =
            MakeResolvedLiteral(ast_expr, Value::Date(date),
                                /*set_has_explicit_type=*/true);
        return absl::OkStatus();
      }
      break;
    }
    case TYPE_TIMESTAMP: {
      if (language().LanguageFeatureEnabled(FEATURE_TIMESTAMP_NANOS)) {
        // Nanosecond precision: the value is carried as an absl::Time.
        absl::Time timestamp;
        if (functions::ConvertStringToTimestamp(
                literal_string_value, default_time_zone(),
                functions::kNanoseconds, /*allow_tz_in_str=*/true, &timestamp)
                .ok()) {
          *resolved_expr_out =
              MakeResolvedLiteral(ast_expr, Value::Timestamp(timestamp),
                                  /*set_has_explicit_type=*/true);
          return absl::OkStatus();
        }
      } else {
        // Microsecond precision: the value is carried as Unix micros.
        int64_t timestamp;
        if (functions::ConvertStringToTimestamp(
                literal_string_value, default_time_zone(),
                functions::kMicroseconds, &timestamp)
                .ok()) {
          *resolved_expr_out = MakeResolvedLiteral(
              ast_expr, Value::TimestampFromUnixMicros(timestamp),
              /*set_has_explicit_type=*/true);
          return absl::OkStatus();
        }
      }
      break;
    }
    case TYPE_TIME: {
      TimeValue time;
      functions::TimestampScale scale =
          language().LanguageFeatureEnabled(FEATURE_TIMESTAMP_NANOS)
              ? functions::kNanoseconds
              : functions::kMicroseconds;
      // A successful conversion must also yield a valid TimeValue.
      if (functions::ConvertStringToTime(literal_string_value, scale, &time)
              .ok() &&
          time.IsValid()) {
        *resolved_expr_out =
            MakeResolvedLiteral(ast_expr, Value::Time(time),
                                /*set_has_explicit_type=*/true);
        return absl::OkStatus();
      }
      break;
    }
    case TYPE_DATETIME: {
      DatetimeValue datetime;
      functions::TimestampScale scale =
          language().LanguageFeatureEnabled(FEATURE_TIMESTAMP_NANOS)
              ? functions::kNanoseconds
              : functions::kMicroseconds;
      // A successful conversion must also yield a valid DatetimeValue.
      if (functions::ConvertStringToDatetime(literal_string_value, scale,
                                             &datetime)
              .ok() &&
          datetime.IsValid()) {
        *resolved_expr_out =
            MakeResolvedLiteral(ast_expr, Value::Datetime(datetime),
                                /*set_has_explicit_type=*/true);
        return absl::OkStatus();
      }
      break;
    }
    default:
      break;
  }

  return MakeSqlErrorAt(ast_expr)
         << "Invalid " << Type::TypeKindToString(type_kind, product_mode())
         << " literal";
}
// Resolves 'ast_expr' by delegating to ResolveExpr with an ExprResolutionInfo
// constructed from only 'name_scope' and 'clause_name'. The resolved
// expression is stored in '*resolved_expr_out'; 'inferred_type' is forwarded
// unchanged.
//
// NOTE(review): presumably the two-argument ExprResolutionInfo constructor is
// the one that disallows aggregate and analytic functions, hence "scalar" --
// confirm against expr_resolver_helper.h.
absl::Status Resolver::ResolveScalarExpr(
    const ASTExpression* ast_expr, const NameScope* name_scope,
    const char* clause_name,
    std::unique_ptr<const ResolvedExpr>* resolved_expr_out,
    const Type* inferred_type) {
  ExprResolutionInfo scalar_resolution_info(name_scope, clause_name);
  return ResolveExpr(ast_expr, &scalar_resolution_info, resolved_expr_out,
                     inferred_type);
}
// Parses the string of 'json_literal' as JSON and returns it as a JSON-typed
// ResolvedLiteral with an explicit type and no annotation map.
//
// Wide numbers are parsed in kExact mode when
// FEATURE_JSON_STRICT_NUMBER_PARSING is enabled and in kRound mode otherwise.
// A parse failure is returned as a SQL error at 'json_literal' that carries
// the parser's message.
absl::StatusOr<std::unique_ptr<const ResolvedLiteral>>
Resolver::ResolveJsonLiteral(const ASTJSONLiteral* json_literal) {
  const auto wide_number_mode =
      language().LanguageFeatureEnabled(FEATURE_JSON_STRICT_NUMBER_PARSING)
          ? JSONParsingOptions::WideNumberMode::kExact
          : JSONParsingOptions::WideNumberMode::kRound;

  auto parsed_json = JSONValue::ParseJSONString(
      json_literal->string_literal()->string_value(),
      JSONParsingOptions{.wide_number_mode = wide_number_mode});

  if (parsed_json.ok()) {
    return MakeResolvedLiteral(
        json_literal, {types::JsonType(), /*annotation_map=*/nullptr},
        Value::Json(std::move(parsed_json.value())),
        /*has_explicit_type=*/true);
  }
  return MakeSqlErrorAt(json_literal)
         << "Invalid JSON literal: " << parsed_json.status().message();
}
// Parses one boundary of a RANGE literal into a Value whose type is given by
// `type_kind`.
//
// An absent `boundary_value` marks the boundary as unbounded; in that case the
// matching Value::NullDate() / Value::NullDatetime() / Value::NullTimestamp()
// is returned without any parsing.
//
// Supported element kinds are TYPE_DATE, TYPE_DATETIME and TYPE_TIMESTAMP. Any
// other kind yields an InvalidArgument error. DATETIME and TIMESTAMP
// boundaries are parsed at nanosecond scale when FEATURE_TIMESTAMP_NANOS is
// enabled in `language`, and at microsecond scale otherwise.
// `default_time_zone` is only used when parsing TIMESTAMP boundaries.
//
// Errors from the underlying string conversion functions are returned
// unchanged.
absl::StatusOr<Value> ParseRangeBoundary(
    const TypeKind& type_kind, std::optional<absl::string_view> boundary_value,
    const LanguageOptions& language, const absl::TimeZone default_time_zone) {
  const bool unbounded = !boundary_value.has_value();
  switch (type_kind) {
    case TYPE_DATE: {
      if (unbounded) {
        return Value::NullDate();
      }
      int32_t date;
      ZETASQL_RETURN_IF_ERROR(
          functions::ConvertStringToDate(*boundary_value, &date));
      return Value::Date(date);
    }
    case TYPE_DATETIME: {
      if (unbounded) {
        return Value::NullDatetime();
      }
      DatetimeValue datetime;
      const functions::TimestampScale scale =
          language.LanguageFeatureEnabled(FEATURE_TIMESTAMP_NANOS)
              ? functions::kNanoseconds
              : functions::kMicroseconds;
      ZETASQL_RETURN_IF_ERROR(functions::ConvertStringToDatetime(*boundary_value,
                                                         scale, &datetime));
      // A successful conversion is still checked for validity before the
      // value is accepted.
      if (!datetime.IsValid()) {
        return absl::InvalidArgumentError("Datetime is invalid");
      }
      return Value::Datetime(datetime);
    }
    case TYPE_TIMESTAMP: {
      if (unbounded) {
        return Value::NullTimestamp();
      }
      if (language.LanguageFeatureEnabled(FEATURE_TIMESTAMP_NANOS)) {
        // Nanosecond path: the result is an absl::Time, passed by address as
        // the out-parameter.
        absl::Time timestamp;
        ZETASQL_RETURN_IF_ERROR(functions::ConvertStringToTimestamp(
            *boundary_value, default_time_zone, functions::kNanoseconds,
            /*allow_tz_in_str=*/true, &timestamp));
        return Value::Timestamp(timestamp);
      } else {
        // Microsecond path: the result is an int64_t that is handed to
        // Value::TimestampFromUnixMicros.
        int64_t timestamp;
        ZETASQL_RETURN_IF_ERROR(functions::ConvertStringToTimestamp(
            *boundary_value, default_time_zone, functions::kMicroseconds,
            &timestamp));
        return Value::TimestampFromUnixMicros(timestamp);
      }
    }
    default: {
      return absl::InvalidArgumentError(absl::StrCat(
          "Parsing of RANGE literal of type ",
          Type::TypeKindToString(type_kind, language.product_mode()),
          " is not supported"));
    }
  }
}
// Parses the text of a RANGE literal into a range Value.
//
// `range_literal` is first split into its start and end parts by
// ParseRangeBoundaries. Each part is then converted by ParseRangeBoundary
// using the element kind of `range_type`, and the two resulting Values are
// combined with Value::MakeRange. The first error encountered along the way is
// returned.
absl::StatusOr<Value> ParseRange(absl::string_view range_literal,
                                 const RangeType* range_type,
                                 const LanguageOptions& language,
                                 const absl::TimeZone default_time_zone) {
  ZETASQL_ASSIGN_OR_RETURN(const auto bounds, ParseRangeBoundaries(range_literal));

  // Both boundaries share the range's element kind.
  const TypeKind element_kind = range_type->element_type()->kind();

  ZETASQL_ASSIGN_OR_RETURN(Value lower_bound,
                   ParseRangeBoundary(element_kind, bounds.start, language,
                                      default_time_zone));
  ZETASQL_ASSIGN_OR_RETURN(Value upper_bound,
                   ParseRangeBoundary(element_kind, bounds.end, language,
                                      default_time_zone));
  return Value::MakeRange(lower_bound, upper_bound);
}
// Resolves a RANGE literal into a ResolvedLiteral.
//
// Steps, in order:
//   1. Resolve the literal's declared type via ResolveRangeType.
//   2. Reject literals that carry no range value.
//   3. Parse the value's string with ParseRange, using the resolver's
//      language options and default time zone.
// A parse failure is reported as a SQL error located at `range_literal`. On
// success the literal's type is obtained from
// types::RangeTypeFromSimpleTypeKind for the resolved element kind.
absl::StatusOr<std::unique_ptr<const ResolvedLiteral>>
Resolver::ResolveRangeLiteral(const ASTRangeLiteral* range_literal) {
  RETURN_ERROR_IF_OUT_OF_STACK_SPACE();

  const RangeType* resolved_range_type = nullptr;
  ZETASQL_RETURN_IF_ERROR(ResolveRangeType(range_literal->type(),
                                   {.context = "literal value construction"},
                                   &resolved_range_type,
                                   /*resolved_type_modifiers=*/nullptr));

  // The type is resolved first, so type errors are reported before a missing
  // value.
  const auto* value_node = range_literal->range_value();
  if (value_node == nullptr) {
    return MakeSqlErrorAt(range_literal)
           << "Invalid range literal. Expected RANGE keyword to be followed by "
              "a STRING literal";
  }

  absl::StatusOr<Value> parsed_range =
      ParseRange(value_node->string_value(), resolved_range_type, language(),
                 default_time_zone());
  if (!parsed_range.ok()) {
    return MakeSqlErrorAt(range_literal)
           << "Invalid RANGE literal value: "
           << parsed_range.status().message();
  }

  const TypeKind element_kind = resolved_range_type->element_type()->kind();
  return MakeResolvedLiteral(
      range_literal,
      {types::RangeTypeFromSimpleTypeKind(element_kind),
       /*annotation_map=*/nullptr},
      parsed_range.value(), /*has_explicit_type=*/true);
}
absl::Status Resolver::ResolveExpr(
const ASTExpression* ast_expr,
ExprResolutionInfo* parent_expr_resolution_info,
std::unique_ptr<const ResolvedExpr>* resolved_expr_out,
const Type* inferred_type) {
RETURN_ERROR_IF_OUT_OF_STACK_SPACE();
ABSL_DCHECK(parent_expr_resolution_info != nullptr);
// Use a separate ExprAggregationInfo for the child because we don't
// want it to observe <has_aggregation>, <has_analytic>, or <can_flatten> from
// a sibling. <has_aggregation> and <has_analytic> need to flow up the tree
// only, and not across. <can_flatten> needs to be selectively propagated.
std::unique_ptr<ExprResolutionInfo> expr_resolution_info( // Save stack for
new ExprResolutionInfo(parent_expr_resolution_info)); // nested exprs.
switch (ast_expr->node_kind()) {
// These cases are extracted into a separate method to reduce stack usage.
case AST_INT_LITERAL:
case AST_STRING_LITERAL:
case AST_BYTES_LITERAL:
case AST_BOOLEAN_LITERAL:
case AST_FLOAT_LITERAL:
case AST_NULL_LITERAL:
case AST_DATE_OR_TIME_LITERAL:
case AST_NUMERIC_LITERAL:
case AST_BIGNUMERIC_LITERAL:
case AST_JSON_LITERAL:
case AST_RANGE_LITERAL:
ZETASQL_RETURN_IF_ERROR(ResolveLiteralExpr(ast_expr, resolved_expr_out));
break;
case AST_STAR:
return MakeSqlErrorAt(ast_expr)
<< "Argument * can only be used in COUNT(*)"
<< (language().LanguageFeatureEnabled(FEATURE_ANONYMIZATION)
? " or ANON_COUNT(*)"
: "");
case AST_DOT_STAR:
case AST_DOT_STAR_WITH_MODIFIERS:
// This is expected to be unreachable as parser allows creation of
// dot star nodes only inside SELECT expression.
return MakeSqlErrorAt(ast_expr)
<< "Dot-star is only supported in SELECT expression";
case AST_PATH_EXPRESSION:
expr_resolution_info->flatten_state.SetParent(
&parent_expr_resolution_info->flatten_state);
ZETASQL_RETURN_IF_ERROR(ResolvePathExpressionAsExpression(
PathExpressionSpan(*ast_expr->GetAsOrDie<ASTPathExpression>()),
expr_resolution_info.get(), ResolvedStatement::READ,
resolved_expr_out));
break;
case AST_PARAMETER_EXPR:
ZETASQL_RETURN_IF_ERROR(ResolveParameterExpr(
ast_expr->GetAsOrDie<ASTParameterExpr>(), resolved_expr_out));
break;
case AST_DOT_IDENTIFIER:
expr_resolution_info->flatten_state.SetParent(
&parent_expr_resolution_info->flatten_state);
ZETASQL_RETURN_IF_ERROR(
ResolveDotIdentifier(ast_expr->GetAsOrDie<ASTDotIdentifier>(),
expr_resolution_info.get(), resolved_expr_out));
break;
case AST_DOT_GENERALIZED_FIELD:
expr_resolution_info->flatten_state.SetParent(
&parent_expr_resolution_info->flatten_state);
ZETASQL_RETURN_IF_ERROR(ResolveDotGeneralizedField(
ast_expr->GetAsOrDie<ASTDotGeneralizedField>(),
expr_resolution_info.get(), resolved_expr_out));
break;
case AST_UNARY_EXPRESSION:
ZETASQL_RETURN_IF_ERROR(
ResolveUnaryExpr(ast_expr->GetAsOrDie<ASTUnaryExpression>(),
expr_resolution_info.get(), resolved_expr_out));
break;
case AST_BINARY_EXPRESSION:
ZETASQL_RETURN_IF_ERROR(
ResolveBinaryExpr(ast_expr->GetAsOrDie<ASTBinaryExpression>(),
expr_resolution_info.get(), resolved_expr_out));
break;
case AST_BITWISE_SHIFT_EXPRESSION:
ZETASQL_RETURN_IF_ERROR(ResolveBitwiseShiftExpr(
ast_expr->GetAsOrDie<ASTBitwiseShiftExpression>(),
expr_resolution_info.get(), resolved_expr_out));
break;
case AST_IN_EXPRESSION:
ZETASQL_RETURN_IF_ERROR(ResolveInExpr(ast_expr->GetAsOrDie<ASTInExpression>(),
expr_resolution_info.get(),
resolved_expr_out));
break;
case AST_LIKE_EXPRESSION:
ZETASQL_RETURN_IF_ERROR(ResolveLikeExpr(ast_expr->GetAsOrDie<ASTLikeExpression>(),
expr_resolution_info.get(),
resolved_expr_out));
break;
case AST_BETWEEN_EXPRESSION:
ZETASQL_RETURN_IF_ERROR(
ResolveBetweenExpr(ast_expr->GetAsOrDie<ASTBetweenExpression>(),
expr_resolution_info.get(), resolved_expr_out));
break;
case AST_AND_EXPR:
ZETASQL_RETURN_IF_ERROR(ResolveAndExpr(ast_expr->GetAsOrDie<ASTAndExpr>(),
expr_resolution_info.get(),
resolved_expr_out));
break;
case AST_OR_EXPR:
ZETASQL_RETURN_IF_ERROR(ResolveOrExpr(ast_expr->GetAsOrDie<ASTOrExpr>(),
expr_resolution_info.get(),
resolved_expr_out));
break;
case AST_FUNCTION_CALL:
ZETASQL_RETURN_IF_ERROR(
ResolveFunctionCall(ast_expr->GetAsOrDie<ASTFunctionCall>(),
expr_resolution_info.get(), resolved_expr_out));
break;
case AST_CAST_EXPRESSION:
ZETASQL_RETURN_IF_ERROR(
ResolveExplicitCast(ast_expr->GetAsOrDie<ASTCastExpression>(),
expr_resolution_info.get(), resolved_expr_out));
break;