Skip to content

Commit

Permalink
feat(bigquery/storage/managedwriter): improve schema comparison stabi…
Browse files Browse the repository at this point in the history
…lity (#9241)

The Write API leverages DescriptorProto messages for communicating proto schema between the client and service.  This change ensures the DescriptorProto is more stable for comparison purposes by ensuring ordering for the normalized fields, nested messages, and enum values.

Test-wise, this adds a new testdata message with things explicitly defined in out-of-order fashion, and also updates some test inputs for other tests where the definition is out of order.

Related: issue b/319301074 internally
  • Loading branch information
shollyman committed Jan 12, 2024
1 parent 4a132f9 commit faccb68
Show file tree
Hide file tree
Showing 4 changed files with 501 additions and 160 deletions.
19 changes: 19 additions & 0 deletions bigquery/storage/managedwriter/adapt/protoconversion.go
Expand Up @@ -17,6 +17,7 @@ package adapt
import (
"encoding/base64"
"fmt"
"sort"
"strings"

"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
Expand Down Expand Up @@ -476,6 +477,12 @@ func normalizeDescriptorInternal(in protoreflect.MessageDescriptor, visitedTypes
} else {
enumDP := protodesc.ToEnumDescriptorProto(inField.Enum())
enumDP.Name = proto.String(enumName)
// Ensure values in enum are sorted.
vals := enumDP.GetValue()
sort.SliceStable(vals, func(i, j int) bool {
return vals[i].GetNumber() < vals[j].GetNumber()
})
// Append wrapped enum to nested types.
root.NestedType = append(root.NestedType, &descriptorpb.DescriptorProto{
Name: proto.String(enclosingTypeName),
EnumType: []*descriptorpb.EnumDescriptorProto{enumDP},
Expand All @@ -486,6 +493,18 @@ func normalizeDescriptorInternal(in protoreflect.MessageDescriptor, visitedTypes
}
resultDP.Field = append(resultDP.Field, resultFDP)
}
// To reduce comparison jitter, order the common slices fields where possible.
//
// First, fields are sorted by ID number.
fields := resultDP.GetField()
sort.SliceStable(fields, func(i, j int) bool {
return fields[i].GetNumber() < fields[j].GetNumber()
})
// Then, sort nested messages in NestedType by name.
nested := resultDP.GetNestedType()
sort.SliceStable(nested, func(i, j int) bool {
return nested[i].GetName() < nested[j].GetName()
})
structTypes.add(fullProtoName)
return resultDP, nil
}
Expand Down
213 changes: 159 additions & 54 deletions bigquery/storage/managedwriter/adapt/protoconversion_test.go
Expand Up @@ -459,28 +459,6 @@ func TestSchemaToProtoConversion(t *testing.T) {
},
},
NestedType: []*descriptorpb.DescriptorProto{
{
Name: proto.String("root__reused_inner_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("leaf"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
},
{
Name: proto.String("root__outer_struct__another_inner_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("another_leaf"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
},
{
Name: proto.String("root__outer_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
Expand All @@ -507,6 +485,28 @@ func TestSchemaToProtoConversion(t *testing.T) {
},
},
},
{
Name: proto.String("root__outer_struct__another_inner_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("another_leaf"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
},
{
Name: proto.String("root__reused_inner_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("leaf"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
},
},
},
},
},
wantProto3: &descriptorpb.DescriptorProto{
Expand Down Expand Up @@ -592,6 +592,18 @@ func TestSchemaToProtoConversion(t *testing.T) {
},
},
NestedType: []*descriptorpb.DescriptorProto{
{
Name: proto.String("root__outer_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("inner_struct"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String("root__outer_struct__inner_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
},
},
{
Name: proto.String("root__outer_struct__inner_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
Expand All @@ -609,18 +621,6 @@ func TestSchemaToProtoConversion(t *testing.T) {
},
},
},
{
Name: proto.String("root__outer_struct"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("inner_struct"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
TypeName: proto.String("root__outer_struct__inner_struct"),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
},
},
},
},
wantProto3: &descriptorpb.DescriptorProto{
Expand Down Expand Up @@ -1367,6 +1367,39 @@ func TestNormalizeDescriptor(t *testing.T) {
},
},
NestedType: []*descriptorpb.DescriptorProto{
{
Name: proto.String("testdata_EnumMsgA"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("foo"),
JsonName: proto.String("foo"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
{
Name: proto.String("bar"),
JsonName: proto.String("bar"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum(),
TypeName: proto.String("testdata_ExtEnum_E.ExtEnum"),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
},
},
{
Name: proto.String("testdata_EnumMsgB"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("baz"),
JsonName: proto.String("baz"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum(),
TypeName: proto.String("testdata_ExtEnum_E.ExtEnum"),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
},
},
{
Name: proto.String("testdata_ExtEnum_E"),
EnumType: []*descriptorpb.EnumDescriptorProto{
Expand All @@ -1389,35 +1422,107 @@ func TestNormalizeDescriptor(t *testing.T) {
},
},
},
},
},
},
{
description: "OutOfOrderDefinitionProto2",
in: (&testdata.OutOfOrderDefinitionProto2{}).ProtoReflect().Descriptor(),
want: &descriptorpb.DescriptorProto{
Name: proto.String("testdata_OutOfOrderDefinitionProto2"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("testdata_EnumMsgA"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("foo"),
JsonName: proto.String("foo"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
Name: proto.String("s1"),
JsonName: proto.String("s1"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
{
Name: proto.String("s2"),
JsonName: proto.String("s2"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
{
Name: proto.String("s3"),
JsonName: proto.String("s3"),
Number: proto.Int32(3),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
{
Name: proto.String("enum1"),
JsonName: proto.String("enum1"),
Number: proto.Int32(4),
Type: descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
TypeName: proto.String("testdata_OutOfOrderDefinitionProto2_OutOfOrderEnum_E.OutOfOrderEnum"),
},
{
Name: proto.String("enum2"),
JsonName: proto.String("enum2"),
Number: proto.Int32(5),
Type: descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
TypeName: proto.String("testdata_OutOfOrderDefinitionProto2_OutOfOrderEnum_E.OutOfOrderEnum"),
},
{
Name: proto.String("msg6"),
JsonName: proto.String("msg6"),
Number: proto.Int32(6),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
TypeName: proto.String("testdata_SimpleMessageProto2"),
},
{
Name: proto.String("msg7"),
JsonName: proto.String("msg7"),
Number: proto.Int32(7),
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
TypeName: proto.String("testdata_SimpleMessageProto2"),
},
},
NestedType: []*descriptorpb.DescriptorProto{
{
Name: proto.String("testdata_OutOfOrderDefinitionProto2_OutOfOrderEnum_E"),
EnumType: []*descriptorpb.EnumDescriptorProto{
{
Name: proto.String("bar"),
JsonName: proto.String("bar"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum(),
TypeName: proto.String("testdata_ExtEnum_E.ExtEnum"),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
Name: proto.String("OutOfOrderEnum"),
Value: []*descriptorpb.EnumValueDescriptorProto{
{
Name: proto.String("E1"),
Number: proto.Int32(1),
},
{
Name: proto.String("E2"),
Number: proto.Int32(2),
},
{
Name: proto.String("E3"),
Number: proto.Int32(3),
},
},
},
},
},
{
Name: proto.String("testdata_EnumMsgB"),
Name: proto.String("testdata_SimpleMessageProto2"),
Field: []*descriptorpb.FieldDescriptorProto{
{
Name: proto.String("baz"),
JsonName: proto.String("baz"),
Name: proto.String("name"),
JsonName: proto.String("name"),
Number: proto.Int32(1),
Type: descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum(),
TypeName: proto.String("testdata_ExtEnum_E.ExtEnum"),
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
{
Name: proto.String("value"),
JsonName: proto.String("value"),
Number: proto.Int32(2),
Type: descriptorpb.FieldDescriptorProto_TYPE_INT64.Enum(),
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
},
},
Expand Down

0 comments on commit faccb68

Please sign in to comment.