Skip to content
Permalink
Browse files
fix: ProtoSchemaConver's problem when converting fields reference sam…
…e… (#428)

* fix: ProtoSchemaConver's problem when converting fields reference same enum types, the generated proto is not buildable.

	modified:   google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1alpha2/ProtoSchemaConverter.java
	modified:   google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1alpha2/ProtoSchemaConverterTest.java

* fix review comments
  • Loading branch information
yirutang committed Jul 20, 2020
1 parent e2156fc commit 1ce2621fe633f29c57bc4f4df84b2bcc2c57bdb8
@@ -19,6 +19,7 @@
import com.google.api.gax.rpc.InvalidArgumentException;
import com.google.cloud.bigquery.storage.v1alpha2.ProtoBufProto.ProtoSchema;
import com.google.protobuf.DescriptorProtos.DescriptorProto;
import com.google.protobuf.DescriptorProtos.EnumDescriptorProto;
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
import com.google.protobuf.Descriptors.Descriptor;
import com.google.protobuf.Descriptors.FieldDescriptor;
@@ -29,51 +30,95 @@
// protobuf::DescriptorProto
// that can be reconstructed by the backend.
public class ProtoSchemaConverter {
private static class StructName {
public String getName() {
count++;
return count == 1 ? "__ROOT__" : "__S" + count;
}

private int count = 0;
}

private static ProtoSchema convertInternal(
Descriptor input, List<String> visitedTypes, StructName structName) {
Descriptor input,
Set<String> visitedTypes,
Set<String> enumTypes,
Set<String> structTypes,
DescriptorProto.Builder rootProtoSchema) {
DescriptorProto.Builder resultProto = DescriptorProto.newBuilder();
resultProto.setName(structName.getName());
if (rootProtoSchema == null) {
rootProtoSchema = resultProto;
}
String protoName = input.getFullName();
protoName = protoName.replace('.', '_');
resultProto.setName(protoName);
Set<String> localEnumTypes = new HashSet<String>();
visitedTypes.add(input.getFullName());
for (int i = 0; i < input.getFields().size(); i++) {
FieldDescriptor inputField = input.getFields().get(i);
FieldDescriptorProto.Builder resultField = inputField.toProto().toBuilder();
if (inputField.getType() == FieldDescriptor.Type.GROUP
|| inputField.getType() == FieldDescriptor.Type.MESSAGE) {
if (visitedTypes.contains(inputField.getMessageType().getFullName())) {
throw new InvalidArgumentException(
"Recursive type is not supported:" + inputField.getMessageType().getFullName(),
null,
GrpcStatusCode.of(Status.Code.INVALID_ARGUMENT),
false);
String msgFullName = inputField.getMessageType().getFullName();
msgFullName = msgFullName.replace('.', '_');
if (structTypes.contains(msgFullName)) {
resultField.setTypeName(msgFullName);
} else {
if (visitedTypes.contains(msgFullName)) {
throw new InvalidArgumentException(
"Recursive type is not supported:" + inputField.getMessageType().getFullName(),
null,
GrpcStatusCode.of(Status.Code.INVALID_ARGUMENT),
false);
}
visitedTypes.add(msgFullName);
rootProtoSchema.addNestedType(
convertInternal(
inputField.getMessageType(),
visitedTypes,
enumTypes,
structTypes,
rootProtoSchema)
.getProtoDescriptor());
visitedTypes.remove(msgFullName);
resultField.setTypeName(
rootProtoSchema.getNestedType(rootProtoSchema.getNestedTypeCount() - 1).getName());
}
resultProto.addNestedType(
convertInternal(inputField.getMessageType(), visitedTypes, structName)
.getProtoDescriptor());
visitedTypes.remove(inputField.getMessageType().getFullName());
resultField.setTypeName(
resultProto.getNestedType(resultProto.getNestedTypeCount() - 1).getName());
}
if (inputField.getType() == FieldDescriptor.Type.ENUM) {
resultProto.addEnumType(inputField.getEnumType().toProto());
resultField.setTypeName(
resultProto.getEnumType(resultProto.getEnumTypeCount() - 1).getName());
String enumFullName = inputField.getEnumType().getFullName();
// If the enum is defined within the current message, we don't want to
// pull it out to the top since then the same enum values will not be
// allowed if the value collides with other enums.
if (enumFullName.startsWith(input.getFullName())) {
String enumName = inputField.getEnumType().getName();
if (localEnumTypes.contains(enumName)) {
resultField.setTypeName(enumName);
} else {
resultProto.addEnumType(inputField.getEnumType().toProto());
resultField.setTypeName(enumName);
localEnumTypes.add(enumName);
}
} else {
// If the enum is defined elsewhere, then redefine it at the top
// message scope. There is a problem that different enum values might
// be OK when living under its original scope, but when they all live
// in top scope, their values cannot collide. Say if thers A.Color with
// RED and B.Color with RED, if they are redefined here, the RED will
// collide with each other and thus not allowed.
enumFullName = enumFullName.replace('.', '_');
if (enumTypes.contains(enumFullName)) {
resultField.setTypeName(enumFullName);
} else {
EnumDescriptorProto enumType =
inputField.getEnumType().toProto().toBuilder().setName(enumFullName).build();
resultProto.addEnumType(enumType);
resultField.setTypeName(enumFullName);
enumTypes.add(enumFullName);
}
}
}
resultProto.addField(resultField);
}
structTypes.add(protoName);
return ProtoSchema.newBuilder().setProtoDescriptor(resultProto.build()).build();
}

public static ProtoSchema convert(Descriptor descriptor) {
ArrayList<String> visitedTypes = new ArrayList<String>();
return convertInternal(descriptor, visitedTypes, new StructName());
Set<String> visitedTypes = new HashSet<String>();
Set<String> enumTypes = new HashSet<String>();
Set<String> structTypes = new HashSet<String>();
return convertInternal(descriptor, visitedTypes, enumTypes, structTypes, null);
}
}
@@ -17,6 +17,8 @@

import com.google.api.gax.rpc.InvalidArgumentException;
import com.google.cloud.bigquery.storage.test.Test.*;
import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
import com.google.protobuf.Descriptors;
import org.junit.*;

public class ProtoSchemaConverterTest {
@@ -26,7 +28,7 @@ public void convertSimple() {
ProtoBufProto.ProtoSchema protoSchema =
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
Assert.assertEquals(
"name: \"__ROOT__\"\n"
"name: \"com_google_cloud_bigquery_storage_test_AllSupportedTypes\"\n"
+ "field {\n"
+ " name: \"int32_value\"\n"
+ " number: 1\n"
@@ -74,7 +76,7 @@ public void convertSimple() {
+ " number: 8\n"
+ " label: LABEL_OPTIONAL\n"
+ " type: TYPE_ENUM\n"
+ " type_name: \"TestEnum\"\n"
+ " type_name: \"com_google_cloud_bigquery_storage_test_TestEnum\"\n"
+ "}\n"
+ "field {\n"
+ " name: \"string_value\"\n"
@@ -83,7 +85,7 @@ public void convertSimple() {
+ " type: TYPE_STRING\n"
+ "}\n"
+ "enum_type {\n"
+ " name: \"TestEnum\"\n"
+ " name: \"com_google_cloud_bigquery_storage_test_TestEnum\"\n"
+ " value {\n"
+ " name: \"TestEnum0\"\n"
+ " number: 0\n"
@@ -102,47 +104,38 @@ public void convertNested() {
ProtoBufProto.ProtoSchema protoSchema =
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
Assert.assertEquals(
"name: \"__ROOT__\"\n"
"name: \"com_google_cloud_bigquery_storage_test_ComplicateType\"\n"
+ "field {\n"
+ " name: \"nested_repeated_type\"\n"
+ " number: 1\n"
+ " label: LABEL_REPEATED\n"
+ " type: TYPE_MESSAGE\n"
+ " type_name: \"__S2\"\n"
+ " type_name: \"com_google_cloud_bigquery_storage_test_NestedType\"\n"
+ "}\n"
+ "field {\n"
+ " name: \"inner_type\"\n"
+ " number: 2\n"
+ " label: LABEL_OPTIONAL\n"
+ " type: TYPE_MESSAGE\n"
+ " type_name: \"__S4\"\n"
+ " type_name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
+ "}\n"
+ "nested_type {\n"
+ " name: \"__S2\"\n"
+ " name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
+ " field {\n"
+ " name: \"inner_type\"\n"
+ " name: \"value\"\n"
+ " number: 1\n"
+ " label: LABEL_REPEATED\n"
+ " type: TYPE_MESSAGE\n"
+ " type_name: \"__S3\"\n"
+ " }\n"
+ " nested_type {\n"
+ " name: \"__S3\"\n"
+ " field {\n"
+ " name: \"value\"\n"
+ " number: 1\n"
+ " label: LABEL_REPEATED\n"
+ " type: TYPE_STRING\n"
+ " }\n"
+ " type: TYPE_STRING\n"
+ " }\n"
+ "}\n"
+ "nested_type {\n"
+ " name: \"__S4\"\n"
+ " name: \"com_google_cloud_bigquery_storage_test_NestedType\"\n"
+ " field {\n"
+ " name: \"value\"\n"
+ " name: \"inner_type\"\n"
+ " number: 1\n"
+ " label: LABEL_REPEATED\n"
+ " type: TYPE_STRING\n"
+ " type: TYPE_MESSAGE\n"
+ " type_name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
+ " }\n"
+ "}\n",
protoSchema.getProtoDescriptor().toString());
@@ -156,7 +149,46 @@ public void convertRecursive() {
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
Assert.fail("No exception raised");
} catch (InvalidArgumentException e) {
// Expected exception
Assert.assertEquals(
"Recursive type is not supported:com.google.cloud.bigquery.storage.test.ContainsRecursive",
e.getMessage());
}
}

@Test
public void convertRecursiveTopMessage() {
try {
RecursiveTypeTopMessage testProto = RecursiveTypeTopMessage.newBuilder().build();
ProtoBufProto.ProtoSchema protoSchema =
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
Assert.fail("No exception raised");
} catch (InvalidArgumentException e) {
Assert.assertEquals(
"Recursive type is not supported:com.google.cloud.bigquery.storage.test.RecursiveTypeTopMessage",
e.getMessage());
}
}

@Test
public void convertDuplicateType() {
DuplicateType testProto = DuplicateType.newBuilder().build();
ProtoBufProto.ProtoSchema protoSchema =
ProtoSchemaConverter.convert(testProto.getDescriptorForType());

FileDescriptorProto fileDescriptorProto =
FileDescriptorProto.newBuilder()
.setName("foo.proto")
.addMessageType(protoSchema.getProtoDescriptor())
.build();
try {
Descriptors.FileDescriptor fs =
Descriptors.FileDescriptor.buildFrom(
fileDescriptorProto, new Descriptors.FileDescriptor[0]);
Descriptors.Descriptor type =
fs.findMessageTypeByName(protoSchema.getProtoDescriptor().getName());
Assert.assertEquals(4, type.getFields().size());
} catch (Descriptors.DescriptorValidationException ex) {
Assert.fail("Got unexpected exception: " + ex.getMessage());
}
}
}
@@ -55,6 +55,17 @@ message RecursiveType {
optional ContainsRecursive field = 2;
}

message RecursiveTypeTopMessage {
optional RecursiveTypeTopMessage field = 2;
}

message FooType {
optional string foo = 1;
}

message DuplicateType {
optional TestEnum f1 = 1;
optional TestEnum f2 = 2;
optional ComplicateType f3 = 3;
optional ComplicateType f4 = 4;
}

0 comments on commit 1ce2621

Please sign in to comment.