Merge af43ea7 into 2826811

protocolbuffers · Jul 11, 2017 · e261b81 · e261b81
2 parents 2826811 + af43ea7
commit e261b81
Show file tree

Hide file tree

Showing 12 changed files with 3,002 additions and 58 deletions.
diff --git a/DESIGN.md b/DESIGN.md
@@ -0,0 +1,83 @@
+
+μpb Design
+----------
+
+**NOTE:** the design described here is being implemented currently, but is not
+yet complete.  The repo is in heavy transition right now.
+
+μpb has the following design goals:
+
+- C89 compatible.
+- small code size (both for the core library and generated messages).
+- fast performance (hundreds of MB/s).
+- idiomatic for C programs.
+- easy to wrap in high-level languages (Python, Ruby, Lua, etc.) with
+  good performance and all standard protobuf features.
+- hands-off about memory management, allowing for easy integration
+  with existing VMs and/or garbage collectors.
+- offers binary ABI compatibility between apps, generated messages, and
+  the core library (doesn't require re-generating messages or recompiling
+  your application when the core library changes).
+- provides all features that users expect from a protobuf library
+  (generated messages in C, reflection, text format, etc.).
+- layered, so the core is small and doesn't require descriptors.
+- tidy about symbol references, so that any messages or features that
+  aren't used by a C program can have their code GC'd by the linker.
+- possible to use protobuf binary format without leaking message/field
+  names into the binary.
+
+μpb accomplishes these goals by keeping a very small core that does not contain
+descriptors.  We need some way of knowing what fields are in each message and
+where they live, but instead of descriptors, we keep a small/lightweight summary
+of the .proto file.  We call this a `upb_msglayout`.  It contains the bare
+minimum of what we need to know to parse and serialize protobuf binary format
+into our internal representation for messages, `upb_msg`.
+
+The core then contains functions to parse/serialize a message, given a `upb_msg*`
+and a `const upb_msglayout*`.
+
+This approach is similar to [nanopb](https://github.com/nanopb/nanopb) which
+also compiles message definitions to a compact, internal representation without
+names.  However nanopb does not aim to be a fully-featured library, and has no
+support for text format, JSON, or descriptors.  μpb is unique in that it has a
+small core similar to nanopb (though not quite as small), but also offers a
+full-featured protobuf library for applications that want reflection, text
+format, JSON format, etc.
+
+Without descriptors, the core doesn't have access to field names, so it cannot
+parse/serialize to protobuf text format or JSON.  Instead this functionality
+lives in separate modules that depend on the module implementing descriptors.
+With the descriptor module we can parse/serialize binary descriptors and
+validate that they follow all the rules of protobuf schemas.
+
+To provide binary compatibility, we version the structs that generated messages
+use to create a `upb_msglayout*`.  The current initializers are
+`upb_msglayout_msginit_v1`, `upb_msglayout_fieldinit_v1`, etc.  Then
+`upb_msglayout*` uses these as its internal representation.  If upb changes its
+internal representation for a `upb_msglayout*`, it will also include code to
+convert the old representation to the new representation.  This will use some
+more memory/CPU at runtime to convert between the two, but apps that statically
+link μpb will never need to worry about this.
+
+TODO
+----
+
+The current state of the repo is quite different from what is described above.
+Here are the major items that need to be implemented.
+
+1. implement the core generic protobuf binary encoder/decoder that uses a
+   `upb_msglayout*`.
+2. move all mention of handlers, sinks, etc. out of core and into their own module.
+   All of the handlers stuff needs substantial revision, but moving it out of
+   core is the first priority.
+3. move all of the def/refcounted stuff out of core.  The defs also need
+   substantial revision, but moving them out of core is the first priority.
+4. revise our generated code until it is in a state where we feel comfortable
+   committing to API/ABI stability for it.  This may involve moving different
+   parts of the generated code into separate files, like keeping the serialized
+   descriptor in a separate file from the compact msglayout.
+5. revise all of the existing encoders/decoders and handlers.  We probably
+   will want to keep handlers, since they let us decouple encoders/decoders
+   from `upb_msg`, but we need to simplify all of that a LOT.  Likely we will
+   want to make handlers only per-message instead of per-field, except for
+   variable-length fields.
diff --git a/Makefile b/Makefile
@@ -262,11 +262,11 @@ genfiles: tools/upbc
 	$(E) PROTOC upb/descriptor/descriptor.proto
 	$(Q) protoc upb/descriptor/descriptor.proto -oupb/descriptor/descriptor.pb
 	$(E) UPBC upb/descriptor/descriptor.pb
-	$(Q) ./tools/upbc upb/descriptor/descriptor.pb
+	$(Q) ./tools/upbc --generate-upbdefs upb/descriptor/descriptor.pb
 	$(E) PROTOC tests/json/test.proto
 	$(Q) protoc tests/json/test.proto -otests/json/test.proto.pb
 	$(E) UPBC tests/json/test.proto.pb
-	$(Q) ./tools/upbc tests/json/test.proto.pb
+	$(Q) ./tools/upbc --generate-upbdefs tests/json/test.proto.pb
 	$(E) DYNASM upb/pb/compile_decoder_x64.dasc
 	$(Q) $(LUA) third_party/dynasm/dynasm.lua -c upb/pb/compile_decoder_x64.dasc > upb/pb/compile_decoder_x64.h || (rm upb/pb/compile_decoder_x64.h ; false)
 

diff --git a/tests/conformance_upb.c b/tests/conformance_upb.c
@@ -0,0 +1,162 @@
+/* This is a upb implementation of the protobuf conformance tests, see:
+ *   https://github.com/google/protobuf/tree/master/conformance
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "conformance.upb.h"
+#include "google/protobuf/test_messages_proto3.upb.h"
+
+/* Number of requests handled so far; incremented by DoTestIo() and reported
+ * by main() once DoTestIo() signals EOF. */
+int test_count = 0;
+
+/* Reads exactly |len| bytes from |fd| into |buf|, looping over short reads.
+ *
+ * Returns true once all |len| bytes have been read, or false if read()
+ * reports end-of-file before that.  Any other read() failure is fatal: an
+ * error is printed with perror() and the process exits with status 1. */
+bool CheckedRead(int fd, void *buf, size_t len) {
+  size_t ofs = 0;
+  while (len > 0) {
+    ssize_t bytes_read = read(fd, (char*)buf + ofs, len);
+
+    if (bytes_read == 0) return false;
+
+    if (bytes_read < 0) {
+      /* Being interrupted by a signal is not a failure; just retry. */
+      if (errno == EINTR) continue;
+      perror("reading from test runner");
+      exit(1);
+    }
+
+    /* bytes_read is known to be positive here, so the casts are safe. */
+    len -= (size_t)bytes_read;
+    ofs += (size_t)bytes_read;
+  }
+
+  return true;
+}
+
+/* Writes exactly |len| bytes from |buf| to |fd|.
+ *
+ * write() may transfer fewer bytes than requested (e.g. on a pipe), so keep
+ * writing the remainder until everything is out.  A write() failure other
+ * than EINTR is fatal: an error is printed with perror() and the process
+ * exits with status 1. */
+void CheckedWrite(int fd, const void *buf, size_t len) {
+  const char *ptr = buf;
+  while (len > 0) {
+    ssize_t written = write(fd, ptr, len);
+
+    if (written < 0) {
+      /* Being interrupted by a signal is not a failure; just retry. */
+      if (errno == EINTR) continue;
+      perror("writing to test runner");
+      exit(1);
+    }
+
+    ptr += written;
+    len -= (size_t)written;
+  }
+}
+
+/* Runs a single conformance test and records the outcome in |response|.
+ *
+ * The request's payload is parsed as a TestAllTypes message and then
+ * re-serialized in the requested output format:
+ *   - protobuf payload that fails to parse  -> parse_error is set
+ *   - JSON payload, or JSON output requested -> skipped is set
+ *   - otherwise                              -> protobuf_payload is set
+ *
+ * |env| is handed to the upb parse/serialize calls.  A request without a
+ * payload, an unspecified or unknown output format, or a serialization
+ * failure is fatal: a diagnostic goes to stderr and the process exits with
+ * status 1. */
+void DoTest(
+    const conformance_ConformanceRequest* request,
+    conformance_ConformanceResponse *response,
+    upb_env *env) {
+  protobuf_test_messages_proto3_TestAllTypes *test_message;
+
+  switch (conformance_ConformanceRequest_payload_case(request)) {
+    case conformance_ConformanceRequest_payload_protobuf_payload:
+      test_message = protobuf_test_messages_proto3_TestAllTypes_parsenew(
+          conformance_ConformanceRequest_protobuf_payload(request), env);
+
+      if (!test_message) {
+        /* TODO(haberman): return details. */
+        static char msg[] = "Parse error (no more details available).";
+        /* sizeof(msg) - 1: keep the NUL terminator out of the string view. */
+        conformance_ConformanceResponse_set_parse_error(
+            response, upb_stringview_make(msg, sizeof(msg) - 1));
+        return;
+      }
+      break;
+
+    case conformance_ConformanceRequest_payload_json_payload: {
+      static char msg[] = "JSON support not yet implemented.";
+      conformance_ConformanceResponse_set_skipped(
+          response, upb_stringview_make(msg, sizeof(msg) - 1));
+      return;
+    }
+
+    case conformance_ConformanceRequest_payload_NOT_SET:
+      fprintf(stderr, "conformance_upb: Request didn't have payload.\n");
+      exit(1);
+  }
+
+  switch (conformance_ConformanceRequest_requested_output_format(request)) {
+    case conformance_UNSPECIFIED:
+      fprintf(stderr, "conformance_upb: Unspecified output format.\n");
+      exit(1);
+
+    case conformance_PROTOBUF: {
+      size_t serialized_len;
+      char *serialized = protobuf_test_messages_proto3_TestAllTypes_serialize(
+          test_message, env, &serialized_len);
+      if (!serialized) {
+        fprintf(stderr, "conformance_upb: Error serializing.\n");
+        exit(1);
+      }
+      conformance_ConformanceResponse_set_protobuf_payload(
+          response, upb_stringview_make(serialized, serialized_len));
+      break;
+    }
+
+    case conformance_JSON: {
+      static char msg[] = "JSON support not yet implemented.";
+      conformance_ConformanceResponse_set_skipped(
+          response, upb_stringview_make(msg, sizeof(msg) - 1));
+      break;
+    }
+
+    default:
+      fprintf(stderr, "conformance_upb: Unknown output format: %d\n",
+              conformance_ConformanceRequest_requested_output_format(request));
+      exit(1);
+  }
+}
+
+/* Reads one framed ConformanceRequest from stdin, runs it through DoTest(),
+ * and writes the framed ConformanceResponse to stdout.
+ *
+ * Each frame is a uint32_t byte count (written/read as raw host-order bytes)
+ * followed by that many bytes of serialized protobuf.
+ *
+ * Returns false if stdin reaches EOF where a new frame length is expected,
+ * and true after one request has been processed.  EOF in the middle of a
+ * request body is fatal.  If the request itself cannot be parsed, the error
+ * is logged to stderr and the (unpopulated) response is still written. */
+bool DoTestIo() {
+  upb_env env;
+  /* NOTE(review): |status| is not initialized here; presumably it is only
+   * filled in when an error is reported to |env| -- confirm it is always set
+   * before upb_status_errmsg() reads it below. */
+  upb_status status;
+  char *serialized_input;
+  char *serialized_output;
+  uint32_t input_size;
+  uint32_t output_size32;
+  size_t output_size = 0;
+  conformance_ConformanceRequest *request;
+  conformance_ConformanceResponse *response;
+
+  if (!CheckedRead(STDIN_FILENO, &input_size, sizeof(input_size))) {
+    /* EOF. */
+    return false;
+  }
+
+  upb_env_init(&env);
+  upb_env_reporterrorsto(&env, &status);
+  serialized_input = upb_env_malloc(&env, input_size);
+
+  if (!CheckedRead(STDIN_FILENO, serialized_input, input_size)) {
+    fprintf(stderr, "conformance_upb: unexpected EOF on stdin.\n");
+    exit(1);
+  }
+
+  request = conformance_ConformanceRequest_parsenew(
+      upb_stringview_make(serialized_input, input_size), &env);
+  response = conformance_ConformanceResponse_new(&env);
+
+  if (request) {
+    DoTest(request, response, &env);
+  } else {
+    fprintf(stderr, "conformance_upb: parse of ConformanceRequest failed: %s\n",
+            upb_status_errmsg(&status));
+  }
+
+  serialized_output = conformance_ConformanceResponse_serialize(
+      response, &env, &output_size);
+
+  /* The frame length is a uint32_t.  Copy the size into one rather than
+   * writing the first four bytes of a size_t, which are only the low-order
+   * bytes on little-endian hosts. */
+  output_size32 = (uint32_t)output_size;
+  CheckedWrite(STDOUT_FILENO, &output_size32, sizeof(output_size32));
+  CheckedWrite(STDOUT_FILENO, serialized_output, output_size);
+
+  test_count++;
+
+  /* Counterpart of upb_env_init() above: release the per-test environment
+   * instead of leaving one behind for every request. */
+  upb_env_uninit(&env);
+
+  return true;
+}
+
+/* Entry point: services conformance requests until DoTestIo() reports EOF,
+ * then logs how many tests were run and exits successfully. */
+int main() {
+  for (;;) {
+    if (DoTestIo()) continue;
+
+    fprintf(stderr, "conformance_upb: received EOF from test runner "
+                    "after %d tests, exiting\n", test_count);
+    break;
+  }
+
+  return 0;
+}