From 689e73f30c508fbceb5d2b3f8216ea61b21f5b49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Britto?= Date: Wed, 1 Jul 2020 15:25:03 -0300 Subject: [PATCH] Add protobuf standard benchmarks --- bench/.gitignore | 3 ++ bench/README.md | 48 ++++++++++++++++++++++++++++ bench/script/standard_bench.exs | 56 +++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 bench/script/standard_bench.exs diff --git a/bench/.gitignore b/bench/.gitignore index 2a9a0581..8b1e2452 100644 --- a/bench/.gitignore +++ b/bench/.gitignore @@ -2,3 +2,6 @@ /deps erl_crash.dump benchmarks +/data/datasets.tar.gz +/data/dataset.google_message3*.pb +/data/dataset.google_message4.pb diff --git a/bench/README.md b/bench/README.md index e8419909..cbd7ad41 100644 --- a/bench/README.md +++ b/bench/README.md @@ -144,6 +144,54 @@ Generated benchmarks/output/encode.html Opened report using open ``` +## Protobuf standard benchmarks + +Protobuf includes benchmarks for its official language implementations, such as Python, C++ +and Golang. They measure average encode and decode throughput for each built-in dataset. This +is useful to check how Elixir matches up with them. You can read more about these benchmarks +[here](https://github.com/protocolbuffers/protobuf/blob/master/benchmarks/README.md). + +To run the standard benchmarks for Elixir, download the datasets then run `standard_bench.exs`. 
+ +```console +$ mix run script/standard_bench.exs +Message benchmarks.proto2.GoogleMessage1 of dataset file data/dataset.google_message1_proto2.pb +Average throughput for parse_from_benchmark: 18.48 MB/s +Average throughput for serialize_to_benchmark: 6.19 MB/s + +Message benchmarks.proto3.GoogleMessage1 of dataset file data/dataset.google_message1_proto3.pb +Average throughput for parse_from_benchmark: 18.4 MB/s +Average throughput for serialize_to_benchmark: 11.1 MB/s + +Message benchmarks.proto2.GoogleMessage2 of dataset file data/dataset.google_message2.pb +Average throughput for parse_from_benchmark: 47.82 MB/s +Average throughput for serialize_to_benchmark: 5656.75 MB/s + +Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_1.pb +Average throughput for parse_from_benchmark: 19.94 MB/s +Average throughput for serialize_to_benchmark: 45.5 MB/s + +Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_2.pb +Average throughput for parse_from_benchmark: 110.65 MB/s +Average throughput for serialize_to_benchmark: 164.96 MB/s + +Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_3.pb +Average throughput for parse_from_benchmark: 9.8 MB/s +Average throughput for serialize_to_benchmark: 6.84 MB/s + +Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_4.pb +Average throughput for parse_from_benchmark: 5254.14 MB/s +Average throughput for serialize_to_benchmark: 737.71 MB/s + +Message benchmarks.google_message3.GoogleMessage3 of dataset file data/dataset.google_message3_5.pb +Average throughput for parse_from_benchmark: 3.77 MB/s +Average throughput for serialize_to_benchmark: 3.29 MB/s + +Message benchmarks.google_message4.GoogleMessage4 of dataset file data/dataset.google_message4.pb +Average throughput for parse_from_benchmark: 20.06 MB/s +Average throughput for serialize_to_benchmark: 32.46 MB/s 
+```
+
 ## Contributing
 
 If you have trouble using the downloaded datasets, they might have been upgraded and their
diff --git a/bench/script/standard_bench.exs b/bench/script/standard_bench.exs
new file mode 100644
index 00000000..8162fba2
--- /dev/null
+++ b/bench/script/standard_bench.exs
@@ -0,0 +1,65 @@
+# Standard benchmark. Its output is compatible with the built-in benchmarks from
+# protobuf for official language implementations, including encoding and decoding
+# throughput on each dataset.
+#
+# Based on Python's implementation:
+# https://github.com/protocolbuffers/protobuf/blob/master/benchmarks/python/py_benchmark.py
+
+# Total wall-clock time, in microseconds, to apply `fun` to every input once.
+single = fn fun, inputs ->
+  Enum.reduce(inputs, 0, fn input, total ->
+    {time, _result} = :timer.tc(fun, [input])
+    total + time
+  end)
+end
+
+# Total time, in microseconds, for `reps` full passes over `inputs`.
+repeat = fn fun, inputs, reps ->
+  Enum.reduce(1..reps, 0, fn _, total ->
+    total + single.(fun, inputs)
+  end)
+end
+
+# Average time for one pass over `inputs`. When a single pass finishes under
+# the 3-second target, the pass is repeated enough times to fill the target
+# and the mean is reported, which smooths out timer jitter on fast runs.
+run = fn fun, inputs ->
+  target_run_time = 3_000_000
+  single_run_time = single.(fun, inputs)
+
+  # Require single_run_time > 0 so a pass faster than the microsecond clock
+  # resolution cannot cause a division by zero below. ceil/1 already returns
+  # an integer, so no trunc/1 is needed.
+  with true <- single_run_time > 0 and single_run_time < target_run_time,
+       reps when reps > 1 <- ceil(target_run_time / single_run_time) do
+    repeat.(fun, inputs, reps) / reps
+  else
+    _ -> single_run_time
+  end
+end
+
+# Convert bytes processed over elapsed microseconds into MB/s, two decimals.
+throughput = fn bytes, microseconds ->
+  megabytes = bytes / 1_048_576
+  seconds = microseconds / 1_000_000
+  Float.round(megabytes / seconds, 2)
+end
+
+# Decode and re-encode every bundled dataset, reporting MB/s for each.
+for file <- Path.wildcard("data/*.pb") do
+  %{payload: payloads, message_name: mod_name} = ProtoBench.load(file)
+  module = ProtoBench.mod_name(mod_name)
+
+  IO.puts("Message #{mod_name} of dataset file #{file}")
+
+  bytes = Enum.reduce(payloads, 0, &(byte_size(&1) + &2))
+  messages = Enum.map(payloads, &module.decode/1)
+
+  parse = throughput.(bytes, run.(&module.decode/1, payloads))
+
+  IO.puts("Average throughput for parse_from_benchmark: #{parse} MB/s")
+
+  serialize = throughput.(bytes, run.(&module.encode/1, messages))
+
+  IO.puts("Average throughput for serialize_to_benchmark: #{serialize} MB/s")
+  IO.puts("")
+end