Skip to content

Commit

Permalink
Refactor benchmark CLI (#273)
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt committed Jul 28, 2023
1 parent c0a4ef5 commit f3f1b3c
Show file tree
Hide file tree
Showing 32 changed files with 1,077 additions and 1,025 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10', 'pypy-3.7', 'pypy-3.8']
fail-fast: false
steps:
- uses: actions/checkout@v2
- name: Set up Python
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/performance-regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ jobs:
strategy:
matrix:
python-version: ['3.9', '3.10', 'pypy-3.7', 'pypy-3.8']
fail-fast: false
steps:
- name: Set up Python
uses: actions/setup-python@v2
Expand Down
9 changes: 0 additions & 9 deletions amazon/ionbenchmark/API.py

This file was deleted.

8 changes: 0 additions & 8 deletions amazon/ionbenchmark/Command.py

This file was deleted.

27 changes: 11 additions & 16 deletions amazon/ionbenchmark/Format.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,12 @@
import os


def file_is_ion_binary(file):
if os.path.splitext(file)[1] == '.10n':
return True
else:
return False
def _file_is_ion_binary(file):
return os.path.splitext(file)[1] == '.10n'


def file_is_ion_text(file):
if os.path.splitext(file)[1] == '.ion':
return True
else:
return False
def _file_is_ion_text(file):
return os.path.splitext(file)[1] == '.ion'


def format_is_ion(format_option):
Expand All @@ -32,7 +26,8 @@ def format_is_cbor(format_option):


def format_is_binary(format_option):
return format_is_cbor(format_option) or (format_option == Format.ION_BINARY.value)
return format_is_cbor(format_option) or (format_option == Format.ION_BINARY.value) \
or (format_option == Format.PROTOBUF.value) or (format_option == Format.SD_PROTOBUF.value)


def rewrite_file_to_format(file, format_option):
Expand All @@ -50,8 +45,8 @@ def rewrite_file_to_format(file, format_option):

if format_is_ion(format_option):
# Write data if a conversion is required
if (format_option == Format.ION_BINARY.value and file_is_ion_text(file)) \
or (format_option == Format.ION_TEXT.value and file_is_ion_binary(file)):
if (format_option == Format.ION_BINARY.value and _file_is_ion_text(file)) \
or (format_option == Format.ION_TEXT.value and _file_is_ion_binary(file)):
# Load data
with open(file, 'br') as fp:
obj = simpleion.load(fp, single_value=False)
Expand All @@ -63,8 +58,7 @@ def rewrite_file_to_format(file, format_option):
else:
shutil.copy(file, temp_file_name)
else:
# Copy the file
shutil.copy(file, temp_file_name)
return file

return temp_file_name

Expand All @@ -79,4 +73,5 @@ class Format(Enum):
RAPIDJSON = 'rapidjson'
CBOR = 'cbor'
CBOR2 = 'cbor2'
DEFAULT = 'ion_binary'
PROTOBUF = 'protobuf'
SD_PROTOBUF = 'self_describing_protobuf'
8 changes: 0 additions & 8 deletions amazon/ionbenchmark/Io_type.py

This file was deleted.

7 changes: 0 additions & 7 deletions amazon/ionbenchmark/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,2 @@
__author__ = 'Amazon.com, Inc.'
__version__ = '0.1.0'

__all__ = [
'API',
'ion_python_benchmark_cli'
]

from amazon.ionbenchmark.ion_benchmark_cli import ion_python_benchmark_cli
124 changes: 124 additions & 0 deletions amazon/ionbenchmark/benchmark_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""
This module
"""
import gc
import tempfile
import platform
import time
import timeit

from amazon.ionbenchmark.benchmark_spec import BenchmarkSpec
import amazon.ionbenchmark.Format as _format

_pypy = platform.python_implementation() == 'PyPy'
if not _pypy:
import tracemalloc


class BenchmarkResult:
    """
    Results generated by the `run_benchmark` function.

    * `timings` is a list of numbers representing the number of nanoseconds to complete each iteration
    * `batch_size` is the number of times the function was invoked in each iteration
    * `peak_memory_usage` is the peak memory allocated during a single run of the benchmark function,
      in bytes, or None when memory profiling is unavailable (run_benchmark passes None on PyPy)
    """

    # The class-level attribute defaults were removed: __init__ always assigns
    # all three instance attributes, so the class attributes were dead weight
    # that could only mask a missing assignment.

    def __init__(self, timings, batch_size, peak_memory_usage):
        self.timings = timings
        self.batch_size = batch_size
        self.peak_memory_usage = peak_memory_usage

    def __repr__(self):
        # Debug-friendly representation; added for convenience, not relied on by callers.
        return (f"{type(self).__name__}(timings={self.timings!r}, "
                f"batch_size={self.batch_size!r}, "
                f"peak_memory_usage={self.peak_memory_usage!r})")


def run_benchmark(benchmark_spec: BenchmarkSpec):
    """
    Run benchmarks for `benchmark_spec`.

    Returns a BenchmarkResult holding the per-iteration timings (in nanoseconds,
    via time.perf_counter_ns), the batch size chosen by timeit.autorange, and the
    peak memory usage of one invocation (None on PyPy, where tracemalloc is unavailable).
    """
    benchmark_fn = _create_test_fun(benchmark_spec)

    # Memory profiling uses tracemalloc, which PyPy does not provide.
    peak_memory_usage = None if _pypy else _trace_memory_allocation(benchmark_fn)

    # Put the garbage collector into the requested state before timing starts.
    if benchmark_spec["py_gc_disabled"]:
        gc_setup = "import gc; gc.disable()"
    else:
        gc_setup = "import gc; gc.enable()"

    timer = timeit.Timer(stmt=benchmark_fn, timer=time.perf_counter_ns, setup=gc_setup)

    # Warm up before taking any measurements.
    timer.timeit(benchmark_spec.get_warmups())

    # Let timeit choose how many calls make up one iteration, then measure.
    batch_size, _ = timer.autorange()
    timings = timer.repeat(benchmark_spec.get_iterations(), batch_size)

    return BenchmarkResult(timings, batch_size, peak_memory_usage)


def _create_test_fun(benchmark_spec: BenchmarkSpec):
    """
    Create a zero-argument benchmark function for the given `benchmark_spec`.

    The function is chosen by the (io_type, command, api) combination; any
    unsupported combination raises NotImplementedError.
    """
    loader_dumper = benchmark_spec.get_loader_dumper()
    selector = (benchmark_spec.get_io_type(), benchmark_spec.get_command(), benchmark_spec.get_api())

    if selector == ('buffer', 'read', 'load_dump'):
        # Read the entire input up front so only deserialization is timed.
        with open(benchmark_spec.get_input_file(), 'rb') as f:
            buffer = f.read()

        def test_fn():
            return loader_dumper.loads(buffer)

    elif selector == ('buffer', 'write', 'load_dump'):
        data_obj = benchmark_spec.get_data_object()

        def test_fn():
            return loader_dumper.dumps(data_obj)

    elif selector == ('file', 'read', 'load_dump'):
        data_file = benchmark_spec.get_input_file()

        def test_fn():
            with open(data_file, "rb") as f:
                return loader_dumper.load(f)

    elif selector == ('file', 'write', 'load_dump'):
        data_obj = benchmark_spec.get_data_object()
        data_format = benchmark_spec.get_format()
        # Binary formats (and Ion, which may be binary) need a binary-mode file;
        # everything else is written through a text-mode file.
        is_binary = _format.format_is_binary(data_format) or _format.format_is_ion(data_format)
        file_mode = "wb" if is_binary else "wt"

        def test_fn():
            with tempfile.TemporaryFile(mode=file_mode) as f:
                return loader_dumper.dump(data_obj, f)

    else:
        # list(selector) keeps the original list-style message format.
        raise NotImplementedError(f"Argument combination not supported: {list(selector)}")

    return test_fn


def _trace_memory_allocation(test_fn, *args, **kwargs):
"""
Measure the memory allocations in bytes for a single invocation of test_fn
"""
gc.disable()
tracemalloc.start()
test_fn(*args, **kwargs)
memory_usage_peak = tracemalloc.get_traced_memory()[1]
tracemalloc.stop()
gc.enable()
return memory_usage_peak

0 comments on commit f3f1b3c

Please sign in to comment.