Increases benchmark execution's iteration and warm-up numbers to decrease the threshold value (#268)

* Addresses a memory leak issue
* Changes to MacOS for 200 iterations and 1k warm-ups
* Adds a method for Ion binary/text conversion
* Fixes a comparison result mismatch issue by sorting the combination list
amazon-ion · Jun 9, 2023 · 35f07ca · 35f07ca
1 parent 2694fb3
commit 35f07ca
Show file tree

Hide file tree

Showing 7 changed files with 119 additions and 51 deletions.
diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
diff --git a/amazon/ion/ioncmodule.c b/amazon/ion/ioncmodule.c
@@ -22,7 +22,6 @@
 #define ANNOTATION_MAX_LEN 50
 
 #define IONC_STREAM_READ_BUFFER_SIZE 1024*32
-#define IONC_STREAM_BYTES_READ_SIZE PyLong_FromLong(IONC_STREAM_READ_BUFFER_SIZE/4)
 
 static char _err_msg[ERR_MSG_MAX_LEN];
 
@@ -31,6 +30,7 @@ static char _err_msg[ERR_MSG_MAX_LEN];
 #define IONC_BYTES_FORMAT "y#"
 #define IONC_READ_ARGS_FORMAT "OOO"
 
+static PyObject* IONC_STREAM_BYTES_READ_SIZE;
 static PyObject* _math_module;
 
 static PyObject* _decimal_module;
@@ -1515,12 +1515,10 @@ PyObject* ionc_read(PyObject* self, PyObject *args, PyObject *kwds) {
     return exception;
 }
 
-
 /******************************************************************************
 *       Initial module                                                        *
 ******************************************************************************/
 
-
 static char ioncmodule_docs[] =
     "C extension module for ion-c.\n";
 
@@ -1548,6 +1546,7 @@ PyObject* ionc_init_module(void) {
 
     m = PyModule_Create(&moduledef);
 
+    IONC_STREAM_BYTES_READ_SIZE = PyLong_FromLong(IONC_STREAM_READ_BUFFER_SIZE/4);
     // TODO is there a destructor for modules? These should be decreffed there
      _math_module               = PyImport_ImportModule("math");
 

diff --git a/amazon/ionbenchmark/Format.py b/amazon/ionbenchmark/Format.py
@@ -1,4 +1,21 @@
+import shutil
 from enum import Enum
+import amazon.ion.simpleion as simpleion
+import os
+
+
+def file_is_ion_binary(file):
+    if os.path.splitext(file)[1] == '.10n':
+        return True
+    else:
+        return False
+
+
+def file_is_ion_text(file):
+    if os.path.splitext(file)[1] == '.ion':
+        return True
+    else:
+        return False
 
 
 def format_is_ion(format_option):
@@ -7,7 +24,7 @@ def format_is_ion(format_option):
 
 def format_is_json(format_option):
     return (format_option == Format.JSON.value) or (format_option == Format.SIMPLEJSON.value) \
-           or (format_option == Format.UJSON.value) or (format_option == Format.RAPIDJSON.value)
+        or (format_option == Format.UJSON.value) or (format_option == Format.RAPIDJSON.value)
 
 
 def format_is_cbor(format_option):
@@ -19,7 +36,37 @@ def format_is_binary(format_option):
 
 
 def rewrite_file_to_format(file, format_option):
-    return file
+    temp_file_name_base = 'temp_' + os.path.splitext(os.path.basename(file))[0]
+    if format_option == Format.ION_BINARY.value:
+        temp_file_name_suffix = '.10n'
+    elif format_option == Format.ION_TEXT.value:
+        temp_file_name_suffix = '.ion'
+    else:
+        temp_file_name_suffix = ''
+    temp_file_name = temp_file_name_base + temp_file_name_suffix
+    # Check the file path
+    if os.path.exists(temp_file_name):
+        os.remove(temp_file_name)
+
+    if format_is_ion(format_option):
+        # Write data if a conversion is required
+        if (format_option == Format.ION_BINARY.value and file_is_ion_text(file)) \
+                or (format_option == Format.ION_TEXT.value and file_is_ion_binary(file)):
+            # Load data
+            with open(file, 'br') as fp:
+                obj = simpleion.load(fp, single_value=False)
+            with open(temp_file_name, 'bw') as fp:
+                if format_option == Format.ION_BINARY.value:
+                    simpleion.dump(obj, fp, binary=True)
+                else:
+                    simpleion.dump(obj, fp, binary=False)
+        else:
+            shutil.copy(file, temp_file_name)
+    else:
+        # Copy the file
+        shutil.copy(file, temp_file_name)
+
+    return temp_file_name
 
 
 class Format(Enum):
@@ -33,4 +80,3 @@ class Format(Enum):
     CBOR = 'cbor'
     CBOR2 = 'cbor2'
     DEFAULT = 'ion_binary'
-
diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py
@@ -14,8 +14,8 @@
 """A repeatable benchmark tool for ion-python implementation.
 
 Usage:
-    ion_python_benchmark_cli.py write [--results-file <path>] [--api <api>]... [--warmups <int>] [--c-extension <bool>] [--iterations <int>] [--format <format>]... [--io-type <io_type>]... <input_file>
-    ion_python_benchmark_cli.py read [--results-file <path>] [--api <api>]... [--iterator <bool>] [--warmups <int>] [--iterations <int>] [--c-extension <bool>] [--format <format>]... [--io-type <io_type>]... <input_file>
+    ion_python_benchmark_cli.py write [--results-file <path>] [--api <api>]... [--c-extension <bool>] [--warmups <int>] [--iterations <int>] [--format <format>]... [--io-type <io_type>]... <input_file>
+    ion_python_benchmark_cli.py read [--results-file <path>] [--api <api>]... [--iterator <bool>]  [--c-extension <bool>] [--warmups <int>] [--iterations <int>] [--format <format>]... [--io-type <io_type>]... <input_file>
     ion_python_benchmark_cli.py compare (--benchmark-result-previous <file_path>) (--benchmark-result-new <file_path>) <output_file>
     ion_python_benchmark_cli.py (-h | --help)
     ion_python_benchmark_cli.py (-v | --version)
@@ -61,7 +61,7 @@
                                         error will be raised if this option is used when multiple values are specified
                                         for other options. Not enabled by default.
 
-     -i --io-type <io_type>             The source or destination type, from the set (buffer | file). If buffer is
+     -I --io-type <io_type>             The source or destination type, from the set (buffer | file). If buffer is
                                         selected, buffers the input data in memory before reading and writes the output
                                         data to an in-memory buffer instead of a file. [default: file]
 
@@ -115,7 +115,7 @@
 
 output_file_for_benchmarking = 'dump_output'
 BENCHMARK_SCORE_KEYWORDS = ['file_size (MB)', 'total_time (s)']
-REGRESSION_THRESHOLD = 1
+REGRESSION_THRESHOLD = 0.2
 
 
 # Generates benchmark code for json/cbor/Ion load/loads APIs
@@ -594,6 +594,11 @@ def clean_up():
         os.remove(output_file_for_benchmarking)
 
 
+def clean_up_temp_file(temp_file):
+    if os.path.exists(temp_file):
+        os.remove(temp_file)
+
+
 def output_result_table(results_output, table):
     if results_output is None:
         print(tabulate(table, tablefmt='fancy_grid'))
@@ -650,7 +655,6 @@ def has_regression(results):
         relative_difference_score = each_result['relative_difference_score']
         for field in relative_difference_score:
             value_diff = relative_difference_score[field]
-            # TODO simply set the threshold to 1. Need optimization.
             if value_diff > REGRESSION_THRESHOLD:
                 return each_result['input']
     return None
@@ -688,7 +692,7 @@ def ion_python_benchmark_cli(arguments):
     # option_configuration is used for tracking options may show up multiple times.
     option_configuration = [api, format_option, io_type]
     option_configuration_combination = list(itertools.product(*option_configuration))
-
+    option_configuration_combination.sort()
     # initialize benchmark report table
     table = identify_report_table(command)
 
@@ -700,7 +704,7 @@ def ion_python_benchmark_cli(arguments):
         # TODO. currently, we must provide the tool to convert to a corresponding file format for read benchmarking.
         #  For example, we must provide a CBOR file for CBOR APIs benchmarking. We cannot benchmark CBOR APIs by giving
         #  a JSON file. Lack of format conversion prevents us from benchmarking different formats concurrently.
-        file = rewrite_file_to_format(file, format_option)
+        temp_file = rewrite_file_to_format(file, format_option)
 
         # Generate microbenchmark API according to read/write command
         if format_is_ion(format_option):
@@ -720,12 +724,13 @@ def ion_python_benchmark_cli(arguments):
             raise Exception(f'Invalid format option {format_option}.')
 
         if command == 'read':
-            read_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, file,
+            read_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, temp_file,
                                                c_extension, binary, iterator, each_option, io_type, command=command)
         else:
-            write_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, file,
+            write_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, temp_file,
                                                 c_extension, binary, each_option, io_type, command=command)
 
+        clean_up_temp_file(temp_file)
     # If the `--results-file` is set, write the final results table to the destination file in Ion. Otherwise, print the
     # results in stdout.
     output_result_table(results_output, table)

diff --git a/ion-c b/ion-c
diff --git a/tests/benchmark_sample_data/integers.10n b/tests/benchmark_sample_data/integers.10n
diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py
@@ -10,7 +10,7 @@
 from amazon.ion import simpleion
 from amazon.ion.equivalence import ion_equals
 from amazon.ionbenchmark import ion_benchmark_cli, Format, Io_type
-from amazon.ionbenchmark.Format import format_is_ion, format_is_cbor, format_is_json
+from amazon.ionbenchmark.Format import format_is_ion, format_is_cbor, format_is_json, rewrite_file_to_format
 from amazon.ionbenchmark.ion_benchmark_cli import generate_read_test_code, \
     generate_write_test_code, ion_python_benchmark_cli, output_result_table, REGRESSION_THRESHOLD
 from amazon.ionbenchmark.util import str_to_bool, TOOL_VERSION
@@ -302,9 +302,9 @@ def test_write_io_type(f):
 )
 def test_read_io_type(f):
     table = execution_with_command(
-        ['read', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'json', '--format', 'ion_binary'])
+        ['read', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'ion_text', '--format', 'ion_binary'])
     assert gather_all_options_in_list(table) == sorted(
-        [('load_dump', 'json', f'{f}'), ('load_dump', 'ion_binary', f'{f}')])
+        [('load_dump', 'ion_text', f'{f}'), ('load_dump', 'ion_binary', f'{f}')])
 
 
 @parametrize(
@@ -415,3 +415,15 @@ def test_compare_big_gap_with_regression():
         os.remove(generate_test_path('compare_output'))
     assert res[0].get('relative_difference_score').get('total_time (s)') > REGRESSION_THRESHOLD
     assert reg_f == 'integers.ion'
+
+
+def test_format_conversion_ion_binary_to_ion_text():
+    rewrite_file_to_format(generate_test_path('integers.ion'), Format.Format.ION_BINARY.value)
+    assert os.path.exists('temp_integers.10n')
+    os.remove('temp_integers.10n')
+
+
+def test_format_conversion_ion_text_to_ion_binary():
+    rewrite_file_to_format(generate_test_path('integers.10n'), Format.Format.ION_TEXT.value)
+    assert os.path.exists('temp_integers.ion')
+    os.remove('temp_integers.ion')