Skip to content

Commit

Permalink
Increases benchmark execution's iteration and warm-up numbers to decrease the threshold value (#268)
Browse files Browse the repository at this point in the history

* Addresses a memory leak issue
* Changes to MacOS for 200 iterations and 1k warm-ups
* Adds a method for Ion binary/text conversion
* Fixes a comparison result mismatch issue by sorting the combination list
  • Loading branch information
cheqianh committed Jun 9, 2023
1 parent 2694fb3 commit 35f07ca
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 51 deletions.
70 changes: 38 additions & 32 deletions .github/workflows/performance-regression.yml

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions amazon/ion/ioncmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#define ANNOTATION_MAX_LEN 50

#define IONC_STREAM_READ_BUFFER_SIZE 1024*32
#define IONC_STREAM_BYTES_READ_SIZE PyLong_FromLong(IONC_STREAM_READ_BUFFER_SIZE/4)

static char _err_msg[ERR_MSG_MAX_LEN];

Expand All @@ -31,6 +30,7 @@ static char _err_msg[ERR_MSG_MAX_LEN];
#define IONC_BYTES_FORMAT "y#"
#define IONC_READ_ARGS_FORMAT "OOO"

static PyObject* IONC_STREAM_BYTES_READ_SIZE;
static PyObject* _math_module;

static PyObject* _decimal_module;
Expand Down Expand Up @@ -1515,12 +1515,10 @@ PyObject* ionc_read(PyObject* self, PyObject *args, PyObject *kwds) {
return exception;
}


/******************************************************************************
* Initial module *
******************************************************************************/


static char ioncmodule_docs[] =
"C extension module for ion-c.\n";

Expand Down Expand Up @@ -1548,6 +1546,7 @@ PyObject* ionc_init_module(void) {

m = PyModule_Create(&moduledef);

IONC_STREAM_BYTES_READ_SIZE = PyLong_FromLong(IONC_STREAM_READ_BUFFER_SIZE/4);
// TODO is there a destructor for modules? These should be decreffed there
_math_module = PyImport_ImportModule("math");

Expand Down
52 changes: 49 additions & 3 deletions amazon/ionbenchmark/Format.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
import shutil
from enum import Enum
import amazon.ion.simpleion as simpleion
import os


def file_is_ion_binary(file):
    """Return whether ``file`` carries the Ion binary extension ``.10n``.

    Only the extension of the path is inspected (case-sensitively); the file
    itself is never opened.

    :param file: Path or file name to classify.
    :return: ``True`` if the extension is exactly ``.10n``, otherwise ``False``.
    """
    return os.path.splitext(file)[1] == '.10n'


def file_is_ion_text(file):
    """Return whether ``file`` carries the Ion text extension ``.ion``.

    Only the extension of the path is inspected (case-sensitively); the file
    itself is never opened.

    :param file: Path or file name to classify.
    :return: ``True`` if the extension is exactly ``.ion``, otherwise ``False``.
    """
    return os.path.splitext(file)[1] == '.ion'


def format_is_ion(format_option):
Expand All @@ -7,7 +24,7 @@ def format_is_ion(format_option):

def format_is_json(format_option):
    """Return whether ``format_option`` is one of the JSON format values.

    :param format_option: Format value to classify (compared against ``Format`` member values).
    :return: ``True`` if it equals the value of ``Format.JSON``, ``Format.SIMPLEJSON``,
        ``Format.UJSON`` or ``Format.RAPIDJSON``; otherwise ``False``.
    """
    json_format_values = (
        Format.JSON.value,
        Format.SIMPLEJSON.value,
        Format.UJSON.value,
        Format.RAPIDJSON.value,
    )
    return format_option in json_format_values


def format_is_cbor(format_option):
Expand All @@ -19,7 +36,37 @@ def format_is_binary(format_option):


def rewrite_file_to_format(file, format_option):
    """Write a temporary copy of ``file`` into the current directory, converted to ``format_option`` if needed.

    The temporary file is named ``temp_<input base name><suffix>``, where the suffix is ``.10n`` for
    Ion binary, ``.ion`` for Ion text and empty for every other format. An existing file with that
    name is replaced. Data is only re-encoded when an Ion format is requested and the input file's
    extension indicates the other Ion encoding; in every other case the input is copied unchanged.
    The temporary file is not deleted here.

    :param file: Path of the input data file.
    :param format_option: Target format, compared against ``Format`` member values.
    :return: Name of the temporary file that was written.
    :raises ValueError: If the temporary file name resolves to the input file itself.
    """
    target_is_binary = format_option == Format.ION_BINARY.value
    target_is_text = format_option == Format.ION_TEXT.value

    if target_is_binary:
        temp_file_name_suffix = '.10n'
    elif target_is_text:
        temp_file_name_suffix = '.ion'
    else:
        temp_file_name_suffix = ''
    temp_file_name = 'temp_' + os.path.splitext(os.path.basename(file))[0] + temp_file_name_suffix

    # Refuse to continue when the temp name is the input itself: removing the stale temp file below
    # would otherwise delete the input before it has been read.
    if os.path.realpath(file) == os.path.realpath(temp_file_name):
        raise ValueError(f'Input file {file} collides with the temporary file name {temp_file_name}.')

    # Start from a clean slate so leftovers from a previous run are never benchmarked.
    if os.path.exists(temp_file_name):
        os.remove(temp_file_name)

    # A conversion is only required when switching between the two Ion encodings.
    needs_conversion = format_is_ion(format_option) and (
        (target_is_binary and file_is_ion_text(file))
        or (target_is_text and file_is_ion_binary(file))
    )

    if needs_conversion:
        with open(file, 'br') as fp:
            obj = simpleion.load(fp, single_value=False)
        # NOTE(review): with single_value=False, `load` presumably returns a list of the top-level
        # values; dumping it without `sequence_as_stream=True` would then write a single list rather
        # than a stream of values - confirm this is the intended shape of the converted data.
        with open(temp_file_name, 'bw') as fp:
            simpleion.dump(obj, fp, binary=target_is_binary)
    else:
        shutil.copy(file, temp_file_name)

    return temp_file_name


class Format(Enum):
Expand All @@ -33,4 +80,3 @@ class Format(Enum):
CBOR = 'cbor'
CBOR2 = 'cbor2'
DEFAULT = 'ion_binary'

23 changes: 14 additions & 9 deletions amazon/ionbenchmark/ion_benchmark_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
"""A repeatable benchmark tool for ion-python implementation.
Usage:
ion_python_benchmark_cli.py write [--results-file <path>] [--api <api>]... [--warmups <int>] [--c-extension <bool>] [--iterations <int>] [--format <format>]... [--io-type <io_type>]... <input_file>
ion_python_benchmark_cli.py read [--results-file <path>] [--api <api>]... [--iterator <bool>] [--warmups <int>] [--iterations <int>] [--c-extension <bool>] [--format <format>]... [--io-type <io_type>]... <input_file>
ion_python_benchmark_cli.py write [--results-file <path>] [--api <api>]... [--c-extension <bool>] [--warmups <int>] [--iterations <int>] [--format <format>]... [--io-type <io_type>]... <input_file>
ion_python_benchmark_cli.py read [--results-file <path>] [--api <api>]... [--iterator <bool>] [--c-extension <bool>] [--warmups <int>] [--iterations <int>] [--format <format>]... [--io-type <io_type>]... <input_file>
ion_python_benchmark_cli.py compare (--benchmark-result-previous <file_path>) (--benchmark-result-new <file_path>) <output_file>
ion_python_benchmark_cli.py (-h | --help)
ion_python_benchmark_cli.py (-v | --version)
Expand Down Expand Up @@ -61,7 +61,7 @@
error will be raised if this option is used when multiple values are specified
for other options. Not enabled by default.
-i --io-type <io_type> The source or destination type, from the set (buffer | file). If buffer is
-I --io-type <io_type> The source or destination type, from the set (buffer | file). If buffer is
selected, buffers the input data in memory before reading and writes the output
data to an in-memory buffer instead of a file. [default: file]
Expand Down Expand Up @@ -115,7 +115,7 @@

output_file_for_benchmarking = 'dump_output'
BENCHMARK_SCORE_KEYWORDS = ['file_size (MB)', 'total_time (s)']
REGRESSION_THRESHOLD = 1
REGRESSION_THRESHOLD = 0.2


# Generates benchmark code for json/cbor/Ion load/loads APIs
Expand Down Expand Up @@ -594,6 +594,11 @@ def clean_up():
os.remove(output_file_for_benchmarking)


def clean_up_temp_file(temp_file):
    """Delete ``temp_file``; do nothing when no such file exists.

    :param temp_file: Path of the temporary file to remove.
    """
    # Attempt the removal directly instead of checking for existence first, so a file that
    # disappears between a check and the removal cannot raise.
    try:
        os.remove(temp_file)
    except (FileNotFoundError, NotADirectoryError):
        pass


def output_result_table(results_output, table):
if results_output is None:
print(tabulate(table, tablefmt='fancy_grid'))
Expand Down Expand Up @@ -650,7 +655,6 @@ def has_regression(results):
relative_difference_score = each_result['relative_difference_score']
for field in relative_difference_score:
value_diff = relative_difference_score[field]
# TODO simply set the threshold to 1. Need optimization.
if value_diff > REGRESSION_THRESHOLD:
return each_result['input']
return None
Expand Down Expand Up @@ -688,7 +692,7 @@ def ion_python_benchmark_cli(arguments):
# option_configuration is used for tracking options may show up multiple times.
option_configuration = [api, format_option, io_type]
option_configuration_combination = list(itertools.product(*option_configuration))

option_configuration_combination.sort()
# initialize benchmark report table
table = identify_report_table(command)

Expand All @@ -700,7 +704,7 @@ def ion_python_benchmark_cli(arguments):
# TODO: Currently, for read benchmarking, the tool must be given an input file that is already in the format
# being benchmarked. For example, a CBOR file must be provided to benchmark CBOR APIs; CBOR APIs cannot be
# benchmarked with a JSON file. The lack of format conversion prevents us from benchmarking different formats concurrently.
file = rewrite_file_to_format(file, format_option)
temp_file = rewrite_file_to_format(file, format_option)

# Generate microbenchmark API according to read/write command
if format_is_ion(format_option):
Expand All @@ -720,12 +724,13 @@ def ion_python_benchmark_cli(arguments):
raise Exception(f'Invalid format option {format_option}.')

if command == 'read':
read_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, file,
read_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, temp_file,
c_extension, binary, iterator, each_option, io_type, command=command)
else:
write_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, file,
write_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, temp_file,
c_extension, binary, each_option, io_type, command=command)

clean_up_temp_file(temp_file)
# If the `--results-file` is set, write the final results table to the destination file in Ion. Otherwise, print the
# results in stdout.
output_result_table(results_output, table)
Expand Down
Binary file added tests/benchmark_sample_data/integers.10n
Binary file not shown.
18 changes: 15 additions & 3 deletions tests/test_benchmark_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from amazon.ion import simpleion
from amazon.ion.equivalence import ion_equals
from amazon.ionbenchmark import ion_benchmark_cli, Format, Io_type
from amazon.ionbenchmark.Format import format_is_ion, format_is_cbor, format_is_json
from amazon.ionbenchmark.Format import format_is_ion, format_is_cbor, format_is_json, rewrite_file_to_format
from amazon.ionbenchmark.ion_benchmark_cli import generate_read_test_code, \
generate_write_test_code, ion_python_benchmark_cli, output_result_table, REGRESSION_THRESHOLD
from amazon.ionbenchmark.util import str_to_bool, TOOL_VERSION
Expand Down Expand Up @@ -302,9 +302,9 @@ def test_write_io_type(f):
)
def test_read_io_type(f):
table = execution_with_command(
['read', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'json', '--format', 'ion_binary'])
['read', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'ion_text', '--format', 'ion_binary'])
assert gather_all_options_in_list(table) == sorted(
[('load_dump', 'json', f'{f}'), ('load_dump', 'ion_binary', f'{f}')])
[('load_dump', 'ion_text', f'{f}'), ('load_dump', 'ion_binary', f'{f}')])


@parametrize(
Expand Down Expand Up @@ -415,3 +415,15 @@ def test_compare_big_gap_with_regression():
os.remove(generate_test_path('compare_output'))
assert res[0].get('relative_difference_score').get('total_time (s)') > REGRESSION_THRESHOLD
assert reg_f == 'integers.ion'


def test_format_conversion_ion_binary_to_ion_text():
    """Converting the binary sample ``integers.10n`` to Ion text writes ``temp_integers.ion``."""
    temp_file = rewrite_file_to_format(generate_test_path('integers.10n'), Format.Format.ION_TEXT.value)
    try:
        assert temp_file == 'temp_integers.ion'
        assert os.path.exists(temp_file)
    finally:
        # Remove the generated file even when an assertion fails, so it cannot leak into later tests.
        if os.path.exists(temp_file):
            os.remove(temp_file)


def test_format_conversion_ion_text_to_ion_binary():
    """Converting the text sample ``integers.ion`` to Ion binary writes ``temp_integers.10n``."""
    temp_file = rewrite_file_to_format(generate_test_path('integers.ion'), Format.Format.ION_BINARY.value)
    try:
        assert temp_file == 'temp_integers.10n'
        assert os.path.exists(temp_file)
    finally:
        # Remove the generated file even when an assertion fails, so it cannot leak into later tests.
        if os.path.exists(temp_file):
            os.remove(temp_file)

0 comments on commit 35f07ca

Please sign in to comment.