Skip to content

Commit

Permalink
Changes bytes read size to avoid unicode/UTF-8 conversion issue. (#216)
Browse files Browse the repository at this point in the history
  • Loading branch information
cheqianh committed Aug 19, 2022
1 parent 36e3728 commit ee43553
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
8 changes: 3 additions & 5 deletions amazon/ion/ioncmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
#define FIELD_NAME_MAX_LEN 1000
#define ANNOTATION_MAX_LEN 50

#define IONC_STREAM_READ_BUFFER_SIZE 1024
/* Stream read buffer size: 1024 * 32 = 32768.
 * The replacement list is parenthesized so the macro expands as one value
 * inside larger expressions (e.g. `x / IONC_STREAM_READ_BUFFER_SIZE`). */
#define IONC_STREAM_READ_BUFFER_SIZE (1024 * 32)
/* Python int equal to a quarter of the read buffer size, passed as the size
 * argument of the stream's `read` call.
 * Presumably a quarter because one character can occupy up to 4 bytes once
 * encoded as UTF-8 -- TODO confirm against the read handler.
 * NOTE(review): PyLong_FromLong returns a new reference on every expansion;
 * confirm the call site releases it. */
#define IONC_STREAM_BYTES_READ_SIZE PyLong_FromLong(IONC_STREAM_READ_BUFFER_SIZE / 4)

static char _err_msg[ERR_MSG_MAX_LEN];

Expand Down Expand Up @@ -87,8 +88,6 @@ static PyObject* _py_symboltoken_constructor;
static PyObject* _exception_module;
static PyObject* _ion_exception_cls;
static decContext dec_context;
static PyObject *_arg_read_size;


typedef struct {
PyObject *py_file; // a TextIOWrapper-like object
Expand Down Expand Up @@ -1400,7 +1399,7 @@ iERR ion_read_file_stream_handler(struct _ion_user_stream *pstream) {
Py_ssize_t size;
_ION_READ_STREAM_HANDLE *stream_handle = (_ION_READ_STREAM_HANDLE *) pstream->handler_state;
PyObject *py_buffer_as_bytes = NULL;
PyObject *py_buffer = PyObject_CallMethod(stream_handle->py_file, "read", "O", _arg_read_size);
PyObject *py_buffer = PyObject_CallMethod(stream_handle->py_file, "read", "O", IONC_STREAM_BYTES_READ_SIZE);

if (py_buffer == NULL) {
pstream->limit = NULL;
Expand Down Expand Up @@ -1669,7 +1668,6 @@ PyObject* ionc_init_module(void) {

decContextDefault(&dec_context, DEC_INIT_DECQUAD); //The writer already had one of these, but it's private.

_arg_read_size = PyLong_FromLong(IONC_STREAM_READ_BUFFER_SIZE);
return m;
}

Expand Down
10 changes: 10 additions & 0 deletions tests/test_simpleion.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,3 +704,13 @@ def test_dumps_omit_version_marker():
assert dumps(v) == b'\xe0\x01\x00\xea\x21\x05'
assert dumps(v, omit_version_marker=True) == b'\xe0\x01\x00\xea\x21\x05'


# See issue https://github.com/amzn/ion-python/issues/213
def test_loads_unicode_utf8_conversion():
    """Eagerly load a large text payload that contains a non-ASCII character."""
    # 'test,' repeated 100000 times is 500000 characters, well beyond 1024*32,
    # so the payload cannot be consumed in a single buffer-sized read.
    repeated_symbols = 'test,' * 100000
    # The first list element carries U+2013, which needs more than one byte in UTF-8.
    data = "[ '''\u2013'''," + repeated_symbols + "]"
    # Passes as long as loads() does not raise while converting the text to UTF-8.
    loads(data, parse_eagerly=True)

0 comments on commit ee43553

Please sign in to comment.