Skip to content

Commit

Permalink
Speed up fstrings by avoiding a tuple for joining the unicode string …
Browse files Browse the repository at this point in the history
…parts. (GH-5866)
  • Loading branch information
scoder committed Nov 25, 2023
1 parent 27095d5 commit 875293e
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 75 deletions.
130 changes: 60 additions & 70 deletions Cython/Compiler/ExprNodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1746,19 +1746,26 @@ def coerce_to_boolean(self, env):
bool_value = bool(self.value)
return BoolNode(self.pos, value=bool_value, constant_result=bool_value)

def estimate_max_charval(self):
# Most strings will probably be ASCII.
if self.value.isascii():
return 127
max_charval = ord(max(self.value))
if max_charval <= 255:
return 255
elif max_charval <= 65535:
return 65535
else:
return 1114111

def contains_surrogates(self):
return StringEncoding.string_contains_surrogates(self.value)

def generate_evaluation_code(self, code):
if self.type.is_pyobject:
# FIXME: this should go away entirely!
# Since string_contains_lone_surrogates() returns False for surrogate pairs in Py2/UCS2,
# Py2 can generate different code from Py3 here. Let's hope we get away with claiming that
# the processing of surrogate pairs in code was always ambiguous and lead to different results
# on P16/32bit Unicode platforms.
if StringEncoding.string_contains_lone_surrogates(self.value):
# lone (unpaired) surrogates are not really portable and cannot be
# decoded by the UTF-8 codec in Py3.3
# decoded by the UTF-8 codec in Py3.3+
self.result_code = code.get_py_const(py_object_type, 'ustring')
data_cname = code.get_string_const(
StringEncoding.BytesLiteral(self.value.encode('unicode_escape')))
Expand Down Expand Up @@ -3581,85 +3588,68 @@ def may_be_none(self):
def generate_evaluation_code(self, code):
code.mark_pos(self.pos)
num_items = len(self.values)
list_var = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
ulength_var = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
max_char_var = code.funcstate.allocate_temp(PyrexTypes.c_py_ucs4_type, manage_ref=False)
use_stack_memory = num_items < 32

code.putln('%s = PyTuple_New(%s); %s' % (
list_var,
num_items,
code.error_goto_if_null(list_var, self.pos)))
code.put_gotref(list_var, py_object_type)
code.putln("%s = 0;" % ulength_var)
code.putln("%s = 127;" % max_char_var) # at least ASCII character range
unknown_nodes = set()
max_char_value = 127
for node in self.values:
if isinstance(node, UnicodeNode):
max_char_value = max(max_char_value, node.estimate_max_charval())
elif isinstance(node, FormattedValueNode) and node.value.type.is_numeric:
# formatted C numbers are always ASCII
pass
else:
unknown_nodes.add(node)

for i, node in enumerate(self.values):
length_parts = []
charval_parts = [str(max_char_value)]
for node in self.values:
node.generate_evaluation_code(code)
node.make_owned_reference(code)

ulength = "__Pyx_PyUnicode_GET_LENGTH(%s)" % node.py_result()
max_char_value = "__Pyx_PyUnicode_MAX_CHAR_VALUE(%s)" % node.py_result()
is_ascii = False
if isinstance(node, UnicodeNode):
try:
# most strings will be ASCII or at least Latin-1
node.value.encode('iso8859-1')
max_char_value = '255'
node.value.encode('us-ascii')
is_ascii = True
except UnicodeEncodeError:
if max_char_value != '255':
# not ISO8859-1 => check BMP limit
max_char = max(map(ord, node.value))
if max_char < 0xD800:
# BMP-only, no surrogate pairs used
max_char_value = '65535'
ulength = str(len(node.value))
elif max_char >= 65536:
# clearly outside of BMP, and not on a 16-bit Unicode system
max_char_value = '1114111'
ulength = str(len(node.value))
else:
# not really worth implementing a check for surrogate pairs here
# drawback: C code can differ when generating on Py2 with 2-byte Unicode
pass
else:
ulength = str(len(node.value))
elif isinstance(node, FormattedValueNode) and node.value.type.is_numeric:
is_ascii = True # formatted C numbers are always ASCII
length_parts.append(str(len(node.value)))
else:
length_parts.append("__Pyx_PyUnicode_GET_LENGTH(%s)" % node.py_result())
if node in unknown_nodes:
charval_parts.append("__Pyx_PyUnicode_MAX_CHAR_VALUE(%s)" % node.py_result())

if not is_ascii:
code.putln("%s = (%s > %s) ? %s : %s;" % (
max_char_var, max_char_value, max_char_var, max_char_value, max_char_var))
code.putln("%s += %s;" % (ulength_var, ulength))
if use_stack_memory:
values_array = code.funcstate.allocate_temp(
PyrexTypes.c_array_type(PyrexTypes.py_object_type, num_items), manage_ref=False)
else:
values_array = code.funcstate.allocate_temp(
PyrexTypes.CPtrType(PyrexTypes.py_object_type), manage_ref=False)
code.putln("%s = (PyObject **) PyMem_Calloc(%d, sizeof(PyObject*));" % (values_array, num_items))
code.putln("if (unlikely(!%s)) {" % values_array)
code.putln("PyErr_NoMemory(); %s" % code.error_goto(self.pos))
code.putln("}")

node.generate_giveref(code)
code.putln('#if CYTHON_ASSUME_SAFE_MACROS')
code.putln('PyTuple_SET_ITEM(%s, %s, %s);' % (list_var, i, node.py_result()))
code.putln('#else')
code.put_error_if_neg(
self.pos,
'PyTuple_SetItem(%s, %s, %s)' % (list_var, i, node.py_result()))
code.putln('#endif')
node.generate_post_assignment_code(code)
node.free_temps(code)
for i, node in enumerate(self.values):
code.putln('%s[%d] = %s;' % (values_array, i, node.py_result()))

code.mark_pos(self.pos)
self.allocate_temp_result(code)
code.globalstate.use_utility_code(UtilityCode.load_cached("JoinPyUnicode", "StringTools.c"))
code.putln('%s = __Pyx_PyUnicode_Join(%s, %d, %s, %s); %s' % (
code.putln('%s = __Pyx_PyUnicode_Join(%s, %d, %s, %s);' % (
self.result(),
list_var,
values_array,
num_items,
ulength_var,
max_char_var,
code.error_goto_if_null(self.py_result(), self.pos)))
' + '.join(length_parts),
# or-ing isn't entirely correct here since it can produce values > 1114111,
# but we crop that in __Pyx_PyUnicode_Join().
' | '.join(charval_parts),
))

if not use_stack_memory:
code.putln("PyMem_Free(%s);" % values_array)
code.funcstate.release_temp(values_array)

code.putln(code.error_goto_if_null(self.py_result(), self.pos))
self.generate_gotref(code)

code.put_decref_clear(list_var, py_object_type)
code.funcstate.release_temp(list_var)
code.funcstate.release_temp(ulength_var)
code.funcstate.release_temp(max_char_var)
for node in self.values:
node.generate_disposal_code(code)
node.free_temps(code)


class FormattedValueNode(ExprNode):
Expand Down
27 changes: 22 additions & 5 deletions Cython/Utility/StringTools.c
Original file line number Diff line number Diff line change
Expand Up @@ -812,34 +812,38 @@ static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* value

/////////////// JoinPyUnicode.proto ///////////////

static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength,
static PyObject* __Pyx_PyUnicode_Join(PyObject** values, Py_ssize_t value_count, Py_ssize_t result_ulength,
Py_UCS4 max_char);

/////////////// JoinPyUnicode ///////////////
//@requires: IncludeStringH
//@substitute: naming

static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength,
static PyObject* __Pyx_PyUnicode_Join(PyObject** values, Py_ssize_t value_count, Py_ssize_t result_ulength,
Py_UCS4 max_char) {
#if CYTHON_USE_UNICODE_INTERNALS && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
PyObject *result_uval;
int result_ukind, kind_shift;
Py_ssize_t i, char_pos;
void *result_udata;

if (max_char > 1114111) max_char = 1114111;
result_uval = PyUnicode_New(result_ulength, max_char);
if (unlikely(!result_uval)) return NULL;
result_ukind = (max_char <= 255) ? PyUnicode_1BYTE_KIND : (max_char <= 65535) ? PyUnicode_2BYTE_KIND : PyUnicode_4BYTE_KIND;
kind_shift = (result_ukind == PyUnicode_4BYTE_KIND) ? 2 : result_ukind - 1;
result_udata = PyUnicode_DATA(result_uval);
assert(kind_shift == 2 || kind_shift == 1 || kind_shift == 0);

if (unlikely((PY_SSIZE_T_MAX >> kind_shift) - result_ulength < 0))
goto overflow;

char_pos = 0;
for (i=0; i < value_count; i++) {
int ukind;
Py_ssize_t ulength;
void *udata;
PyObject *uval = PyTuple_GET_ITEM(value_tuple, i);
PyObject *uval = values[i];
if (unlikely(__Pyx_PyUnicode_READY(uval)))
goto bad;
ulength = __Pyx_PyUnicode_GET_LENGTH(uval);
Expand Down Expand Up @@ -874,10 +878,23 @@ static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_co
return NULL;
#else
// non-CPython fallback
Py_ssize_t i;
PyObject *result = NULL;
PyObject *value_tuple = PyTuple_New(value_count);
if (unlikely(!value_tuple)) return NULL;
CYTHON_UNUSED_VAR(max_char);
CYTHON_UNUSED_VAR(result_ulength);
CYTHON_UNUSED_VAR(value_count);
return PyUnicode_Join($empty_unicode, value_tuple);

for (i=0; i<value_count; i++) {
if (unlikely(__Pyx_PyTuple_SET_ITEM(value_tuple, i, values[i]) < 0)) goto bad;
Py_INCREF(values[i]);
}

result = PyUnicode_Join($empty_unicode, value_tuple);

bad:
Py_DECREF(value_tuple);
return result;
#endif
}

Expand Down

0 comments on commit 875293e

Please sign in to comment.