Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up fstrings #5866

Merged
merged 6 commits into from
Nov 25, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
150 changes: 61 additions & 89 deletions Cython/Compiler/ExprNodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1746,36 +1746,25 @@ def coerce_to_boolean(self, env):
bool_value = bool(self.value)
return BoolNode(self.pos, value=bool_value, constant_result=bool_value)

def estimate_max_charval(self):
# most strings will be ASCII
if self.value.isascii():
return 127
# ... or at least Latin-1
try:
scoder marked this conversation as resolved.
Show resolved Hide resolved
self.value.encode('iso8859-1')
return 255
except UnicodeEncodeError:
pass
# not ISO8859-1 => check BMP limit
return 65535 if ord(max(self.value)) <= 65535 else 1114111

def contains_surrogates(self):
return StringEncoding.string_contains_surrogates(self.value)

def generate_evaluation_code(self, code):
if self.type.is_pyobject:
# FIXME: this should go away entirely!
# Since string_contains_lone_surrogates() returns False for surrogate pairs in Py2/UCS2,
# Py2 can generate different code from Py3 here. Let's hope we get away with claiming that
# the processing of surrogate pairs in code was always ambiguous and lead to different results
# on P16/32bit Unicode platforms.
if StringEncoding.string_contains_lone_surrogates(self.value):
# lone (unpaired) surrogates are not really portable and cannot be
# decoded by the UTF-8 codec in Py3.3
self.result_code = code.get_py_const(py_object_type, 'ustring')
data_cname = code.get_string_const(
StringEncoding.BytesLiteral(self.value.encode('unicode_escape')))
const_code = code.get_cached_constants_writer(self.result_code)
if const_code is None:
return # already initialised
const_code.mark_pos(self.pos)
const_code.putln(
"%s = PyUnicode_DecodeUnicodeEscape(%s, sizeof(%s) - 1, NULL); %s" % (
self.result_code,
data_cname,
data_cname,
const_code.error_goto_if_null(self.result_code, self.pos)))
const_code.put_error_if_neg(
self.pos, "__Pyx_PyUnicode_READY(%s)" % self.result_code)
else:
self.result_code = code.get_py_string_const(self.value)
self.result_code = code.get_py_string_const(self.value)
else:
self.result_code = code.get_pyunicode_ptr_const(self.value)

Expand Down Expand Up @@ -3581,85 +3570,68 @@ def may_be_none(self):
def generate_evaluation_code(self, code):
code.mark_pos(self.pos)
num_items = len(self.values)
list_var = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
ulength_var = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
max_char_var = code.funcstate.allocate_temp(PyrexTypes.c_py_ucs4_type, manage_ref=False)
use_stack_memory = num_items < 32

code.putln('%s = PyTuple_New(%s); %s' % (
list_var,
num_items,
code.error_goto_if_null(list_var, self.pos)))
code.put_gotref(list_var, py_object_type)
code.putln("%s = 0;" % ulength_var)
code.putln("%s = 127;" % max_char_var) # at least ASCII character range
unknown_nodes = set()
max_char_value = 127
for node in self.values:
if isinstance(node, UnicodeNode):
max_char_value = max(max_char_value, node.estimate_max_charval())
elif isinstance(node, FormattedValueNode) and node.value.type.is_numeric:
# formatted C numbers are always ASCII
pass
else:
unknown_nodes.add(node)

for i, node in enumerate(self.values):
length_parts = []
charval_parts = [str(max_char_value)]
for node in self.values:
node.generate_evaluation_code(code)
node.make_owned_reference(code)

ulength = "__Pyx_PyUnicode_GET_LENGTH(%s)" % node.py_result()
max_char_value = "__Pyx_PyUnicode_MAX_CHAR_VALUE(%s)" % node.py_result()
is_ascii = False
if isinstance(node, UnicodeNode):
try:
# most strings will be ASCII or at least Latin-1
node.value.encode('iso8859-1')
max_char_value = '255'
node.value.encode('us-ascii')
is_ascii = True
except UnicodeEncodeError:
if max_char_value != '255':
# not ISO8859-1 => check BMP limit
max_char = max(map(ord, node.value))
if max_char < 0xD800:
# BMP-only, no surrogate pairs used
max_char_value = '65535'
ulength = str(len(node.value))
elif max_char >= 65536:
# clearly outside of BMP, and not on a 16-bit Unicode system
max_char_value = '1114111'
ulength = str(len(node.value))
else:
# not really worth implementing a check for surrogate pairs here
# drawback: C code can differ when generating on Py2 with 2-byte Unicode
pass
else:
ulength = str(len(node.value))
elif isinstance(node, FormattedValueNode) and node.value.type.is_numeric:
is_ascii = True # formatted C numbers are always ASCII
length_parts.append(str(len(node.value)))
else:
length_parts.append("__Pyx_PyUnicode_GET_LENGTH(%s)" % node.py_result())
if node in unknown_nodes:
charval_parts.append("__Pyx_PyUnicode_MAX_CHAR_VALUE(%s)" % node.py_result())

if not is_ascii:
code.putln("%s = (%s > %s) ? %s : %s;" % (
max_char_var, max_char_value, max_char_var, max_char_value, max_char_var))
code.putln("%s += %s;" % (ulength_var, ulength))
if use_stack_memory:
values_array = code.funcstate.allocate_temp(
PyrexTypes.c_array_type(PyrexTypes.py_object_type, num_items), manage_ref=False)
else:
values_array = code.funcstate.allocate_temp(
PyrexTypes.CPtrType(PyrexTypes.py_object_type), manage_ref=False)
code.putln("%s = PyMem_Calloc(%d, sizeof(PyObject*));" % (values_array, num_items))
code.putln("if (unlikely(!%s)) {" % values_array)
code.putln("PyErr_NoMemory(); %s" % code.error_goto(self.pos))
code.putln("}")

node.generate_giveref(code)
code.putln('#if CYTHON_ASSUME_SAFE_MACROS')
code.putln('PyTuple_SET_ITEM(%s, %s, %s);' % (list_var, i, node.py_result()))
code.putln('#else')
code.put_error_if_neg(
self.pos,
'PyTuple_SetItem(%s, %s, %s)' % (list_var, i, node.py_result()))
code.putln('#endif')
node.generate_post_assignment_code(code)
node.free_temps(code)
for i, node in enumerate(self.values):
code.putln('%s[%d] = %s;' % (values_array, i, node.py_result()))

code.mark_pos(self.pos)
self.allocate_temp_result(code)
code.globalstate.use_utility_code(UtilityCode.load_cached("JoinPyUnicode", "StringTools.c"))
code.putln('%s = __Pyx_PyUnicode_Join(%s, %d, %s, %s); %s' % (
code.putln('%s = __Pyx_PyUnicode_Join(%s, %d, %s, %s);' % (
self.result(),
list_var,
values_array,
num_items,
ulength_var,
max_char_var,
code.error_goto_if_null(self.py_result(), self.pos)))
' + '.join(length_parts),
# or-ing isn't entirely correct here since it can produce values > 1114111,
# but we crop that in __Pyx_PyUnicode_Join().
' | '.join(charval_parts),
))

if not use_stack_memory:
code.putln("PyMem_Free(%s);" % values_array)
code.funcstate.release_temp(values_array)

code.putln(code.error_goto_if_null(self.py_result(), self.pos))
self.generate_gotref(code)

code.put_decref_clear(list_var, py_object_type)
code.funcstate.release_temp(list_var)
code.funcstate.release_temp(ulength_var)
code.funcstate.release_temp(max_char_var)
for node in self.values:
node.generate_disposal_code(code)
node.free_temps(code)


class FormattedValueNode(ExprNode):
Expand Down
24 changes: 19 additions & 5 deletions Cython/Utility/StringTools.c
Original file line number Diff line number Diff line change
Expand Up @@ -812,34 +812,38 @@ static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* value

/////////////// JoinPyUnicode.proto ///////////////

static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength,
static PyObject* __Pyx_PyUnicode_Join(PyObject** values, Py_ssize_t value_count, Py_ssize_t result_ulength,
Py_UCS4 max_char);

/////////////// JoinPyUnicode ///////////////
//@requires: IncludeStringH
//@substitute: naming

static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength,
static PyObject* __Pyx_PyUnicode_Join(PyObject** values, Py_ssize_t value_count, Py_ssize_t result_ulength,
Py_UCS4 max_char) {
#if CYTHON_USE_UNICODE_INTERNALS && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
PyObject *result_uval;
int result_ukind, kind_shift;
Py_ssize_t i, char_pos;
void *result_udata;

if (max_char > 1114111) max_char = 1114111;
result_uval = PyUnicode_New(result_ulength, max_char);
if (unlikely(!result_uval)) return NULL;
result_ukind = (max_char <= 255) ? PyUnicode_1BYTE_KIND : (max_char <= 65535) ? PyUnicode_2BYTE_KIND : PyUnicode_4BYTE_KIND;
kind_shift = (result_ukind == PyUnicode_4BYTE_KIND) ? 2 : result_ukind - 1;
result_udata = PyUnicode_DATA(result_uval);
assert(kind_shift == 2 || kind_shift == 1 || kind_shift == 0);

if (unlikely((PY_SSIZE_T_MAX >> kind_shift) - result_ulength < 0))
goto overflow;

char_pos = 0;
for (i=0; i < value_count; i++) {
int ukind;
Py_ssize_t ulength;
void *udata;
PyObject *uval = PyTuple_GET_ITEM(value_tuple, i);
PyObject *uval = values[i];
if (unlikely(__Pyx_PyUnicode_READY(uval)))
goto bad;
ulength = __Pyx_PyUnicode_GET_LENGTH(uval);
Expand Down Expand Up @@ -874,10 +878,20 @@ static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_co
return NULL;
#else
// non-CPython fallback
PyObject *result, *value_tuple = PyTuple_New(value_count);
Py_ssize_t i;
if (unlikely(!value_tuple)) return NULL;
CYTHON_UNUSED_VAR(max_char);
CYTHON_UNUSED_VAR(result_ulength);
CYTHON_UNUSED_VAR(value_count);
return PyUnicode_Join($empty_unicode, value_tuple);

for (i=0; i<value_count; i++) {
PyTuple_SET_ITEM(value_tuple, i, values[i]);
Py_INCREF(values[i]);
scoder marked this conversation as resolved.
Show resolved Hide resolved
}

result = PyUnicode_Join($empty_unicode, value_tuple);
Py_DECREF(value_tuple);
return result;
#endif
}

Expand Down