Skip to content

Commit

Permalink
Add a new directive 'str_is_str=True' that keeps unprefixed string li…
Browse files Browse the repository at this point in the history
…terals and the 'str' builtin type unchanged even when 'language_level=3' is enabled.

See #2565.
  • Loading branch information
scoder committed Sep 24, 2018
1 parent b6509bf commit cea4291
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
Expand Up @@ -34,6 +34,11 @@ Features added
* ``cython.inline()`` supports a direct ``language_level`` keyword argument that
was previously only available via a directive.

* A new directive ``str_is_str=True`` was added that keeps unprefixed string
literals as type 'str' in both Py2 and Py3, and leaves the builtin 'str' type
unchanged even when ``language_level=3`` is enabled. This is meant to help user
code migrate to Python 3 semantics without making support for Python 2.x difficult.

* In CPython 3.6 and later, looking up globals in the module dict is almost
as fast as looking up C globals.
(Github issue #2313)
Expand Down
9 changes: 9 additions & 0 deletions Cython/Compiler/Main.py
Expand Up @@ -94,9 +94,18 @@ def __init__(self, include_directories, compiler_directives, cpp=False,

if language_level is not None:
self.set_language_level(language_level)
if self.compiler_directives.get('str_is_str') is not None:
self.set_str_is_str(self.compiler_directives['str_is_str'])

self.gdb_debug_outputwriter = None

def set_str_is_str(self, str_is_str):
    """Switch the 'unicode_literals' future directive according to 'str_is_str'.

    A true 'str_is_str' removes 'unicode_literals' from
    'self.future_directives'; a false one adds it.
    """
    from .Future import unicode_literals
    directives = self.future_directives
    if not str_is_str:
        directives.add(unicode_literals)
        return
    directives.discard(unicode_literals)

def set_language_level(self, level):
self.language_level = level
if level >= 3:
Expand Down
3 changes: 3 additions & 0 deletions Cython/Compiler/Options.py
Expand Up @@ -198,6 +198,7 @@ def get_directive_defaults():
'iterable_coroutine': False, # Make async coroutines backwards compatible with the old asyncio yield-from syntax.
'c_string_type': 'bytes',
'c_string_encoding': '',
'str_is_str': None, # fall back to 'language_level == 2'
'type_version_tag': True, # enables Py_TPFLAGS_HAVE_VERSION_TAG on extension types
'unraisable_tracebacks': True,
'old_style_globals': False,
Expand Down Expand Up @@ -313,6 +314,7 @@ def normalise_encoding_name(option_name, encoding):
'freelist': int,
'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'),
'c_string_encoding': normalise_encoding_name,
'str_is_str': bool,
}

for key, val in _directive_defaults.items():
Expand Down Expand Up @@ -347,6 +349,7 @@ def normalise_encoding_name(option_name, encoding):
# Avoid scope-specific to/from_py_functions for c_string.
'c_string_type': ('module',),
'c_string_encoding': ('module',),
'str_is_str': ('module',),
'type_version_tag': ('module', 'cclass'),
'language_level': ('module',),
# globals() could conceivably be controlled at a finer granularity,
Expand Down
3 changes: 3 additions & 0 deletions Cython/Compiler/Parsing.py
Expand Up @@ -3652,6 +3652,9 @@ def p_compiler_directive_comments(s):
if 'language_level' in new_directives:
# Make sure we apply the language level already to the first token that follows the comments.
s.context.set_language_level(new_directives['language_level'])
if 'str_is_str' in new_directives:
# Make sure we apply 'str_is_str' directive already to the first token that follows the comments.
s.context.set_str_is_str(new_directives['str_is_str'])

result.update(new_directives)

Expand Down
18 changes: 12 additions & 6 deletions Cython/Compiler/Symtab.py
Expand Up @@ -21,6 +21,7 @@
from .TypeSlots import (
pyfunction_signature, pymethod_signature, richcmp_special_methods,
get_special_method_signature, get_property_accessor_signature)
from . import Future

from . import Code

Expand Down Expand Up @@ -1002,10 +1003,12 @@ def __init__(self):
cname, type = definition
self.declare_var(name, type, None, cname)

def lookup(self, name, language_level=None, str_is_str=None):
    """Look up 'name', redirecting 'str' to 'unicode' when str is not str.

    'language_level' and 'str_is_str' are passed by ModuleScope.  If
    'str_is_str' is None, it defaults to True for a language level of
    None or 2, and to False for any other level.  Only the name 'str'
    is affected; every other name is looked up unchanged.
    """
    if name == 'str':
        if str_is_str is None:
            str_is_str = language_level in (None, 2)
        if not str_is_str:
            name = 'unicode'
    return Scope.lookup(self, name)

Expand Down Expand Up @@ -1174,15 +1177,18 @@ def qualifying_scope(self):
def global_scope(self):
return self

def lookup(self, name, language_level=None, str_is_str=None):
    """Look up 'name' in this scope, falling back to the outer scope.

    An entry found by 'lookup_here()' is returned directly.  Otherwise the
    lookup is delegated to 'self.outer_scope', forwarding both
    'language_level' and 'str_is_str' so that the outer scope can decide
    how to resolve the name.

    Defaults when the arguments are None:
    - 'language_level': taken from 'self.context', or 3 without a context.
    - 'str_is_str': True for language level 2, or when a context exists
      and 'unicode_literals' is not among its future directives.
    """
    entry = self.lookup_here(name)
    if entry is not None:
        return entry

    if language_level is None:
        language_level = self.context.language_level if self.context is not None else 3
    if str_is_str is None:
        str_is_str = language_level == 2 or (
            self.context is not None and Future.unicode_literals not in self.context.future_directives)

    # Forward 'str_is_str' as well; returning without it would drop the setting.
    return self.outer_scope.lookup(name, language_level=language_level, str_is_str=str_is_str)

def declare_tuple_type(self, pos, components):
components = tuple(components)
Expand Down
101 changes: 101 additions & 0 deletions tests/run/cython3_no_unicode_literals.pyx
@@ -0,0 +1,101 @@
# cython: language_level=3, binding=True, str_is_str=True
# mode: run
# tag: python3, str_is_str

print(end='') # test that language_level 3 applies immediately at the module start, for the first token.

# Module-level doctest: prints every item of locals_function(1) with %r, so the
# unprefixed literal 'abc' is expected to be shown without a 'u' prefix.
__doc__ = """
>>> items = sorted(locals_function(1).items())
>>> for item in items:
... print('%s = %r' % item)
a = 1
b = 2
x = 'abc'
"""

# Returns locals() of a function holding two arguments and one unprefixed
# string literal; exercised by the module-level doctest in __doc__.
def locals_function(a, b=2):
x = 'abc'
return locals()


### true division

# Divides an untyped argument by 2 with '/'; the doctest expects float
# results (4 -> 2.0, 3 -> 1.5), i.e. true division.
def truediv(x):
"""
>>> truediv(4)
2.0
>>> truediv(3)
1.5
"""
return x / 2


# Same check as truediv(), but with the argument declared as 'int';
# the doctest still expects float results.
def truediv_int(int x):
"""
>>> truediv_int(4)
2.0
>>> truediv_int(3)
1.5
"""
return x / 2


### Py3 feature tests

# Calls print() with star-args; the doctest expects the arguments to be
# written space-separated on one line.
def print_function(*args):
"""
>>> print_function(1,2,3)
1 2 3
"""
print(*args) # this isn't valid Py2 syntax


# Unprefixed module-level string literal shared by the tests below.
str_string = "abcdefg"

# Prints True if 'str_string' is an instance of 'str' (otherwise prints its
# actual type, which makes the doctest fail visibly), then returns the string.
def no_unicode_literals():
"""
>>> print( no_unicode_literals() )
True
abcdefg
"""
print(isinstance(str_string, str) or type(str_string))
return str_string


# Returns the name 'str' as resolved inside this module together with a value
# held in a variable declared as 'cdef str'.  The doctests check that the value
# and 'str_string' are both instances of the returned type; on failure the
# 'or' operand shows the offending objects instead of True.
def str_type_is_str():
"""
>>> str_type, s = str_type_is_str()
>>> isinstance(s, type(str_string)) or (s, str_type)
True
>>> isinstance(s, str_type) or (s, str_type)
True
>>> isinstance(str_string, str_type) or str_type
True
"""
cdef str s = 'abc'
return str, s


# Exercises annotation syntax on positional, defaulted, *args and **kwargs
# parameters, on the return value, and on a local variable.  The doctests
# expect five entries in __annotations__, each holding the unprefixed string
# literal from the signature, and the function to return a + b.
def annotation_syntax(a: "test new test", b : "other" = 2, *args: "ARGS", **kwargs: "KWARGS") -> "ret":
"""
>>> annotation_syntax(1)
3
>>> annotation_syntax(1,3)
4

>>> len(annotation_syntax.__annotations__)
5
>>> annotation_syntax.__annotations__['a']
'test new test'
>>> annotation_syntax.__annotations__['b']
'other'
>>> annotation_syntax.__annotations__['args']
'ARGS'
>>> annotation_syntax.__annotations__['kwargs']
'KWARGS'
>>> annotation_syntax.__annotations__['return']
'ret'
"""
result : int = a + b

return result

0 comments on commit cea4291

Please sign in to comment.