Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

better selection of variable names

replace non-id characters with unicode names and add hash for long strings
  • Loading branch information...
commit dd95489a24fe8dce2b570b23b6c571d5135b5e08 1 parent cbaddd2
@denik authored
Showing with 61 additions and 3 deletions.
  1. +61 −3 Cython/Compiler/Code.py
View
64 Cython/Compiler/Code.py
@@ -344,7 +344,7 @@ def __init__(self, cname, type):
replace_identifier=object, find_alphanums=object)
possible_unicode_identifier = re.compile(ur"(?![0-9])\w+$", re.U).match
possible_bytes_identifier = re.compile(r"(?![0-9])\w+$".encode('ASCII')).match
-replace_identifier = re.compile(r'[^a-zA-Z0-9_]+').sub
+replace_identifier = re.compile(r'[^a-zA-Z0-9_]+').subn
find_alphanums = re.compile('([a-zA-Z0-9]+)').findall
class StringConst(object):
@@ -707,10 +707,47 @@ def new_int_const_cname(self, value, longness):
cname = cname.replace('-', 'neg_').replace('.','_')
return cname
- def new_const_cname(self, prefix='', value=''):
+ def new_const_cname(self, prefix='', value='', limit=32):
+ from hashlib import md5
@scoder
scoder added a note

hashlib isn't available in Py2.4

@denik Owner
denik added a note

fixed here: 005b3ee

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ from base64 import b64encode
if hasattr(value, 'decode'):
value = value.decode('ASCII', 'ignore')
- value = replace_identifier('_', value)[:32].strip('_')
+ orig_value = value
+ need_hash = False
+
+ def repl(m):
+ chars = []
+ for c in m.group():
+ chars.append(short_unicode_name(c))
+ if chars:
+ result = '_'.join(chars)
+ if m.start() > 0:
+ result = '_' + result
+ if m.end() < len(value):
+ result = result + '_'
+ return result
+ return '_'
+
+ if len(value) > limit:
+ need_hash = True
+
+ value, n = replace_identifier(repl, value[:limit])
+ if len(value) >= limit:
+ need_hash = True
+ if n:
+ value = '_' + value[:limit - 1]
+
+ if need_hash:
+ digest = b64encode(md5(orig_value).digest()).replace('+', '=').replace('/', '=').replace('=', '')
@robertwb
robertwb added a note

hexdigest()

fixed here: 005b3ee
also simplified

@denik Owner
denik added a note

Yeah, I guess that would work too :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ length = len(digest)
+
+ for length in xrange(4, len(digest)):
+ if (value + '_' + digest[:length]) not in self.const_cname_counters:
+ value = value[:limit - length - 1] + '_' + digest[:length]
@robertwb
robertwb added a note

If we need a lot of the hash, I'm OK with not shortening the value portion, for readability. (Same below.) As long as the expected length is small and the maximum length is bounded, I think that's good.

@denik Owner
denik added a note

OK, so what if we just get rid of the for-loop and always use 4 or 5 characters of the hash? I think that will work for all cases. The counter below will stay for the exceptional case where it doesn't.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ break
+ else:
+ value = value[:limit - 1 - len(digest)] + '_' + digest
+
c = self.const_cname_counters
c[value] = c.setdefault(value, 0) + 1
if c[value] == 1:
@@ -922,6 +959,27 @@ def use_utility_code(self, utility_code):
utility_code.put_code(self)
def short_unicode_name(char, shortcut={'SPACE': 'SP',
                                       'HYPHEN-MINUS': 'HYPHEN',
                                       'PERCENT': 'PCNT'}):
    """Return a short mnemonic for a single character, for use in C names.

    The character's Unicode name is looked up with a trailing/embedded
    " SIGN" removed, then mapped through ``shortcut`` (a read-only table of
    preferred abbreviations, e.g. 'PERCENT SIGN' -> 'PERCENT' -> 'PCNT').
    Hyphens are treated as word separators, and a multi-word name is
    collapsed to the initials of its words ("LEFT PARENTHESIS" => "LP").

    Characters without a Unicode name fall back to their ``repr()`` with the
    leading ``u`` prefix, the surrounding quotes and leading backslashes
    stripped; if nothing is left, '_' is returned.

    In every non-empty case the result is passed through the module-level
    ``replace_identifier`` helper, which is expected to return a
    ``(new_string, count)`` pair (it is bound to ``re.subn``).
    """
    import unicodedata

    unicode_name = unicodedata.name(char, '').replace(' SIGN', '')

    if not unicode_name:
        # Unnamed character: derive something printable from its repr().
        escaped = repr(char).lstrip('u').strip("'").strip('"').lstrip('\\')
        if not escaped:
            return '_'
        cleaned, _ = replace_identifier('_', escaped)
        return cleaned

    unicode_name = shortcut.get(unicode_name, unicode_name).replace('-', ' ')
    if ' ' in unicode_name:
        # "LEFT PARENTHESIS" => "LP"
        initials = [word[:1] for word in unicode_name.split(' ')]
        unicode_name = ''.join(initials)
    cleaned, _ = replace_identifier('_', unicode_name)
    return cleaned
+
+
def funccontext_property(name):
try:
import operator
Please sign in to comment.
Something went wrong with that request. Please try again.