Permalink
Browse files

Remove runtime use of u()

u() is slower than unicod(), and u() is only needed
for string literals with \uXXXX escapes in them.
Therefore use unicod() wherever possible, and make
explicit strings used at runtime into constants.
  • Loading branch information...
1 parent 0fd60a0 commit 319c33ed67753593f7562f200cfe397cfc52c835 @daviddrysdale committed Dec 28, 2011
@@ -26,7 +26,7 @@
# limitations under the License.
import re
-from .util import u
+from .util import u, unicod, U_EMPTY_STRING
from .unicode_util import digit as unicode_digit
from .re_util import fullmatch
from .phonemetadata import PhoneMetadata
@@ -35,26 +35,28 @@
from .phonenumberutil import _extract_country_code, region_code_for_country_code
from .phonenumberutil import country_code_for_region
-_EMPTY_METADATA = PhoneMetadata(id=u(""), international_prefix=u("NA"), register=False)
+_EMPTY_METADATA = PhoneMetadata(id=unicod(""),
+ international_prefix=unicod("NA"),
+ register=False)
# A pattern that is used to match character classes in regular expressions. An
# example of a character class is [1-4].
-_CHARACTER_CLASS_PATTERN = re.compile(u("\\[([^\\[\\]])*\\]"))
+_CHARACTER_CLASS_PATTERN = re.compile(unicod("\\[([^\\[\\]])*\\]"))
# Any digit in a regular expression that actually denotes a digit. For
# example, in the regular expression 80[0-2]\d{6,10}, the first 2 digits (8
# and 0) are standalone digits, but the rest are not.
# Two look-aheads are needed because the number following \\d could be a
# two-digit number, since the phone number can be as long as 15 digits.
-_STANDALONE_DIGIT_PATTERN = re.compile(u("\\d(?=[^,}][^,}])"))
+_STANDALONE_DIGIT_PATTERN = re.compile(unicod("\\d(?=[^,}][^,}])"))
# A pattern that is used to determine if a number_format under
# available_formats is eligible to be used by the AYTF. It is eligible when
# the format element under number_format contains groups of the dollar sign
# followed by a single digit, separated by valid phone number
# punctuation. This prevents invalid punctuation (such as the star sign in
# Israeli star numbers) getting into the output of the AYTF.
-_ELIGIBLE_FORMAT_PATTERN = re.compile(u("[") + _VALID_PUNCTUATION + u("]*") +
- u("(\\\\\\d") + u("[") + _VALID_PUNCTUATION + u("]*)+"))
+_ELIGIBLE_FORMAT_PATTERN = re.compile(unicod("[") + _VALID_PUNCTUATION + unicod("]*") +
+ unicod("(\\\\\\d") + unicod("[") + _VALID_PUNCTUATION + unicod("]*)+"))
# This is the minimum length of national number accrued that is required to
# trigger the formatter. The first element of the leading_digits_pattern of each
@@ -185,7 +187,7 @@ def _get_formatting_template(self, number_pattern, number_format):
# entered so far is longer than the maximum the current formatting
# rule can accommodate.
if len(a_phone_number) < len(self._national_number):
- return u("")
+ return U_EMPTY_STRING
# Formats the number according to number_format
template = re.sub(number_pattern, number_format, a_phone_number)
# Replaces each digit with character _DIGIT_PLACEHOLDER
@@ -494,7 +496,7 @@ def _normalize_and_accrue_digits_and_plus_sign(self, next_char, remember_positio
else:
next_digit = unicode_digit(next_char, -1)
if next_digit != -1:
- normalized_char = u(next_digit)
+ normalized_char = unicod(next_digit)
else: # pragma no cover
normalized_char = next_char
self._accrued_input_without_formatting += normalized_char
@@ -44,7 +44,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from .util import prnt, u
+from .util import prnt, U_EMPTY_STRING
from .phonenumberutil import format_number, PhoneNumberFormat, is_valid_number
from .phonenumberutil import region_code_for_number
@@ -122,8 +122,8 @@ def area_description_for_number(numobj, lang, script=None, region=None):
if name is not None:
return name
else:
- return u("")
- return u("")
+ return U_EMPTY_STRING
+ return U_EMPTY_STRING
def country_name_for_number(numobj, lang, script=None, region=None):
@@ -156,7 +156,7 @@ def region_display_name(region_code, lang, script=None, region=None):
other_lang = name[1:]
name = LOCALE_DATA[region_code].get(other_lang, "")
return name
- return u("")
+ return U_EMPTY_STRING
def description_for_valid_number(numobj, lang, script=None, region=None):
@@ -124,17 +124,17 @@ def __repr__(self):
def __unicode__(self):
# Generate a string that is valid Python input for the constructor.
# Note that we use rpr (variant of repr), which generates its own quotes.
- result = u("NumberFormat(pattern=%s, format=%s") % (rpr(self.pattern), rpr(self.format))
+ result = unicod("NumberFormat(pattern=%s, format=%s") % (rpr(self.pattern), rpr(self.format))
if len(self.leading_digits_pattern) > 0:
- result += (u(", leading_digits_pattern=[%s]") %
- ", ".join([rpr(ld) for ld in self.leading_digits_pattern]))
+ result += (unicod(", leading_digits_pattern=[%s]") %
+ unicod(", ").join([rpr(ld) for ld in self.leading_digits_pattern]))
if self.national_prefix_formatting_rule is not None:
- result += u(", national_prefix_formatting_rule=%s") % rpr(self.national_prefix_formatting_rule)
+ result += unicod(", national_prefix_formatting_rule=%s") % rpr(self.national_prefix_formatting_rule)
if self.national_prefix_optional_when_formatting:
- result += u(", national_prefix_optional_when_formatting=%s") % str(self.national_prefix_optional_when_formatting)
+ result += unicod(", national_prefix_optional_when_formatting=%s") % str(self.national_prefix_optional_when_formatting)
if self.domestic_carrier_code_formatting_rule is not None:
- result += u(", domestic_carrier_code_formatting_rule=%s") % rpr(self.domestic_carrier_code_formatting_rule)
- result += u(")")
+ result += unicod(", domestic_carrier_code_formatting_rule=%s") % rpr(self.domestic_carrier_code_formatting_rule)
+ result += unicod(")")
return result
@@ -185,18 +185,18 @@ def __repr__(self):
def __unicode__(self):
# Generate a string that is valid Python input for constructor
- result = u("PhoneNumberDesc(")
- sep = u("")
+ result = unicod("PhoneNumberDesc(")
+ sep = unicod("")
if self.national_number_pattern is not None:
- result += u("%snational_number_pattern=%s") % (sep, rpr(self.national_number_pattern))
- sep = u(", ")
+ result += unicod("%snational_number_pattern=%s") % (sep, rpr(self.national_number_pattern))
+ sep = unicod(", ")
if self.possible_number_pattern is not None:
- result += u("%spossible_number_pattern=%s") % (sep, rpr(self.possible_number_pattern))
- sep = u(", ")
+ result += unicod("%spossible_number_pattern=%s") % (sep, rpr(self.possible_number_pattern))
+ sep = unicod(", ")
if self.example_number is not None:
- result += u("%sexample_number=%s") % (sep, rpr(self.example_number))
- sep = u(", ")
- result += u(")")
+ result += unicod("%sexample_number=%s") % (sep, rpr(self.example_number))
+ sep = unicod(", ")
+ result += unicod(")")
return result
@@ -416,42 +416,42 @@ def __unicode__(self):
country_code = self.country_code
if country_code is None:
country_code = -1
- result = (u("PhoneMetadata(id='%s', country_code=%d, international_prefix=%s") %
+ result = (unicod("PhoneMetadata(id='%s', country_code=%d, international_prefix=%s") %
(self.id, country_code, rpr(self.international_prefix)))
- result += ",\n general_desc=%s" % self.general_desc
- result += ",\n fixed_line=%s" % self.fixed_line
- result += ",\n mobile=%s" % self.mobile
- result += ",\n toll_free=%s" % self.toll_free
- result += ",\n premium_rate=%s" % self.premium_rate
- result += ",\n shared_cost=%s" % self.shared_cost
- result += ",\n personal_number=%s" % self.personal_number
- result += ",\n voip=%s" % self.voip
- result += ",\n pager=%s" % self.pager
- result += ",\n uan=%s" % self.uan
- result += ",\n emergency=%s" % self.emergency
- result += ",\n voicemail=%s" % self.voicemail
- result += ",\n no_international_dialling=%s" % self.no_international_dialling
+ result += unicod(",\n general_desc=%s") % self.general_desc
+ result += unicod(",\n fixed_line=%s") % self.fixed_line
+ result += unicod(",\n mobile=%s") % self.mobile
+ result += unicod(",\n toll_free=%s") % self.toll_free
+ result += unicod(",\n premium_rate=%s") % self.premium_rate
+ result += unicod(",\n shared_cost=%s") % self.shared_cost
+ result += unicod(",\n personal_number=%s") % self.personal_number
+ result += unicod(",\n voip=%s") % self.voip
+ result += unicod(",\n pager=%s") % self.pager
+ result += unicod(",\n uan=%s") % self.uan
+ result += unicod(",\n emergency=%s") % self.emergency
+ result += unicod(",\n voicemail=%s") % self.voicemail
+ result += unicod(",\n no_international_dialling=%s") % self.no_international_dialling
if self.preferred_international_prefix is not None:
- result += ",\n preferred_international_prefix=%s" % rpr(self.preferred_international_prefix)
+ result += unicod(",\n preferred_international_prefix=%s") % rpr(self.preferred_international_prefix)
if self.national_prefix is not None:
- result += ",\n national_prefix=%s" % rpr(self.national_prefix)
+ result += unicod(",\n national_prefix=%s") % rpr(self.national_prefix)
if self.preferred_extn_prefix is not None:
- result += ",\n preferred_extn_prefix=%s" % rpr(self.preferred_extn_prefix)
+ result += unicod(",\n preferred_extn_prefix=%s") % rpr(self.preferred_extn_prefix)
if self.national_prefix_for_parsing is not None:
- result += ",\n national_prefix_for_parsing=%s" % rpr(self.national_prefix_for_parsing)
+ result += unicod(",\n national_prefix_for_parsing=%s") % rpr(self.national_prefix_for_parsing)
if self.national_prefix_transform_rule is not None:
# Note that we use rpr() on self.national_prefix_transform_rule, which generates its own quotes
- result += ",\n national_prefix_transform_rule=%s" % rpr(self.national_prefix_transform_rule)
+ result += unicod(",\n national_prefix_transform_rule=%s") % rpr(self.national_prefix_transform_rule)
if len(self.number_format) > 0:
- result += ",\n number_format=[%s]" % ',\n '.join(map(u, self.number_format))
+ result += unicod(",\n number_format=[%s]") % ',\n '.join(map(u, self.number_format))
if len(self.intl_number_format) > 0:
- result += ",\n intl_number_format=[%s]" % ',\n '.join(map(u, self.intl_number_format))
+ result += unicod(",\n intl_number_format=[%s]") % ',\n '.join(map(u, self.intl_number_format))
if self.main_country_for_code:
- result += ",\n main_country_for_code=True"
+ result += unicod(",\n main_country_for_code=True")
if self.leading_digits is not None:
- result += ",\n leading_digits='%s'" % self.leading_digits
+ result += unicod(",\n leading_digits='%s'") % self.leading_digits
if self.leading_zero_possible:
- result += ",\n leading_zero_possible=True"
- result += u(")")
+ result += unicod(",\n leading_zero_possible=True")
+ result += unicod(")")
return result
@@ -186,7 +186,7 @@ def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
- return (("PhoneNumber(country_code=%s, national_number=%s, extension=%s, " +
+ return (unicod("PhoneNumber(country_code=%s, national_number=%s, extension=%s, " +
"italian_leading_zero=%s, country_code_source=%s, preferred_domestic_carrier_code=%s)") %
(self.country_code,
self.national_number,
@@ -196,16 +196,16 @@ def __repr__(self):
rpr(self.preferred_domestic_carrier_code)))
def __unicode__(self):
- result = ("Country Code: %s National Number: %s" %
+ result = (unicod("Country Code: %s National Number: %s") %
(self.country_code, self.national_number))
if self.italian_leading_zero is not None:
- result += " Leading Zero: %s" % self.italian_leading_zero
+ result += unicod(" Leading Zero: %s") % self.italian_leading_zero
if self.extension is not None:
- result += " Extension: %s" % self.extension
+ result += unicod(" Extension: %s") % self.extension
if self.country_code_source is not None:
- result += " Country Code Source: %s" % self.country_code_source
+ result += unicod(" Country Code Source: %s") % self.country_code_source
if self.preferred_domestic_carrier_code is not None:
- result += (" Preferred Domestic Carrier Code: %s" %
+ result += (unicod(" Preferred Domestic Carrier Code: %s") %
self.preferred_domestic_carrier_code)
return result
@@ -20,7 +20,8 @@
# Extra regexp function; see README
from .re_util import fullmatch
-from .util import UnicodeMixin, u
+from .util import UnicodeMixin, u, unicod
+from .util import U_EMPTY_STRING, U_DASH, U_SEMICOLON, U_SLASH, U_X_LOWER, U_X_UPPER
from .unicode_util import Category, Block, is_letter
from .phonenumberutil import _MAX_LENGTH_FOR_NSN, _MAX_LENGTH_COUNTRY_CODE
from .phonenumberutil import _VALID_PUNCTUATION, _PLUS_CHARS, _NON_DIGITS_PATTERN
@@ -40,7 +41,7 @@ def _limit(lower, upper):
"""Returns a regular expression quantifier with an upper and lower limit."""
if ((lower < 0) or (upper <= 0) or (upper < lower)):
raise Exception("Illegal argument to _limit")
- return u("{%d,%d}") % (lower, upper)
+ return unicod("{%d,%d}") % (lower, upper)
# Build the MATCHING_BRACKETS and PATTERN regular expression patterns. The
# building blocks below exist to make the patterns more easily understood.
@@ -210,7 +211,7 @@ def _verify_strict_grouping(numobj, candidate):
# The check here makes sure that we haven't mistakenly already used the extension to
# match the last group of the subscriber number. Note the extension cannot have
# formatting in-between digits.
- return (normalized_candidate[from_index:].find(numobj.extension or "") != -1)
+ return (normalized_candidate[from_index:].find(numobj.extension or U_EMPTY_STRING) != -1)
def _verify_exact_grouping(numobj, candidate):
@@ -258,19 +259,19 @@ def _get_national_number_groups(numobj):
rfc3966_format = format_number(numobj, PhoneNumberFormat.RFC3966)
# We remove the extension part from the formatted string before splitting
# it into different groups.
- end_index = rfc3966_format.find(u(";"))
+ end_index = rfc3966_format.find(U_SEMICOLON)
if end_index < 0:
end_index = len(rfc3966_format)
# The country-code will have a '-' following it.
- start_index = rfc3966_format.find(u("-")) + 1
- return rfc3966_format[start_index:end_index].split(u("-"))
+ start_index = rfc3966_format.find(U_DASH) + 1
+ return rfc3966_format[start_index:end_index].split(U_DASH)
def _contains_more_than_one_slash(candidate):
- first_slash_index = candidate.find(u("/"))
+ first_slash_index = candidate.find(U_SLASH)
return (first_slash_index > 0 and
- (candidate.find(u("/"), (first_slash_index + 1)) != -1))
+ (candidate.find(U_SLASH, (first_slash_index + 1)) != -1))
def _contains_only_valid_x_chars(numobj, candidate):
@@ -283,9 +284,9 @@ def _contains_only_valid_x_chars(numobj, candidate):
# character of the string.
ii = 0
while ii < (len(candidate) - 1):
- if (candidate[ii] == 'x' or candidate[ii] == 'X'):
+ if (candidate[ii] == U_X_LOWER or candidate[ii] == U_X_UPPER):
next_char = candidate[ii + 1]
- if (next_char == 'x' or next_char == 'X'):
+ if (next_char == U_X_LOWER or next_char == U_X_UPPER):
# This is the carrier code case, in which the 'X's always
# precede the national significant number.
ii += 1
@@ -375,7 +376,7 @@ def __init__(self, text, region,
# The text searched for phone numbers.
self.text = text
if self.text is None:
- self.text = u("")
+ self.text = U_EMPTY_STRING
# The region (country) to assume for phone numbers without an
# international prefix, possibly None.
self.preferred_region = region
@@ -661,10 +662,10 @@ def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
- return ("PhoneNumberMatch(start=%r, raw_string=%r, numobj=%r)" %
+ return (unicod("PhoneNumberMatch(start=%r, raw_string=%r, numobj=%r)") %
(self.start,
self.raw_string,
self.number))
def __unicode__(self):
- return u("PhoneNumberMatch [%s,%s) %s") % (self.start, self.end, self.raw_string)
+ return unicod("PhoneNumberMatch [%s,%s) %s") % (self.start, self.end, self.raw_string)
Oops, something went wrong.

0 comments on commit 319c33e

Please sign in to comment.