diff --git a/LICENSE b/LICENSE index e74ae64..20d5185 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ Copyright (c) 2016 Russell Keith-Magee. + All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -25,3 +26,57 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +The original Python Standard Library, upon which this project is based, is +released under the terms of the PSF License version 2: + +Copyright (c) 2001-2014 Python Software Foundation; All rights reserved + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014 Python Software Foundation; All Rights Reserved" are +retained in Python alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. diff --git a/ouroboros/__future__.py b/ouroboros/__future__.py new file mode 100644 index 0000000..3b2d5ec --- /dev/null +++ b/ouroboros/__future__.py @@ -0,0 +1,134 @@ +"""Record of phased-in incompatible language changes. 
+
+Each line is of the form:
+
+    FeatureName = "_Feature(" OptionalRelease "," MandatoryRelease ","
+                              CompilerFlag ")"
+
+where, normally, OptionalRelease < MandatoryRelease, and both are 5-tuples
+of the same form as sys.version_info:
+
+    (PY_MAJOR_VERSION, # the 2 in 2.1.0a3; an int
+     PY_MINOR_VERSION, # the 1; an int
+     PY_MICRO_VERSION, # the 0; an int
+     PY_RELEASE_LEVEL, # "alpha", "beta", "candidate" or "final"; string
+     PY_RELEASE_SERIAL # the 3; an int
+    )
+
+OptionalRelease records the first release in which
+
+    from __future__ import FeatureName
+
+was accepted.
+
+In the case of MandatoryReleases that have not yet occurred,
+MandatoryRelease predicts the release in which the feature will become part
+of the language.
+
+Else MandatoryRelease records when the feature became part of the language;
+in releases at or after that, modules no longer need
+
+    from __future__ import FeatureName
+
+to use the feature in question, but may continue to use such imports.
+
+MandatoryRelease may also be None, meaning that a planned feature got
+dropped.
+
+Instances of class _Feature have two corresponding methods,
+.getOptionalRelease() and .getMandatoryRelease().
+
+CompilerFlag is the (bitfield) flag that should be passed in the fourth
+argument to the builtin function compile() to enable the feature in
+dynamically compiled code.  This flag is stored in the .compiler_flag
+attribute on _Feature instances.  These values must match the appropriate
+#defines of CO_xxx flags in Include/compile.h.
+
+No feature line is ever to be deleted from this file.
+"""
+
+all_feature_names = [
+    "nested_scopes",
+    "generators",
+    "division",
+    "absolute_import",
+    "with_statement",
+    "print_function",
+    "unicode_literals",
+    "barry_as_FLUFL",
+]
+
+__all__ = ["all_feature_names"] + all_feature_names
+
+# The CO_xxx symbols are defined here under the same names used by
+# compile.h, so that an editor search will find them here.  However,
+# they're not exported in __all__, because they don't really belong to
+# this module.
+CO_NESTED = 0x0010                      # nested_scopes
+CO_GENERATOR_ALLOWED = 0                # generators (obsolete, was 0x1000)
+CO_FUTURE_DIVISION = 0x2000             # division
+CO_FUTURE_ABSOLUTE_IMPORT = 0x4000      # perform absolute imports by default
+CO_FUTURE_WITH_STATEMENT = 0x8000       # with statement
+CO_FUTURE_PRINT_FUNCTION = 0x10000      # print function
+CO_FUTURE_UNICODE_LITERALS = 0x20000    # unicode string literals
+CO_FUTURE_BARRY_AS_BDFL = 0x40000
+
+class _Feature:
+    def __init__(self, optionalRelease, mandatoryRelease, compiler_flag):
+        self.optional = optionalRelease
+        self.mandatory = mandatoryRelease
+        self.compiler_flag = compiler_flag
+
+    def getOptionalRelease(self):
+        """Return the first release in which this feature was recognized.
+
+        This is a 5-tuple, of the same form as sys.version_info.
+        """
+        return self.optional
+
+    def getMandatoryRelease(self):
+        """Return the release in which this feature will become mandatory.
+
+        This is a 5-tuple, of the same form as sys.version_info, or, if
+        the feature was dropped, is None.
+        """
+        return self.mandatory
+
+    def __repr__(self):
+        return "_Feature" + repr((self.optional,
+                                  self.mandatory,
+                                  self.compiler_flag))
+
+nested_scopes = _Feature((2, 1, 0, "beta", 1),
+                         (2, 2, 0, "alpha", 0),
+                         CO_NESTED)
+
+generators = _Feature((2, 2, 0, "alpha", 1),
+                      (2, 3, 0, "final", 0),
+                      CO_GENERATOR_ALLOWED)
+
+division = _Feature((2, 2, 0, "alpha", 2),
+                    (3, 0, 0, "alpha", 0),
+                    CO_FUTURE_DIVISION)
+
+absolute_import = _Feature((2, 5, 0, "alpha", 1),
+                           (3, 0, 0, "alpha", 0),
+                           CO_FUTURE_ABSOLUTE_IMPORT)
+
+with_statement = _Feature((2, 5, 0, "alpha", 1),
+                          (2, 6, 0, "alpha", 0),
+                          CO_FUTURE_WITH_STATEMENT)
+
+print_function = _Feature((2, 6, 0, "alpha", 2),
+                          (3, 0, 0, "alpha", 0),
+                          CO_FUTURE_PRINT_FUNCTION)
+
+unicode_literals = _Feature((2, 6, 0, "alpha", 2),
+                            (3, 0, 0, "alpha", 0),
+                            CO_FUTURE_UNICODE_LITERALS)
+
+barry_as_FLUFL = _Feature((3, 1, 0, "alpha", 2),
+                          (3, 9, 0, "alpha", 0),
+                          CO_FUTURE_BARRY_AS_BDFL)
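Since everything in this file funnels into the `compiler_flag` attribute, here is a quick self-contained sketch of how a `_Feature` record is consumed — standard `__future__`/`compile()` behavior, shown for context rather than taken from the port itself:

```python
import __future__

# Inspect a feature record: when it appeared, and when it became mandatory.
feat = __future__.division
print(feat.getOptionalRelease())   # (2, 2, 0, 'alpha', 2)
print(feat.getMandatoryRelease())  # (3, 0, 0, 'alpha', 0)

# Pass the bitfield flag as compile()'s fourth argument to enable the
# feature in dynamically compiled code, as the docstring above describes.
code = compile("x = 1 / 2", "<demo>", "exec", flags=feat.compiler_flag)
ns = {}
exec(code, ns)
print(ns["x"])  # 0.5 -- true division
```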
diff --git a/ouroboros/__init__.py b/ouroboros/__init__.py
deleted file mode 100644
index 532a481..0000000
--- a/ouroboros/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# Examples of valid version strings
-# __version__ = '1.2.3.dev1'  # Development release 1
-# __version__ = '1.2.3a1'     # Alpha Release 1
-# __version__ = '1.2.3b1'     # Beta Release 1
-# __version__ = '1.2.3rc1'    # RC Release 1
-# __version__ = '1.2.3'       # Final Release
-# __version__ = '1.2.3.post1' # Post Release 1
-
-__version__ = '0.0.0'
diff --git a/ouroboros/__phello__.foo.py b/ouroboros/__phello__.foo.py
new file mode 100644
index 0000000..8e8623e
--- /dev/null
+++ b/ouroboros/__phello__.foo.py
@@ -0,0 +1 @@
+# This file exists as a helper for the test.test_frozen module.
diff --git a/ouroboros/_bootlocale.py b/ouroboros/_bootlocale.py
new file mode 100644
index 0000000..4bccac1
--- /dev/null
+++ b/ouroboros/_bootlocale.py
@@ -0,0 +1,34 @@
+"""A minimal subset of the locale module used at interpreter startup
+(imported by the _io module), in order to reduce startup time.
+
+Don't import directly from third-party code; use the `locale` module instead!
+"""
+
+import sys
+import _locale
+
+if sys.platform.startswith("win"):
+    def getpreferredencoding(do_setlocale=True):
+        return _locale._getdefaultlocale()[1]
+else:
+    try:
+        _locale.CODESET
+    except AttributeError:
+        def getpreferredencoding(do_setlocale=True):
+            # This path for legacy systems needs the more complex
+            # getdefaultlocale() function; import the full locale module.
+            import locale
+            return locale.getpreferredencoding(do_setlocale)
+    else:
+        def getpreferredencoding(do_setlocale=True):
+            assert not do_setlocale
+            result = _locale.nl_langinfo(_locale.CODESET)
+            if not result and sys.platform == 'darwin':
+                # nl_langinfo can return an empty string
+                # when the setting has an invalid value.
+                # Default to UTF-8 in that case because
+                # UTF-8 is the default charset on OSX and
+                # returning nothing will crash the
+                # interpreter.
+                result = 'UTF-8'
+            return result
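A small sanity check of the fast path above — assuming a POSIX build where `_locale` exposes `CODESET`, and an interpreter that still ships the private `_bootlocale` module — the bootstrap helper and the full `locale` module should agree:

```python
import locale
import _bootlocale  # private module; shipped on CPython 3.4-3.9

# Passing False satisfies the `assert not do_setlocale` in the fast path.
print(_bootlocale.getpreferredencoding(False))  # e.g. 'UTF-8'
print(locale.getpreferredencoding(False))       # e.g. 'UTF-8'
```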
diff --git a/ouroboros/_collections_abc.py b/ouroboros/_collections_abc.py
new file mode 100644
index 0000000..6477444
--- /dev/null
+++ b/ouroboros/_collections_abc.py
@@ -0,0 +1,753 @@
+# Copyright 2007 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Abstract Base Classes (ABCs) for collections, according to PEP 3119.
+
+Unit tests are in test_collections.
+"""
+
+from abc import ABCMeta, abstractmethod
+import sys
+
+__all__ = ["Hashable", "Iterable", "Iterator",
+           "Sized", "Container", "Callable",
+           "Set", "MutableSet",
+           "Mapping", "MutableMapping",
+           "MappingView", "KeysView", "ItemsView", "ValuesView",
+           "Sequence", "MutableSequence",
+           "ByteString",
+           ]
+
+# This module has been renamed from collections.abc to _collections_abc to
+# speed up interpreter startup. Some of the types such as MutableMapping are
+# required early but collections module imports a lot of other modules.
+# See issue #19218
+__name__ = "collections.abc"
+
+# Private list of types that we want to register with the various ABCs
+# so that they will pass tests like:
+#       it = iter(somebytearray)
+#       assert isinstance(it, Iterable)
+# Note: in other implementations, these types may not be distinct
+# and they may have their own implementation-specific types that
+# are not included on this list.
+bytes_iterator = type(iter(b''))
+bytearray_iterator = type(iter(bytearray()))
+#callable_iterator = ???
+dict_keyiterator = type(iter({}.keys()))
+dict_valueiterator = type(iter({}.values()))
+dict_itemiterator = type(iter({}.items()))
+list_iterator = type(iter([]))
+list_reverseiterator = type(iter(reversed([])))
+range_iterator = type(iter(range(0)))
+set_iterator = type(iter(set()))
+str_iterator = type(iter(""))
+tuple_iterator = type(iter(()))
+zip_iterator = type(iter(zip()))
+## views ##
+dict_keys = type({}.keys())
+dict_values = type({}.values())
+dict_items = type({}.items())
+## misc ##
+mappingproxy = type(type.__dict__)
+
+
+### ONE-TRICK PONIES ###
+
+class Hashable(metaclass=ABCMeta):
+
+    __slots__ = ()
+
+    @abstractmethod
+    def __hash__(self):
+        return 0
+
+    @classmethod
+    def __subclasshook__(cls, C):
+        if cls is Hashable:
+            for B in C.__mro__:
+                if "__hash__" in B.__dict__:
+                    if B.__dict__["__hash__"]:
+                        return True
+                    break
+        return NotImplemented
+
+
+class Iterable(metaclass=ABCMeta):
+
+    __slots__ = ()
+
+    @abstractmethod
+    def __iter__(self):
+        while False:
+            yield None
+
+    @classmethod
+    def __subclasshook__(cls, C):
+        if cls is Iterable:
+            if any("__iter__" in B.__dict__ for B in C.__mro__):
+                return True
+        return NotImplemented
+
+
+class Iterator(Iterable):
+
+    __slots__ = ()
+
+    @abstractmethod
+    def __next__(self):
+        'Return the next item from the iterator. 
When exhausted, raise StopIteration' + raise StopIteration + + def __iter__(self): + return self + + @classmethod + def __subclasshook__(cls, C): + if cls is Iterator: + if (any("__next__" in B.__dict__ for B in C.__mro__) and + any("__iter__" in B.__dict__ for B in C.__mro__)): + return True + return NotImplemented + +Iterator.register(bytes_iterator) +Iterator.register(bytearray_iterator) +#Iterator.register(callable_iterator) +Iterator.register(dict_keyiterator) +Iterator.register(dict_valueiterator) +Iterator.register(dict_itemiterator) +Iterator.register(list_iterator) +Iterator.register(list_reverseiterator) +Iterator.register(range_iterator) +Iterator.register(set_iterator) +Iterator.register(str_iterator) +Iterator.register(tuple_iterator) +Iterator.register(zip_iterator) + +class Sized(metaclass=ABCMeta): + + __slots__ = () + + @abstractmethod + def __len__(self): + return 0 + + @classmethod + def __subclasshook__(cls, C): + if cls is Sized: + if any("__len__" in B.__dict__ for B in C.__mro__): + return True + return NotImplemented + + +class Container(metaclass=ABCMeta): + + __slots__ = () + + @abstractmethod + def __contains__(self, x): + return False + + @classmethod + def __subclasshook__(cls, C): + if cls is Container: + if any("__contains__" in B.__dict__ for B in C.__mro__): + return True + return NotImplemented + + +class Callable(metaclass=ABCMeta): + + __slots__ = () + + @abstractmethod + def __call__(self, *args, **kwds): + return False + + @classmethod + def __subclasshook__(cls, C): + if cls is Callable: + if any("__call__" in B.__dict__ for B in C.__mro__): + return True + return NotImplemented + + +### SETS ### + + +class Set(Sized, Iterable, Container): + + """A set is a finite, iterable container. + + This class provides concrete generic implementations of all + methods except for __contains__, __iter__ and __len__. + + To override the comparisons (presumably for speed, as the + semantics are fixed), redefine __le__ and __ge__, + then the other operations will automatically follow suit. + """ + + __slots__ = () + + def __le__(self, other): + if not isinstance(other, Set): + return NotImplemented + if len(self) > len(other): + return False + for elem in self: + if elem not in other: + return False + return True + + def __lt__(self, other): + if not isinstance(other, Set): + return NotImplemented + return len(self) < len(other) and self.__le__(other) + + def __gt__(self, other): + if not isinstance(other, Set): + return NotImplemented + return len(self) > len(other) and self.__ge__(other) + + def __ge__(self, other): + if not isinstance(other, Set): + return NotImplemented + if len(self) < len(other): + return False + for elem in other: + if elem not in self: + return False + return True + + def __eq__(self, other): + if not isinstance(other, Set): + return NotImplemented + return len(self) == len(other) and self.__le__(other) + + def __ne__(self, other): + return not (self == other) + + @classmethod + def _from_iterable(cls, it): + '''Construct an instance of the class from any iterable input. + + Must override this method if the class constructor signature + does not accept an iterable for an input. + ''' + return cls(it) + + def __and__(self, other): + if not isinstance(other, Iterable): + return NotImplemented + return self._from_iterable(value for value in other if value in self) + + __rand__ = __and__ + + def isdisjoint(self, other): + 'Return True if two sets have a null intersection.' 
+        for value in other:
+            if value in self:
+                return False
+        return True
+
+    def __or__(self, other):
+        if not isinstance(other, Iterable):
+            return NotImplemented
+        chain = (e for s in (self, other) for e in s)
+        return self._from_iterable(chain)
+
+    __ror__ = __or__
+
+    def __sub__(self, other):
+        if not isinstance(other, Set):
+            if not isinstance(other, Iterable):
+                return NotImplemented
+            other = self._from_iterable(other)
+        return self._from_iterable(value for value in self
+                                   if value not in other)
+
+    def __rsub__(self, other):
+        if not isinstance(other, Set):
+            if not isinstance(other, Iterable):
+                return NotImplemented
+            other = self._from_iterable(other)
+        return self._from_iterable(value for value in other
+                                   if value not in self)
+
+    def __xor__(self, other):
+        if not isinstance(other, Set):
+            if not isinstance(other, Iterable):
+                return NotImplemented
+            other = self._from_iterable(other)
+        return (self - other) | (other - self)
+
+    __rxor__ = __xor__
+
+    def _hash(self):
+        """Compute the hash value of a set.
+
+        Note that we don't define __hash__: not all sets are hashable.
+        But if you define a hashable set type, its __hash__ should
+        call this function.
+
+        This must be compatible with __eq__.
+
+        All sets ought to compare equal if they contain the same
+        elements, regardless of how they are implemented, and
+        regardless of the order of the elements; so there's not much
+        freedom for __eq__ or __hash__.  We match the algorithm used
+        by the built-in frozenset type.
+        """
+        MAX = sys.maxsize
+        MASK = 2 * MAX + 1
+        n = len(self)
+        h = 1927868237 * (n + 1)
+        h &= MASK
+        for x in self:
+            hx = hash(x)
+            h ^= (hx ^ (hx << 16) ^ 89869747)  * 3644798167
+            h &= MASK
+        h = h * 69069 + 907133923
+        h &= MASK
+        if h > MAX:
+            h -= MASK + 1
+        if h == -1:
+            h = 590923713
+        return h
+
+Set.register(frozenset)
+
+
+class MutableSet(Set):
+    """A mutable set is a finite, iterable container.
+
+    This class provides concrete generic implementations of all
+    methods except for __contains__, __iter__, __len__,
+    add(), and discard().
+
+    To override the comparisons (presumably for speed, as the
+    semantics are fixed), all you have to do is redefine __le__ and
+    then the other operations will automatically follow suit.
+    """
+
+    __slots__ = ()
+
+    @abstractmethod
+    def add(self, value):
+        """Add an element."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def discard(self, value):
+        """Remove an element.  Do not raise an exception if absent."""
+        raise NotImplementedError
+
+    def remove(self, value):
+        """Remove an element. If not a member, raise a KeyError."""
+        if value not in self:
+            raise KeyError(value)
+        self.discard(value)
+
+    def pop(self):
+        """Return the popped value.  Raise KeyError if empty."""
+        it = iter(self)
+        try:
+            value = next(it)
+        except StopIteration:
+            raise KeyError
+        self.discard(value)
+        return value
+
+    def clear(self):
+        """This is slow (creates N new iterators!) 
but effective.""" + try: + while True: + self.pop() + except KeyError: + pass + + def __ior__(self, it): + for value in it: + self.add(value) + return self + + def __iand__(self, it): + for value in (self - it): + self.discard(value) + return self + + def __ixor__(self, it): + if it is self: + self.clear() + else: + if not isinstance(it, Set): + it = self._from_iterable(it) + for value in it: + if value in self: + self.discard(value) + else: + self.add(value) + return self + + def __isub__(self, it): + if it is self: + self.clear() + else: + for value in it: + self.discard(value) + return self + +MutableSet.register(set) + + +### MAPPINGS ### + + +class Mapping(Sized, Iterable, Container): + + __slots__ = () + + """A Mapping is a generic container for associating key/value + pairs. + + This class provides concrete generic implementations of all + methods except for __getitem__, __iter__, and __len__. + + """ + + @abstractmethod + def __getitem__(self, key): + raise KeyError + + def get(self, key, default=None): + 'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.' + try: + return self[key] + except KeyError: + return default + + def __contains__(self, key): + try: + self[key] + except KeyError: + return False + else: + return True + + def keys(self): + "D.keys() -> a set-like object providing a view on D's keys" + return KeysView(self) + + def items(self): + "D.items() -> a set-like object providing a view on D's items" + return ItemsView(self) + + def values(self): + "D.values() -> an object providing a view on D's values" + return ValuesView(self) + + def __eq__(self, other): + if not isinstance(other, Mapping): + return NotImplemented + return dict(self.items()) == dict(other.items()) + + def __ne__(self, other): + return not (self == other) + +Mapping.register(mappingproxy) + + +class MappingView(Sized): + + def __init__(self, mapping): + self._mapping = mapping + + def __len__(self): + return len(self._mapping) + + def __repr__(self): + return '{0.__class__.__name__}({0._mapping!r})'.format(self) + + +class KeysView(MappingView, Set): + + @classmethod + def _from_iterable(self, it): + return set(it) + + def __contains__(self, key): + return key in self._mapping + + def __iter__(self): + yield from self._mapping + +KeysView.register(dict_keys) + + +class ItemsView(MappingView, Set): + + @classmethod + def _from_iterable(self, it): + return set(it) + + def __contains__(self, item): + key, value = item + try: + v = self._mapping[key] + except KeyError: + return False + else: + return v == value + + def __iter__(self): + for key in self._mapping: + yield (key, self._mapping[key]) + +ItemsView.register(dict_items) + + +class ValuesView(MappingView): + + def __contains__(self, value): + for key in self._mapping: + if value == self._mapping[key]: + return True + return False + + def __iter__(self): + for key in self._mapping: + yield self._mapping[key] + +ValuesView.register(dict_values) + + +class MutableMapping(Mapping): + + __slots__ = () + + """A MutableMapping is a generic container for associating + key/value pairs. + + This class provides concrete generic implementations of all + methods except for __getitem__, __setitem__, __delitem__, + __iter__, and __len__. + + """ + + @abstractmethod + def __setitem__(self, key, value): + raise KeyError + + @abstractmethod + def __delitem__(self, key): + raise KeyError + + __marker = object() + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. 
+ If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def popitem(self): + '''D.popitem() -> (k, v), remove and return some (key, value) pair + as a 2-tuple; but raise KeyError if D is empty. + ''' + try: + key = next(iter(self)) + except StopIteration: + raise KeyError + value = self[key] + del self[key] + return key, value + + def clear(self): + 'D.clear() -> None. Remove all items from D.' + try: + while True: + self.popitem() + except KeyError: + pass + + def update(*args, **kwds): + ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. + If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v + In either case, this is followed by: for k, v in F.items(): D[k] = v + ''' + if len(args) > 2: + raise TypeError("update() takes at most 2 positional " + "arguments ({} given)".format(len(args))) + elif not args: + raise TypeError("update() takes at least 1 argument (0 given)") + self = args[0] + other = args[1] if len(args) >= 2 else () + + if isinstance(other, Mapping): + for key in other: + self[key] = other[key] + elif hasattr(other, "keys"): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + def setdefault(self, key, default=None): + 'D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D' + try: + return self[key] + except KeyError: + self[key] = default + return default + +MutableMapping.register(dict) + + +### SEQUENCES ### + + +class Sequence(Sized, Iterable, Container): + + """All the operations on a read-only sequence. + + Concrete subclasses must override __new__ or __init__, + __getitem__, and __len__. + """ + + __slots__ = () + + @abstractmethod + def __getitem__(self, index): + raise IndexError + + def __iter__(self): + i = 0 + try: + while True: + v = self[i] + yield v + i += 1 + except IndexError: + return + + def __contains__(self, value): + for v in self: + if v == value: + return True + return False + + def __reversed__(self): + for i in reversed(range(len(self))): + yield self[i] + + def index(self, value): + '''S.index(value) -> integer -- return first index of value. + Raises ValueError if the value is not present. + ''' + for i, v in enumerate(self): + if v == value: + return i + raise ValueError + + def count(self, value): + 'S.count(value) -> integer -- return number of occurrences of value' + return sum(1 for v in self if v == value) + +Sequence.register(tuple) +Sequence.register(str) +Sequence.register(range) +Sequence.register(memoryview) + + +class ByteString(Sequence): + + """This unifies bytes and bytearray. + + XXX Should add all their methods. + """ + + __slots__ = () + +ByteString.register(bytes) +ByteString.register(bytearray) + + +class MutableSequence(Sequence): + + __slots__ = () + + """All the operations on a read-write sequence. + + Concrete subclasses must provide __new__ or __init__, + __getitem__, __setitem__, __delitem__, __len__, and insert(). 
+
+    """
+
+    @abstractmethod
+    def __setitem__(self, index, value):
+        raise IndexError
+
+    @abstractmethod
+    def __delitem__(self, index):
+        raise IndexError
+
+    @abstractmethod
+    def insert(self, index, value):
+        'S.insert(index, value) -- insert value before index'
+        raise IndexError
+
+    def append(self, value):
+        'S.append(value) -- append value to the end of the sequence'
+        self.insert(len(self), value)
+
+    def clear(self):
+        'S.clear() -> None -- remove all items from S'
+        try:
+            while True:
+                self.pop()
+        except IndexError:
+            pass
+
+    def reverse(self):
+        'S.reverse() -- reverse *IN PLACE*'
+        n = len(self)
+        for i in range(n//2):
+            self[i], self[n-i-1] = self[n-i-1], self[i]
+
+    def extend(self, values):
+        'S.extend(iterable) -- extend sequence by appending elements from the iterable'
+        for v in values:
+            self.append(v)
+
+    def pop(self, index=-1):
+        '''S.pop([index]) -> item -- remove and return item at index (default last).
+           Raise IndexError if list is empty or index is out of range.
+        '''
+        v = self[index]
+        del self[index]
+        return v
+
+    def remove(self, value):
+        '''S.remove(value) -- remove first occurrence of value.
+           Raise ValueError if the value is not present.
+        '''
+        del self[self.index(value)]
+
+    def __iadd__(self, values):
+        self.extend(values)
+        return self
+
+MutableSequence.register(list)
+MutableSequence.register(bytearray)  # Multiply inheriting, see ByteString
diff --git a/ouroboros/_compat_pickle.py b/ouroboros/_compat_pickle.py
new file mode 100644
index 0000000..978c01e
--- /dev/null
+++ b/ouroboros/_compat_pickle.py
@@ -0,0 +1,137 @@
+# This module is used to map the old Python 2 names to the new names used in
+# Python 3 for the pickle module.  This is needed to make pickle streams
+# generated with Python 2 loadable by Python 3.
+
+# This is a copy of lib2to3.fixes.fix_imports.MAPPING.  We cannot import
+# lib2to3 and use the mapping defined there, because lib2to3 uses pickle.
+# Thus, this could cause the module to be imported recursively.
+IMPORT_MAPPING = { + 'StringIO': 'io', + 'cStringIO': 'io', + 'cPickle': 'pickle', + '__builtin__' : 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'SocketServer': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'FileDialog': 'tkinter.filedialog', + 'tkFileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'tkSimpleDialog': 'tkinter.simpledialog', + 'tkColorChooser': 'tkinter.colorchooser', + 'tkCommonDialog': 'tkinter.commondialog', + 'Dialog': 'tkinter.dialog', + 'Tkdnd': 'tkinter.dnd', + 'tkFont': 'tkinter.font', + 'tkMessageBox': 'tkinter.messagebox', + 'ScrolledText': 'tkinter.scrolledtext', + 'Tkconstants': 'tkinter.constants', + 'Tix': 'tkinter.tix', + 'ttk': 'tkinter.ttk', + 'Tkinter': 'tkinter', + 'markupbase': '_markupbase', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + 'dbhash': 'dbm.bsd', + 'dumbdbm': 'dbm.dumb', + 'dbm': 'dbm.ndbm', + 'gdbm': 'dbm.gnu', + 'xmlrpclib': 'xmlrpc.client', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleXMLRPCServer': 'xmlrpc.server', + 'httplib': 'http.client', + 'htmlentitydefs' : 'html.entities', + 'HTMLParser' : 'html.parser', + 'Cookie': 'http.cookies', + 'cookielib': 'http.cookiejar', + 'BaseHTTPServer': 'http.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', + 'test.test_support': 'test.support', + 'commands': 'subprocess', + 'UserString' : 'collections', + 'UserList' : 'collections', + 'urlparse' : 'urllib.parse', + 'robotparser' : 'urllib.robotparser', + 'whichdb': 'dbm', + 'anydbm': 'dbm' +} + + +# This contains rename rules that are easy to handle. We ignore the more +# complex stuff (e.g. mapping the names in the urllib and types modules). +# These rules should be run before import names are fixed. 
+NAME_MAPPING = { + ('__builtin__', 'xrange'): ('builtins', 'range'), + ('__builtin__', 'reduce'): ('functools', 'reduce'), + ('__builtin__', 'intern'): ('sys', 'intern'), + ('__builtin__', 'unichr'): ('builtins', 'chr'), + ('__builtin__', 'basestring'): ('builtins', 'str'), + ('__builtin__', 'long'): ('builtins', 'int'), + ('itertools', 'izip'): ('builtins', 'zip'), + ('itertools', 'imap'): ('builtins', 'map'), + ('itertools', 'ifilter'): ('builtins', 'filter'), + ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'), +} + +PYTHON2_EXCEPTIONS = ( + "ArithmeticError", + "AssertionError", + "AttributeError", + "BaseException", + "BufferError", + "BytesWarning", + "DeprecationWarning", + "EOFError", + "EnvironmentError", + "Exception", + "FloatingPointError", + "FutureWarning", + "GeneratorExit", + "IOError", + "ImportError", + "ImportWarning", + "IndentationError", + "IndexError", + "KeyError", + "KeyboardInterrupt", + "LookupError", + "MemoryError", + "NameError", + "NotImplementedError", + "OSError", + "OverflowError", + "PendingDeprecationWarning", + "ReferenceError", + "RuntimeError", + "RuntimeWarning", + # StandardError is gone in Python 3, so we map it to Exception + "StopIteration", + "SyntaxError", + "SyntaxWarning", + "SystemError", + "SystemExit", + "TabError", + "TypeError", + "UnboundLocalError", + "UnicodeDecodeError", + "UnicodeEncodeError", + "UnicodeError", + "UnicodeTranslateError", + "UnicodeWarning", + "UserWarning", + "ValueError", + "Warning", + "ZeroDivisionError", +) + +for excname in PYTHON2_EXCEPTIONS: + NAME_MAPPING[("exceptions", excname)] = ("builtins", excname) + +NAME_MAPPING[("exceptions", "StandardError")] = ("builtins", "Exception") + +# Same, but for 3.x to 2.x +REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items()) +REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items()) diff --git a/ouroboros/_dummy_thread.py b/ouroboros/_dummy_thread.py new file mode 100644 index 0000000..b67cfb9 --- /dev/null +++ b/ouroboros/_dummy_thread.py @@ -0,0 +1,155 @@ +"""Drop-in replacement for the thread module. + +Meant to be used as a brain-dead substitute so that threaded code does +not need to be rewritten for when the thread module is not present. + +Suggested usage is:: + + try: + import _thread + except ImportError: + import _dummy_thread as _thread + +""" +# Exports only things specified by thread documentation; +# skipping obsolete synonyms allocate(), start_new(), exit_thread(). +__all__ = ['error', 'start_new_thread', 'exit', 'get_ident', 'allocate_lock', + 'interrupt_main', 'LockType'] + +# A dummy value +TIMEOUT_MAX = 2**31 + +# NOTE: this module can be imported early in the extension building process, +# and so top level imports of other modules should be avoided. Instead, all +# imports are done when needed on a function-by-function basis. Since threads +# are disabled, the import lock should not be an issue anyway (??). + +error = RuntimeError + +def start_new_thread(function, args, kwargs={}): + """Dummy implementation of _thread.start_new_thread(). + + Compatibility is maintained by making sure that ``args`` is a + tuple and ``kwargs`` is a dictionary. If an exception is raised + and it is SystemExit (which can be done by _thread.exit()) it is + caught and nothing is done; all other exceptions are printed out + by using traceback.print_exc(). + + If the executed function calls interrupt_main the KeyboardInterrupt will be + raised when the function returns. 
+ + """ + if type(args) != type(tuple()): + raise TypeError("2nd arg must be a tuple") + if type(kwargs) != type(dict()): + raise TypeError("3rd arg must be a dict") + global _main + _main = False + try: + function(*args, **kwargs) + except SystemExit: + pass + except: + import traceback + traceback.print_exc() + _main = True + global _interrupt + if _interrupt: + _interrupt = False + raise KeyboardInterrupt + +def exit(): + """Dummy implementation of _thread.exit().""" + raise SystemExit + +def get_ident(): + """Dummy implementation of _thread.get_ident(). + + Since this module should only be used when _threadmodule is not + available, it is safe to assume that the current process is the + only thread. Thus a constant can be safely returned. + """ + return -1 + +def allocate_lock(): + """Dummy implementation of _thread.allocate_lock().""" + return LockType() + +def stack_size(size=None): + """Dummy implementation of _thread.stack_size().""" + if size is not None: + raise error("setting thread stack size not supported") + return 0 + +def _set_sentinel(): + """Dummy implementation of _thread._set_sentinel().""" + return LockType() + +class LockType(object): + """Class implementing dummy implementation of _thread.LockType. + + Compatibility is maintained by maintaining self.locked_status + which is a boolean that stores the state of the lock. Pickling of + the lock, though, should not be done since if the _thread module is + then used with an unpickled ``lock()`` from here problems could + occur from this class not having atomic methods. + + """ + + def __init__(self): + self.locked_status = False + + def acquire(self, waitflag=None, timeout=-1): + """Dummy implementation of acquire(). + + For blocking calls, self.locked_status is automatically set to + True and returned appropriately based on value of + ``waitflag``. If it is non-blocking, then the value is + actually checked and not set if it is already acquired. This + is all done so that threading.Condition's assert statements + aren't triggered and throw a little fit. + + """ + if waitflag is None or waitflag: + self.locked_status = True + return True + else: + if not self.locked_status: + self.locked_status = True + return True + else: + if timeout > 0: + import time + time.sleep(timeout) + return False + + __enter__ = acquire + + def __exit__(self, typ, val, tb): + self.release() + + def release(self): + """Release the dummy lock.""" + # XXX Perhaps shouldn't actually bother to test? Could lead + # to problems for complex, threaded code. + if not self.locked_status: + raise error + self.locked_status = False + return True + + def locked(self): + return self.locked_status + +# Used to signal that interrupt_main was called in a "thread" +_interrupt = False +# True when not executing in a "thread" +_main = True + +def interrupt_main(): + """Set _interrupt flag to True to have start_new_thread raise + KeyboardInterrupt upon exiting.""" + if _main: + raise KeyboardInterrupt + else: + global _interrupt + _interrupt = True diff --git a/ouroboros/_markupbase.py b/ouroboros/_markupbase.py new file mode 100644 index 0000000..2af5f1c --- /dev/null +++ b/ouroboros/_markupbase.py @@ -0,0 +1,395 @@ +"""Shared support for scanning document type declarations in HTML and XHTML. + +This module is used as a foundation for the html.parser module. It has no +documented public API and should not be used directly. 
+
+"""
+
+import re
+
+_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
+_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
+_commentclose = re.compile(r'--\s*>')
+_markedsectionclose = re.compile(r']\s*]\s*>')
+
+# An analysis of the MS-Word extensions is available at
+# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
+
+_msmarkedsectionclose = re.compile(r']\s*>')
+
+del re
+
+
+class ParserBase:
+    """Parser base class which provides some common support methods used
+    by the SGML/HTML and XHTML parsers."""
+
+    def __init__(self):
+        if self.__class__ is ParserBase:
+            raise RuntimeError(
+                "_markupbase.ParserBase must be subclassed")
+
+    def error(self, message):
+        raise NotImplementedError(
+            "subclasses of ParserBase must override error()")
+
+    def reset(self):
+        self.lineno = 1
+        self.offset = 0
+
+    def getpos(self):
+        """Return current line number and offset."""
+        return self.lineno, self.offset
+
+    # Internal -- update line number and offset.  This should be
+    # called for each piece of data exactly once, in order -- in other
+    # words the concatenation of all the input strings to this
+    # function should be exactly the entire input.
+    def updatepos(self, i, j):
+        if i >= j:
+            return j
+        rawdata = self.rawdata
+        nlines = rawdata.count("\n", i, j)
+        if nlines:
+            self.lineno = self.lineno + nlines
+            pos = rawdata.rindex("\n", i, j) # Should not fail
+            self.offset = j-(pos+1)
+        else:
+            self.offset = self.offset + j-i
+        return j
+
+    _decl_otherchars = ''
+
+    # Internal -- parse declaration (for use by subclasses).
+    def parse_declaration(self, i):
+        # This is some sort of declaration; in "HTML as
+        # deployed," this should only be the document type
+        # declaration ("<!DOCTYPE html...>").
+        # ISO 8879:1986, however, has more complex
+        # declaration syntax for elements in <!...>, including:
+        # --comment--
+        # [marked section]
+        # name in the following list: ENTITY, DOCTYPE, ELEMENT,
+        # ATTLIST, NOTATION, SHORTREF, USEMAP,
+        # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
+        rawdata = self.rawdata
+        j = i + 2
+        assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
+        if rawdata[j:j+1] == ">":
+            # the empty comment <!>
+            return j + 1
+        if rawdata[j:j+1] in ("-", ""):
+            # Start of comment followed by buffer boundary,
+            # or just a buffer boundary.
+            return -1
+        # A simple, practical version could look like: ((name|stringlit) S*) + '>'
+        n = len(rawdata)
+        if rawdata[j:j+2] == '--': #comment
+            # Locate --.*-- as the body of the comment
+            return self.parse_comment(i)
+        elif rawdata[j] == '[': #marked section
+            # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
+            # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
+            # Note that this is extended by Microsoft Office "Save as Web" function
+            # to include [if...] and [endif].
+            return self.parse_marked_section(i)
+        else: #all other declaration elements
+            decltype, j = self._scan_name(j, i)
+        if j < 0:
+            return j
+        if decltype == "doctype":
+            self._decl_otherchars = ''
+        while j < n:
+            c = rawdata[j]
+            if c == ">":
+                # end of declaration syntax
+                data = rawdata[i+2:j]
+                if decltype == "doctype":
+                    self.handle_decl(data)
+                else:
+                    # According to the HTML5 specs sections "8.2.4.44 Bogus
+                    # comment state" and "8.2.4.45 Markup declaration open
+                    # state", a comment token should be emitted.
+                    # Calling unknown_decl provides more flexibility though.
+                    self.unknown_decl(data)
+                return j + 1
+            if c in "\"'":
+                m = _declstringlit_match(rawdata, j)
+                if not m:
+                    return -1 # incomplete
+                j = m.end()
+            elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+                name, j = self._scan_name(j, i)
+            elif c in self._decl_otherchars:
+                j = j + 1
+            elif c == "[":
+                # this could be handled in a separate doctype parser
+                if decltype == "doctype":
+                    j = self._parse_doctype_subset(j + 1, i)
+                elif decltype in {"attlist", "linktype", "link", "element"}:
+                    # must tolerate []'d groups in a content model in an element declaration
+                    # also in data attribute specifications of attlist declaration
+                    # also link type declaration subsets in linktype declarations
+                    # also link attribute specification lists in link declarations
+                    self.error("unsupported '[' char in %s declaration" % decltype)
+                else:
+                    self.error("unexpected '[' char in declaration")
+            else:
+                self.error(
+                    "unexpected %r char in declaration" % rawdata[j])
+            if j < 0:
+                return j
+        return -1 # incomplete
+
+    # Internal -- parse a marked section
+    # Override this to handle MS-word extension syntax <![if word]>content<![endif]>
+    def parse_marked_section(self, i, report=1):
+        rawdata= self.rawdata
+        assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
+        sectName, j = self._scan_name( i+3, i )
+        if j < 0:
+            return j
+        if sectName in {"temp", "cdata", "ignore", "include", "rcdata"}:
+            # look for standard ]]> ending
+            match= _markedsectionclose.search(rawdata, i+3)
+        elif sectName in {"if", "else", "endif"}:
+            # look for MS Office ]> ending
+            match= _msmarkedsectionclose.search(rawdata, i+3)
+        else:
+            self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
+        if not match:
+            return -1
+        if report:
+            j = match.start(0)
+            self.unknown_decl(rawdata[i+3: j])
+        return match.end(0)
+
+    # Internal -- parse comment, return length or -1 if not terminated
+    def parse_comment(self, i, report=1):
+        rawdata = self.rawdata
+        if rawdata[i:i+4] != '<!--':
+            self.error('unexpected call to parse_comment()')
+        match = _commentclose.search(rawdata, i+4)
+        if not match:
+            return -1
+        if report:
+            j = match.start(0)
+            self.handle_comment(rawdata[i+4: j])
+        return match.end(0)
+
+    # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
+    # returning the index just past any whitespace following the trailing ']'.
+    def _parse_doctype_subset(self, i, declstartpos):
+        rawdata = self.rawdata
+        n = len(rawdata)
+        j = i
+        while j < n:
+            c = rawdata[j]
+            if c == "<":
+                s = rawdata[j:j+2]
+                if s == "<":
+                    # end of buffer; incomplete
+                    return -1
+                if s != "<!":
+                    self.updatepos(declstartpos, j + 1)
+                    self.error("unexpected char in internal subset (in %r)" % s)
+                if (j + 2) == n:
+                    # end of buffer; incomplete
+                    return -1
+                if (j + 4) > n:
+                    # end of buffer; incomplete
+                    return -1
+                if rawdata[j:j+4] == "<!--":
+                    j = self.parse_comment(j, report=0)
+                    if j < 0:
+                        return j
+                    continue
+                name, j = self._scan_name(j + 2, declstartpos)
+                if j == -1:
+                    return -1
+                if name not in {"attlist", "element", "entity", "notation"}:
+                    self.updatepos(declstartpos, j + 2)
+                    self.error(
+                        "unknown declaration %r in internal subset" % name)
+                # handle the individual names
+                meth = getattr(self, "_parse_doctype_" + name)
+                j = meth(j, declstartpos)
+                if j < 0:
+                    return j
+            elif c == "%":
+                # parameter entity reference
+                if (j + 1) == n:
+                    # end of buffer; incomplete
+                    return -1
+                s, j = self._scan_name(j + 1, declstartpos)
+                if j < 0:
+                    return j
+                if rawdata[j] == ";":
+                    j = j + 1
+            elif c == "]":
+                j = j + 1
+                while j < n and rawdata[j].isspace():
+                    j = j + 1
+                if j < n:
+                    if rawdata[j] == ">":
+                        return j
+                    self.updatepos(declstartpos, j)
+                    self.error("unexpected char after internal subset")
+                else:
+                    return -1
+            elif c.isspace():
+                j = j + 1
+            else:
+                self.updatepos(declstartpos, j)
+                self.error("unexpected char %r in internal subset" % c)
+        # end of buffer reached
+        return -1
+
+    # Internal -- scan past <!ELEMENT declarations
+    def _parse_doctype_element(self, i, declstartpos):
+        name, j = self._scan_name(i, declstartpos)
+        if j == -1:
+            return -1
+        # style content model; just skip until '>'
+        rawdata = self.rawdata
+        if '>' in rawdata[j:]:
+            return rawdata.find(">", j) + 1
+        return -1
+
+    # Internal -- scan past <!ATTLIST declarations
+    def _parse_doctype_attlist(self, i, declstartpos):
+        rawdata = self.rawdata
+        name, j = self._scan_name(i, declstartpos)
+        c = rawdata[j:j+1]
+        if c == "":
+            return -1
+        if c == ">":
+            return j + 1
+        while 1:
+            # scan a series of attribute descriptions; simplified:
+            #   name type [value] [#constraint]
+            name, j = self._scan_name(j, declstartpos)
+            if j < 0:
+                return j
+            c = rawdata[j:j+1]
+            if c == "":
+                return -1
+            if c == "(":
+                # an enumerated type; look for ')'
+                if ")" in rawdata[j:]:
+                    j = rawdata.find(")", j) + 1
+                else:
+                    return -1
+                while rawdata[j:j+1].isspace():
+                    j = j + 1
+                if not rawdata[j:]:
+                    # end of buffer, incomplete
+                    return -1
+            else:
+                name, j = self._scan_name(j, declstartpos)
+            c = rawdata[j:j+1]
+            if not c:
+                return -1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if m:
+                    j = m.end()
+                else:
+                    return -1
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+            if c == "#":
+                if rawdata[j:] == "#":
+                    # end of buffer
+                    return -1
+                name, j = self._scan_name(j + 1, declstartpos)
+                if j < 0:
+                    return j
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+            if c == '>':
+                # all done
+                return j + 1
+
+    # Internal -- scan past <!NOTATION declarations
+    def _parse_doctype_notation(self, i, declstartpos):
+        name, j = self._scan_name(i, declstartpos)
+        if j < 0:
+            return j
+        rawdata = self.rawdata
+        while 1:
+            c = rawdata[j:j+1]
+            if not c:
+                # end of buffer; incomplete
+                return -1
+            if c == '>':
+                return j + 1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if not m:
+                    return -1
+                j = m.end()
+            else:
+                name, j = self._scan_name(j, declstartpos)
+                if j < 0:
+                    return j
+
+    # Internal -- scan past <!ENTITY declarations
+    def _parse_doctype_entity(self, i, declstartpos):
+        rawdata = self.rawdata
+        if rawdata[i:i+1] == "%":
+            j = i + 1
+            while 1:
+                c = rawdata[j:j+1]
+                if not c:
+                    return -1
+                if c.isspace():
+                    j = j + 1
+                else:
+                    break
+        else:
+            j = i
+        name, j = self._scan_name(j, declstartpos)
+        if j < 0:
+            return j
+        while 1:
+            c = self.rawdata[j:j+1]
+            if not c:
+                return -1
+            if c in "'\"":
+                m = _declstringlit_match(rawdata, j)
+                if m:
+                    j = m.end()
+                else:
+                    return -1    # incomplete
+            elif c == ">":
+                return j + 1
+            else:
+                name, j = self._scan_name(j, declstartpos)
+                if j < 0:
+                    return j
+
+    # Internal -- scan a name token and the new position and the token, or
+    # return -1 if we've reached the end of the buffer.
+    def _scan_name(self, i, declstartpos):
+        rawdata = self.rawdata
+        n = len(rawdata)
+        if i == n:
+            return None, -1
+        m = _declname_match(rawdata, i)
+        if m:
+            s = m.group()
+            name = s.strip()
+            if (i + len(s)) == n:
+                return None, -1  # end of buffer
+            return name.lower(), m.end()
+        else:
+            self.updatepos(declstartpos, i)
+            self.error("expected name token at %r"
+                       % rawdata[declstartpos:declstartpos+20])
+
+    # To be overridden -- handlers for unknown objects
+    def unknown_decl(self, data):
+        pass
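`_markupbase` has no documented public API, so the easiest way to see its declaration scanning in action is through the public `html.parser` subclass built on `ParserBase` — a small sketch using standard `HTMLParser` hooks:

```python
from html.parser import HTMLParser

class DeclEcho(HTMLParser):
    def handle_decl(self, decl):
        # Reached via ParserBase.parse_declaration() above.
        print("decl:   ", decl)

    def handle_comment(self, data):
        # Reached via ParserBase.parse_comment() above.
        print("comment:", data)

p = DeclEcho()
p.feed("<!DOCTYPE html><!-- generated page --><html></html>")
# decl:    DOCTYPE html
# comment:  generated page
```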
diff --git a/ouroboros/cgitb.py b/ouroboros/cgitb.py
new file mode 100644
--- /dev/null
+++ b/ouroboros/cgitb.py
+"""More comprehensive traceback formatting for Python scripts.
+
+To enable this module, do:
+
+    import cgitb; cgitb.enable()
+
+at the top of your script.  The optional arguments to enable() are:
+
+    display     - if true, tracebacks are displayed in the web browser
+    logdir      - if set, tracebacks are written to files in this directory
+    context     - number of lines of source code to show for each stack frame
+    format      - 'text' or 'html' controls the output format
+
+By default, tracebacks are displayed but not saved, the context is 5 lines
+and the output format is 'html' (for backwards compatibility with the
+original use of this module)
+
+Alternatively, if you have caught an exception and want cgitb to display it
+for you, call cgitb.handler().  The optional argument to handler() is a
+3-item tuple (etype, evalue, etb) just like the value of sys.exc_info().
+The default handler displays output as HTML.
+
+"""
+import inspect
+import keyword
+import linecache
+import os
+import pydoc
+import sys
+import tempfile
+import time
+import tokenize
+import traceback
+
+def reset():
+    """Return a string that resets the CGI and browser to a known state."""
+    return '''<!--: spam
+Content-Type: text/html
+
+<body bgcolor="#f0f0f8"><font color="#f0f0f8" size="-5"> -->
+<body bgcolor="#f0f0f8"><font color="#f0f0f8" size="-5"> --> -->
+</font> </font> </font> </script> </object> </blockquote> </pre>
+</table> </table> </table> </table> </table> </font> </font> </font>'''
+
+__UNDEF__ = []                          # a special sentinel object
+def small(text):
+    if text:
+        return '<small>' + text + '</small>'
+    else:
+        return ''
+
+def strong(text):
+    if text:
+        return '<strong>' + text + '</strong>'
+    else:
+        return ''
+
+def grey(text):
+    if text:
+        return '<font color="#909090">' + text + '</font>'
+    else:
+        return ''
+
+def lookup(name, frame, locals):
+    """Find the value for a given name in the given environment."""
+    if name in locals:
+        return 'local', locals[name]
+    if name in frame.f_globals:
+        return 'global', frame.f_globals[name]
+    if '__builtins__' in frame.f_globals:
+        builtins = frame.f_globals['__builtins__']
+        if type(builtins) is type({}):
+            if name in builtins:
+                return 'builtin', builtins[name]
+        else:
+            if hasattr(builtins, name):
+                return 'builtin', getattr(builtins, name)
+    return None, __UNDEF__
+
+def scanvars(reader, frame, locals):
+    """Scan one logical line of Python and look up values of variables used."""
+    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
+    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
+        if ttype == tokenize.NEWLINE: break
+        if ttype == tokenize.NAME and token not in keyword.kwlist:
+            if lasttoken == '.':
+                if parent is not __UNDEF__:
+                    value = getattr(parent, token, __UNDEF__)
+                    vars.append((prefix + token, prefix, value))
+            else:
+                where, value = lookup(token, frame, locals)
+                vars.append((token, where, value))
+        elif token == '.':
+            prefix += lasttoken + '.'
+            parent = value
+        else:
+            parent, prefix = None, ''
+        lasttoken = token
+    return vars
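A tiny sketch of the `lookup()` helper above, classifying names the way the formatters do (it assumes these helpers are in scope, e.g. pasted into a scratch module alongside them):

```python
import sys

g = 42  # a module-level name

def demo():
    x = 1  # a local name
    frame = sys._getframe()
    print(lookup('x', frame, frame.f_locals))    # ('local', 1)
    print(lookup('g', frame, frame.f_locals))    # ('global', 42)
    print(lookup('len', frame, frame.f_locals))  # ('builtin', <built-in function len>)

demo()
```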
+
+def html(einfo, context=5):
+    """Return a nice HTML document describing a given traceback."""
+    etype, evalue, etb = einfo
+    if isinstance(etype, type):
+        etype = etype.__name__
+    pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
+    date = time.ctime(time.time())
+    head = '<body bgcolor="#f0f0f8">' + pydoc.html.heading(
+        '<big><big>%s</big></big>' %
+        strong(pydoc.html.escape(str(etype))),
+        '#ffffff', '#6622aa', pyver + '<br>' + date) + '''
+<p>A problem occurred in a Python script.  Here is the sequence of
+function calls leading up to the error, in the order they occurred.</p>'''
+
+    indent = '<tt>' + small('&nbsp;' * 5) + '&nbsp;</tt>'
+    frames = []
+    records = inspect.getinnerframes(etb, context)
+    for frame, file, lnum, func, lines, index in records:
+        if file:
+            file = os.path.abspath(file)
+            link = '<a href="file://%s">%s</a>' % (file, pydoc.html.escape(file))
+        else:
+            file = link = '?'
+        args, varargs, varkw, locals = inspect.getargvalues(frame)
+        call = ''
+        if func != '?':
+            call = 'in ' + strong(func) + \
+                inspect.formatargvalues(args, varargs, varkw, locals,
+                    formatvalue=lambda value: '=' + pydoc.html.repr(value))
+
+        highlight = {}
+        def reader(lnum=[lnum]):
+            highlight[lnum[0]] = 1
+            try: return linecache.getline(file, lnum[0])
+            finally: lnum[0] += 1
+        vars = scanvars(reader, frame, locals)
+
+        rows = ['<tr><td bgcolor="#d8bbff">%s%s %s</td></tr>' %
+                ('<big>&nbsp;</big>', link, call)]
+        if index is not None:
+            i = lnum - index
+            for line in lines:
+                num = small('&nbsp;' * (5-len(str(i))) + str(i)) + '&nbsp;'
+                if i in highlight:
+                    line = '<tt>=&gt;%s%s</tt>' % (num, pydoc.html.preformat(line))
+                    rows.append('<tr><td bgcolor="#ffccee">%s</td></tr>' % line)
+                else:
+                    line = '<tt>&nbsp;&nbsp;%s%s</tt>' % (num, pydoc.html.preformat(line))
+                    rows.append('<tr><td>%s</td></tr>' % grey(line))
+                i += 1
+
+        done, dump = {}, []
+        for name, where, value in vars:
+            if name in done: continue
+            done[name] = 1
+            if value is not __UNDEF__:
+                if where in ('global', 'builtin'):
+                    name = ('<em>%s</em> ' % where) + strong(name)
+                elif where == 'local':
+                    name = strong(name)
+                else:
+                    name = where + strong(name.split('.')[-1])
+                dump.append('%s&nbsp;= %s' % (name, pydoc.html.repr(value)))
+            else:
+                dump.append(name + ' <em>undefined</em>')
+
+        rows.append('<tr><td>%s</td></tr>' % small(grey(', '.join(dump))))
+        frames.append('''
+<table width="100%%" cellspacing=0 cellpadding=0 border=0>
+%s</table>''' % '\n'.join(rows))
+
+    exception = ['<p>%s: %s' % (strong(pydoc.html.escape(str(etype))),
+                                pydoc.html.escape(str(evalue)))]
+    for name in dir(evalue):
+        if name[:1] == '_': continue
+        value = pydoc.html.repr(getattr(evalue, name))
+        exception.append('\n<br>%s%s&nbsp;=\n%s' % (indent, name, value))
+
+    return head + ''.join(frames) + ''.join(exception) + '''
+
+
+<!-- The above is a description of an error in a Python program, formatted
+     for a Web browser because the 'cgitb' module was enabled.  In case you
+     are not reading this in a Web browser, here is the original traceback:
+
+%s
+-->
+''' % pydoc.html.escape(
+          ''.join(traceback.format_exception(etype, evalue, etb)))
+
+def text(einfo, context=5):
+    """Return a plain text document describing a given traceback."""
+    etype, evalue, etb = einfo
+    if isinstance(etype, type):
+        etype = etype.__name__
+    pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
+    date = time.ctime(time.time())
+    head = "%s\n%s\n%s\n" % (str(etype), pyver, date) + '''
+A problem occurred in a Python script.  Here is the sequence of
+function calls leading up to the error, in the order they occurred.
+'''
+
+    frames = []
+    records = inspect.getinnerframes(etb, context)
+    for frame, file, lnum, func, lines, index in records:
+        file = file and os.path.abspath(file) or '?'
+        args, varargs, varkw, locals = inspect.getargvalues(frame)
+        call = ''
+        if func != '?':
+            call = 'in ' + func + \
+                inspect.formatargvalues(args, varargs, varkw, locals,
+                    formatvalue=lambda value: '=' + pydoc.text.repr(value))
+
+        highlight = {}
+        def reader(lnum=[lnum]):
+            highlight[lnum[0]] = 1
+            try: return linecache.getline(file, lnum[0])
+            finally: lnum[0] += 1
+        vars = scanvars(reader, frame, locals)
+
+        rows = [' %s %s' % (file, call)]
+        if index is not None:
+            i = lnum - index
+            for line in lines:
+                num = '%5d ' % i
+                rows.append(num+line.rstrip())
+                i += 1
+
+        done, dump = {}, []
+        for name, where, value in vars:
+            if name in done: continue
+            done[name] = 1
+            if value is not __UNDEF__:
+                if where == 'global': name = 'global ' + name
+                elif where != 'local': name = where + name.split('.')[-1]
+                dump.append('%s = %s' % (name, pydoc.text.repr(value)))
+            else:
+                dump.append(name + ' undefined')
+
+        rows.append('\n'.join(dump))
+        frames.append('\n%s\n' % '\n'.join(rows))
+
+    exception = ['%s: %s' % (str(etype), str(evalue))]
+    for name in dir(evalue):
+        value = pydoc.text.repr(getattr(evalue, name))
+        exception.append('\n%s%s = %s' % (" "*4, name, value))
+
+    return head + ''.join(frames) + ''.join(exception) + '''
+
+The above is a description of an error in a Python program.  Here is
+the original traceback:
+
+%s
+''' % ''.join(traceback.format_exception(etype, evalue, etb))
+
+class Hook:
+    """A hook to replace sys.excepthook that shows tracebacks in HTML."""
+
+    def __init__(self, display=1, logdir=None, context=5, file=None,
+                 format="html"):
+        self.display = display          # send tracebacks to browser if true
+        self.logdir = logdir            # log tracebacks to files if not None
+        self.context = context          # number of source code lines per frame
+        self.file = file or sys.stdout  # place to send the output
+        self.format = format
+
+    def __call__(self, etype, evalue, etb):
+        self.handle((etype, evalue, etb))
+
+    def handle(self, info=None):
+        info = info or sys.exc_info()
+        if self.format == "html":
+            self.file.write(reset())
+
+        formatter = (self.format=="html") and html or text
+        plain = False
+        try:
+            doc = formatter(info, self.context)
+        except:                         # just in case something goes wrong
+            doc = ''.join(traceback.format_exception(*info))
+            plain = True
+
+        if self.display:
+            if plain:
+                doc = doc.replace('&', '&amp;').replace('<', '&lt;')
+                self.file.write('<pre>' + doc + '</pre>\n')
+            else:
+                self.file.write(doc + '\n')
+        else:
+            self.file.write('<p>A problem occurred in a Python script.\n')
+
+        if self.logdir is not None:
+            suffix = ['.txt', '.html'][self.format=="html"]
+            (fd, path) = tempfile.mkstemp(suffix=suffix, dir=self.logdir)
+
+            try:
+                file = os.fdopen(fd, 'w')
+                file.write(doc)
+                file.close()
+                msg = '%s contains the description of this error.' % path
+            except:
+                msg = 'Tried to save traceback to %s, but failed.' % path
+
+            if self.format == 'html':
+                self.file.write('<p>%s</p>\n' % msg)
+            else:
+                self.file.write(msg + '\n')
+        try:
+            self.file.flush()
+        except: pass
+
+handler = Hook().handle
+def enable(display=1, logdir=None, context=5, format="html"):
+    """Install an exception handler that formats tracebacks as HTML.
+
+    The optional argument 'display' can be set to 0 to suppress sending the
+    traceback to the browser, and 'logdir' can be set to a directory to cause
+    tracebacks to be written to files there."""
+    sys.excepthook = Hook(display=display, logdir=logdir,
+                          context=context, format=format)
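Typical usage of the module above; `enable()` simply installs a `Hook` as `sys.excepthook` (the `faulty()` function is invented for the demo):

```python
import cgitb
cgitb.enable(format="text")  # plain-text tracebacks on sys.stdout

def faulty():
    return 1 / 0

faulty()  # the uncaught ZeroDivisionError is now formatted by cgitb
```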
diff --git a/ouroboros/chunk.py b/ouroboros/chunk.py
new file mode 100644
index 0000000..dc90a75
--- /dev/null
+++ b/ouroboros/chunk.py
@@ -0,0 +1,167 @@
+"""Simple class to read IFF chunks.
+
+An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File
+Format)) has the following structure:
+
++----------------+
+| ID (4 bytes)   |
++----------------+
+| size (4 bytes) |
++----------------+
+| data           |
+| ...            |
++----------------+
+
+The ID is a 4-byte string which identifies the type of chunk.
+
+The size field (a 32-bit value, encoded using big-endian byte order)
+gives the size of the whole chunk, including the 8-byte header.
+
+Usually an IFF-type file consists of one or more chunks.  The proposed
+usage of the Chunk class defined here is to instantiate an instance at
+the start of each chunk and read from the instance until it reaches
+the end, after which a new instance can be instantiated.  At the end
+of the file, creating a new instance will fail with an EOFError
+exception.
+
+Usage:
+while True:
+    try:
+        chunk = Chunk(file)
+    except EOFError:
+        break
+    chunktype = chunk.getname()
+    while True:
+        data = chunk.read(nbytes)
+        if not data:
+            break
+        # do something with data
+
+The interface is file-like.  The implemented methods are:
+read, close, seek, tell, isatty.
+Extra methods are: skip() (called by close, skips to the end of the chunk),
+getname() (returns the name (ID) of the chunk)
+
+The __init__ method has one required argument, a file-like object
+(including a chunk instance), and one optional argument, a flag which
+specifies whether or not chunks are aligned on 2-byte boundaries.  The
+default is 1, i.e. aligned.
+"""
+
+class Chunk:
+    def __init__(self, file, align=True, bigendian=True, inclheader=False):
+        import struct
+        self.closed = False
+        self.align = align      # whether to align to word (2-byte) boundaries
+        if bigendian:
+            strflag = '>'
+        else:
+            strflag = '<'
+        self.file = file
+        self.chunkname = file.read(4)
+        if len(self.chunkname) < 4:
+            raise EOFError
+        try:
+            self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0]
+        except struct.error:
+            raise EOFError
+        if inclheader:
+            self.chunksize = self.chunksize - 8 # subtract header
+        self.size_read = 0
+        try:
+            self.offset = self.file.tell()
+        except (AttributeError, OSError):
+            self.seekable = False
+        else:
+            self.seekable = True
+
+    def getname(self):
+        """Return the name (ID) of the current chunk."""
+        return self.chunkname
+
+    def getsize(self):
+        """Return the size of the current chunk."""
+        return self.chunksize
+
+    def close(self):
+        if not self.closed:
+            self.skip()
+            self.closed = True
+
+    def isatty(self):
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        return False
+
+    def seek(self, pos, whence=0):
+        """Seek to the specified position in the chunk.
+        Default position is 0 (start of chunk).
+        If the file is not seekable, this will result in an error.
+        """
+
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        if not self.seekable:
+            raise OSError("cannot seek")
+        if whence == 1:
+            pos = pos + self.size_read
+        elif whence == 2:
+            pos = pos + self.chunksize
+        if pos < 0 or pos > self.chunksize:
+            raise RuntimeError
+        self.file.seek(self.offset + pos, 0)
+        self.size_read = pos
+
+    def tell(self):
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        return self.size_read
+
+    def read(self, size=-1):
+        """Read at most size bytes from the chunk.
+        If size is omitted or negative, read until the end
+        of the chunk.
+        """
+
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        if self.size_read >= self.chunksize:
+            return ''
+        if size < 0:
+            size = self.chunksize - self.size_read
+        if size > self.chunksize - self.size_read:
+            size = self.chunksize - self.size_read
+        data = self.file.read(size)
+        self.size_read = self.size_read + len(data)
+        if self.size_read == self.chunksize and \
+           self.align and \
+           (self.chunksize & 1):
+            dummy = self.file.read(1)
+            self.size_read = self.size_read + len(dummy)
+        return data
+
+    def skip(self):
+        """Skip the rest of the chunk.
+        If you are not interested in the contents of the chunk,
+        this method should be called so that the file points to
+        the start of the next chunk.
+        """
+
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        if self.seekable:
+            try:
+                n = self.chunksize - self.size_read
+                # maybe fix alignment
+                if self.align and (self.chunksize & 1):
+                    n = n + 1
+                self.file.seek(n, 1)
+                self.size_read = self.size_read + n
+                return
+            except OSError:
+                pass
+        while self.size_read < self.chunksize:
+            n = min(8192, self.chunksize - self.size_read)
+            dummy = self.read(n)
+            if not dummy:
+                raise EOFError
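A hypothetical round-trip with the `Chunk` class above: build a single chunk in memory with `struct`, then read it back (the chunk ID and payload are made up for the demo):

```python
import io
import struct

payload = b"hello"
# ID, then a big-endian 32-bit size covering the 8-byte header + payload.
blob = b"DEMO" + struct.pack(">L", 8 + len(payload)) + payload

ck = Chunk(io.BytesIO(blob), inclheader=True)
print(ck.getname())  # b'DEMO'
print(ck.getsize())  # 5 -- the header has already been subtracted
print(ck.read())     # b'hello'
ck.close()
```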
diff --git a/ouroboros/cmd.py b/ouroboros/cmd.py
new file mode 100644
index 0000000..859e910
--- /dev/null
+++ b/ouroboros/cmd.py
@@ -0,0 +1,401 @@
+"""A generic class to build line-oriented command interpreters.
+
+Interpreters constructed with this class obey the following conventions:
+
+1. End of file on input is processed as the command 'EOF'.
+2. A command is parsed out of each line by collecting the prefix composed
+   of characters in the identchars member.
+3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method
+   is passed a single argument consisting of the remainder of the line.
+4. Typing an empty line repeats the last command.  (Actually, it calls the
+   method `emptyline', which may be overridden in a subclass.)
+5. There is a predefined `help' method.  Given an argument `topic', it
+   calls the command `help_topic'.  With no arguments, it lists all topics
+   with defined help_ functions, broken into up to three topics; documented
+   commands, miscellaneous help topics, and undocumented commands.
+6. The command '?' is a synonym for `help'.  The command '!' is a synonym
+   for `shell', if a do_shell method exists.
+7. If completion is enabled, completing commands will be done automatically,
+   and completing of commands args is done by calling complete_foo() with
+   arguments text, line, begidx, endidx.  text is the string we are matching
+   against; all returned matches must begin with it.  line is the current
+   input line (lstripped), begidx and endidx are the beginning and end
+   indexes of the text being matched, which could be used to provide
+   different completion depending upon which position the argument is in.
+
+The `default' method may be overridden to intercept commands for which there
+is no do_ method.
+
+The `completedefault' method may be overridden to intercept completions for
+commands that have no complete_ method.
+
+The data member `self.ruler' sets the character used to draw separator lines
+in the help messages.  If empty, no ruler line is drawn.  It defaults to "=".
+
+If the value of `self.intro' is nonempty when the cmdloop method is called,
+it is printed out on interpreter startup.  This value may be overridden
+via an optional argument to the cmdloop() method.
+
+The data members `self.doc_header', `self.misc_header', and
+`self.undoc_header' set the headers used for the help function's
+listings of documented functions, miscellaneous topics, and undocumented
+functions respectively.
+"""
+
+import string, sys
+
+__all__ = ["Cmd"]
+
+PROMPT = '(Cmd) '
+IDENTCHARS = string.ascii_letters + string.digits + '_'
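A minimal interpreter exercising the `Cmd` class defined just below, imported the way stdlib users would; the `greet`/`bye` commands are invented for the demonstration:

```python
from cmd import Cmd

class Greeter(Cmd):
    prompt = '(greet) '

    def do_greet(self, arg):
        'greet [name] -- say hello'
        print("Hello,", arg or "world")

    def do_bye(self, arg):
        'bye -- stop the loop'
        return True  # a true return value stops cmdloop()

if __name__ == '__main__':
    Greeter().cmdloop()
    # (greet) greet Ada   -> Hello, Ada
    # (greet) help greet  -> greet [name] -- say hello
    # (greet) bye         -> exits
```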
+
+class Cmd:
+    """A simple framework for writing line-oriented command interpreters.
+
+    These are often useful for test harnesses, administrative tools, and
+    prototypes that will later be wrapped in a more sophisticated interface.
+
+    A Cmd instance or subclass instance is a line-oriented interpreter
+    framework.  There is no good reason to instantiate Cmd itself; rather,
+    it's useful as a superclass of an interpreter class you define yourself
+    in order to inherit Cmd's methods and encapsulate action methods.
+
+    """
+    prompt = PROMPT
+    identchars = IDENTCHARS
+    ruler = '='
+    lastcmd = ''
+    intro = None
+    doc_leader = ""
+    doc_header = "Documented commands (type help <topic>):"
+    misc_header = "Miscellaneous help topics:"
+    undoc_header = "Undocumented commands:"
+    nohelp = "*** No help on %s"
+    use_rawinput = 1
+
+    def __init__(self, completekey='tab', stdin=None, stdout=None):
+        """Instantiate a line-oriented interpreter framework.
+
+        The optional argument 'completekey' is the readline name of a
+        completion key; it defaults to the Tab key.  If completekey is
+        not None and the readline module is available, command completion
+        is done automatically.  The optional arguments stdin and stdout
+        specify alternate input and output file objects; if not specified,
+        sys.stdin and sys.stdout are used.
+
+        """
+        if stdin is not None:
+            self.stdin = stdin
+        else:
+            self.stdin = sys.stdin
+        if stdout is not None:
+            self.stdout = stdout
+        else:
+            self.stdout = sys.stdout
+        self.cmdqueue = []
+        self.completekey = completekey
+
+    def cmdloop(self, intro=None):
+        """Repeatedly issue a prompt, accept input, parse an initial prefix
+        off the received input, and dispatch to action methods, passing them
+        the remainder of the line as argument.
+ + """ + + self.preloop() + if self.use_rawinput and self.completekey: + try: + import readline + self.old_completer = readline.get_completer() + readline.set_completer(self.complete) + readline.parse_and_bind(self.completekey+": complete") + except ImportError: + pass + try: + if intro is not None: + self.intro = intro + if self.intro: + self.stdout.write(str(self.intro)+"\n") + stop = None + while not stop: + if self.cmdqueue: + line = self.cmdqueue.pop(0) + else: + if self.use_rawinput: + try: + line = input(self.prompt) + except EOFError: + line = 'EOF' + else: + self.stdout.write(self.prompt) + self.stdout.flush() + line = self.stdin.readline() + if not len(line): + line = 'EOF' + else: + line = line.rstrip('\r\n') + line = self.precmd(line) + stop = self.onecmd(line) + stop = self.postcmd(stop, line) + self.postloop() + finally: + if self.use_rawinput and self.completekey: + try: + import readline + readline.set_completer(self.old_completer) + except ImportError: + pass + + + def precmd(self, line): + """Hook method executed just before the command line is + interpreted, but after the input prompt is generated and issued. + + """ + return line + + def postcmd(self, stop, line): + """Hook method executed just after a command dispatch is finished.""" + return stop + + def preloop(self): + """Hook method executed once when the cmdloop() method is called.""" + pass + + def postloop(self): + """Hook method executed once when the cmdloop() method is about to + return. + + """ + pass + + def parseline(self, line): + """Parse the line into a command name and a string containing + the arguments. Returns a tuple containing (command, args, line). + 'command' and 'args' may be None if the line couldn't be parsed. + """ + line = line.strip() + if not line: + return None, None, line + elif line[0] == '?': + line = 'help ' + line[1:] + elif line[0] == '!': + if hasattr(self, 'do_shell'): + line = 'shell ' + line[1:] + else: + return None, None, line + i, n = 0, len(line) + while i < n and line[i] in self.identchars: i = i+1 + cmd, arg = line[:i], line[i:].strip() + return cmd, arg, line + + def onecmd(self, line): + """Interpret the argument as though it had been typed in response + to the prompt. + + This may be overridden, but should not normally need to be; + see the precmd() and postcmd() methods for useful execution hooks. + The return value is a flag indicating whether interpretation of + commands by the interpreter should stop. + + """ + cmd, arg, line = self.parseline(line) + if not line: + return self.emptyline() + if cmd is None: + return self.default(line) + self.lastcmd = line + if line == 'EOF' : + self.lastcmd = '' + if cmd == '': + return self.default(line) + else: + try: + func = getattr(self, 'do_' + cmd) + except AttributeError: + return self.default(line) + return func(arg) + + def emptyline(self): + """Called when an empty line is entered in response to the prompt. + + If this method is not overridden, it repeats the last nonempty + command entered. + + """ + if self.lastcmd: + return self.onecmd(self.lastcmd) + + def default(self, line): + """Called on an input line when the command prefix is not recognized. + + If this method is not overridden, it prints an error message and + returns. + + """ + self.stdout.write('*** Unknown syntax: %s\n'%line) + + def completedefault(self, *ignored): + """Method called to complete an input line when no command-specific + complete_*() method is available. + + By default, it returns an empty list. 
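+ # --- Editorial sketch: the precmd()/postcmd() hooks wrap every dispatch, so
+ # a subclass can normalize input or decide when to stop without touching
+ # onecmd(). Builds on the illustrative GreetShell above.
+ class PoliteShell(GreetShell):
+     def precmd(self, line):
+         # runs after the prompt, before parsing: make commands case-insensitive
+         return line.lower()
+
+     def postcmd(self, stop, line):
+         # runs after each dispatch; a true return value ends cmdloop()
+         return stop or line.strip() == 'quit'
+
+     def do_quit(self, arg):
+         'Stop the interpreter.'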
+ + """ + return [] + + def completenames(self, text, *ignored): + dotext = 'do_'+text + return [a[3:] for a in self.get_names() if a.startswith(dotext)] + + def complete(self, text, state): + """Return the next possible completion for 'text'. + + If a command has not been entered, then complete against command list. + Otherwise try to call complete_ to get list of completions. + """ + if state == 0: + import readline + origline = readline.get_line_buffer() + line = origline.lstrip() + stripped = len(origline) - len(line) + begidx = readline.get_begidx() - stripped + endidx = readline.get_endidx() - stripped + if begidx>0: + cmd, args, foo = self.parseline(line) + if cmd == '': + compfunc = self.completedefault + else: + try: + compfunc = getattr(self, 'complete_' + cmd) + except AttributeError: + compfunc = self.completedefault + else: + compfunc = self.completenames + self.completion_matches = compfunc(text, line, begidx, endidx) + try: + return self.completion_matches[state] + except IndexError: + return None + + def get_names(self): + # This method used to pull in base class attributes + # at a time dir() didn't do it yet. + return dir(self.__class__) + + def complete_help(self, *args): + commands = set(self.completenames(*args)) + topics = set(a[5:] for a in self.get_names() + if a.startswith('help_' + args[0])) + return list(commands | topics) + + def do_help(self, arg): + 'List available commands with "help" or detailed help with "help cmd".' + if arg: + # XXX check arg syntax + try: + func = getattr(self, 'help_' + arg) + except AttributeError: + try: + doc=getattr(self, 'do_' + arg).__doc__ + if doc: + self.stdout.write("%s\n"%str(doc)) + return + except AttributeError: + pass + self.stdout.write("%s\n"%str(self.nohelp % (arg,))) + return + func() + else: + names = self.get_names() + cmds_doc = [] + cmds_undoc = [] + help = {} + for name in names: + if name[:5] == 'help_': + help[name[5:]]=1 + names.sort() + # There can be duplicates if routines overridden + prevname = '' + for name in names: + if name[:3] == 'do_': + if name == prevname: + continue + prevname = name + cmd=name[3:] + if cmd in help: + cmds_doc.append(cmd) + del help[cmd] + elif getattr(self, name).__doc__: + cmds_doc.append(cmd) + else: + cmds_undoc.append(cmd) + self.stdout.write("%s\n"%str(self.doc_leader)) + self.print_topics(self.doc_header, cmds_doc, 15,80) + self.print_topics(self.misc_header, list(help.keys()),15,80) + self.print_topics(self.undoc_header, cmds_undoc, 15,80) + + def print_topics(self, header, cmds, cmdlen, maxcol): + if cmds: + self.stdout.write("%s\n"%str(header)) + if self.ruler: + self.stdout.write("%s\n"%str(self.ruler * len(header))) + self.columnize(cmds, maxcol-1) + self.stdout.write("\n") + + def columnize(self, list, displaywidth=80): + """Display a list of strings as a compact set of columns. + + Each column is only as wide as necessary. + Columns are separated by two spaces (one was not legible enough). 
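+ # --- Editorial sketch: argument completion as described in point 7 of the
+ # module docstring. complete_greet() receives the text being matched plus
+ # the whole line and the begin/end indexes of that text, and returns only
+ # the candidates that start with it. Names are illustrative.
+ class CompletingShell(GreetShell):
+     _known = ['ada', 'alan', 'grace']
+
+     def complete_greet(self, text, line, begidx, endidx):
+         # for the buffer 'greet a': text == 'a', begidx == 6, endidx == 7
+         return [n for n in self._known if n.startswith(text)]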
+ """ + if not list: + self.stdout.write("\n") + return + + nonstrings = [i for i in range(len(list)) + if not isinstance(list[i], str)] + if nonstrings: + raise TypeError("list[i] not a string for i in %s" + % ", ".join(map(str, nonstrings))) + size = len(list) + if size == 1: + self.stdout.write('%s\n'%str(list[0])) + return + # Try every row count from 1 upwards + for nrows in range(1, len(list)): + ncols = (size+nrows-1) // nrows + colwidths = [] + totwidth = -2 + for col in range(ncols): + colwidth = 0 + for row in range(nrows): + i = row + nrows*col + if i >= size: + break + x = list[i] + colwidth = max(colwidth, len(x)) + colwidths.append(colwidth) + totwidth += colwidth + 2 + if totwidth > displaywidth: + break + if totwidth <= displaywidth: + break + else: + nrows = len(list) + ncols = 1 + colwidths = [0] + for row in range(nrows): + texts = [] + for col in range(ncols): + i = row + nrows*col + if i >= size: + x = "" + else: + x = list[i] + texts.append(x) + while texts and not texts[-1]: + del texts[-1] + for col in range(len(texts)): + texts[col] = texts[col].ljust(colwidths[col]) + self.stdout.write("%s\n"%str(" ".join(texts))) diff --git a/ouroboros/code.py b/ouroboros/code.py new file mode 100644 index 0000000..f8184b6 --- /dev/null +++ b/ouroboros/code.py @@ -0,0 +1,302 @@ +"""Utilities needed to emulate Python's interactive interpreter. + +""" + +# Inspired by similar code by Jeff Epler and Fredrik Lundh. + + +import sys +import traceback +from codeop import CommandCompiler, compile_command + +__all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact", + "compile_command"] + +class InteractiveInterpreter: + """Base class for InteractiveConsole. + + This class deals with parsing and interpreter state (the user's + namespace); it doesn't deal with input buffering or prompting or + input file naming (the filename is always passed in explicitly). + + """ + + def __init__(self, locals=None): + """Constructor. + + The optional 'locals' argument specifies the dictionary in + which code will be executed; it defaults to a newly created + dictionary with key "__name__" set to "__console__" and key + "__doc__" set to None. + + """ + if locals is None: + locals = {"__name__": "__console__", "__doc__": None} + self.locals = locals + self.compile = CommandCompiler() + + def runsource(self, source, filename="", symbol="single"): + """Compile and run some source in the interpreter. + + Arguments are as for compile_command(). + + One several things can happen: + + 1) The input is incorrect; compile_command() raised an + exception (SyntaxError or OverflowError). A syntax traceback + will be printed by calling the showsyntaxerror() method. + + 2) The input is incomplete, and more input is required; + compile_command() returned None. Nothing happens. + + 3) The input is complete; compile_command() returned a code + object. The code is executed by calling self.runcode() (which + also handles run-time exceptions, except for SystemExit). + + The return value is True in case 2, False in the other cases (unless + an exception is raised). The return value can be used to + decide whether to use sys.ps1 or sys.ps2 to prompt the next + line. + + """ + try: + code = self.compile(source, filename, symbol) + except (OverflowError, SyntaxError, ValueError): + # Case 1 + self.showsyntaxerror(filename) + return False + + if code is None: + # Case 2 + return True + + # Case 3 + self.runcode(code) + return False + + def runcode(self, code): + """Execute a code object. 
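+ # --- Editorial sketch: the three runsource() outcomes map directly onto
+ # prompt selection in a read-eval loop (True -> show sys.ps2 and keep
+ # buffering, False -> start a fresh statement).
+ from code import InteractiveInterpreter
+
+ interp = InteractiveInterpreter()
+ assert interp.runsource('x = 1') is False   # case 3: complete, executed
+ assert interp.runsource('if x:') is True    # case 2: incomplete, want more
+ assert interp.runsource('x ==') is False    # case 1: error printed to stderr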
+ + When an exception occurs, self.showtraceback() is called to + display a traceback. All exceptions are caught except + SystemExit, which is reraised. + + A note about KeyboardInterrupt: this exception may occur + elsewhere in this code, and may not always be caught. The + caller should be prepared to deal with it. + + """ + try: + exec(code, self.locals) + except SystemExit: + raise + except: + self.showtraceback() + + def showsyntaxerror(self, filename=None): + """Display the syntax error that just occurred. + + This doesn't display a stack trace because there isn't one. + + If a filename is given, it is stuffed in the exception instead + of what was there before (because Python's parser always uses + "" when reading from a string). + + The output is written by self.write(), below. + + """ + type, value, tb = sys.exc_info() + sys.last_type = type + sys.last_value = value + sys.last_traceback = tb + if filename and type is SyntaxError: + # Work hard to stuff the correct filename in the exception + try: + msg, (dummy_filename, lineno, offset, line) = value.args + except ValueError: + # Not the format we expect; leave it alone + pass + else: + # Stuff in the right filename + value = SyntaxError(msg, (filename, lineno, offset, line)) + sys.last_value = value + if sys.excepthook is sys.__excepthook__: + lines = traceback.format_exception_only(type, value) + self.write(''.join(lines)) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + sys.excepthook(type, value, tb) + + def showtraceback(self): + """Display the exception that just occurred. + + We remove the first stack item because it is our own code. + + The output is written by self.write(), below. + + """ + try: + type, value, tb = sys.exc_info() + sys.last_type = type + sys.last_value = value + sys.last_traceback = tb + tblist = traceback.extract_tb(tb) + del tblist[:1] + lines = traceback.format_list(tblist) + if lines: + lines.insert(0, "Traceback (most recent call last):\n") + lines.extend(traceback.format_exception_only(type, value)) + finally: + tblist = tb = None + if sys.excepthook is sys.__excepthook__: + self.write(''.join(lines)) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + sys.excepthook(type, value, tb) + + def write(self, data): + """Write a string. + + The base implementation writes to sys.stderr; a subclass may + replace this with a different implementation. + + """ + sys.stderr.write(data) + + +class InteractiveConsole(InteractiveInterpreter): + """Closely emulate the behavior of the interactive Python interpreter. + + This class builds on InteractiveInterpreter and adds prompting + using the familiar sys.ps1 and sys.ps2, and input buffering. + + """ + + def __init__(self, locals=None, filename=""): + """Constructor. + + The optional locals argument will be passed to the + InteractiveInterpreter base class. + + The optional filename argument should specify the (file)name + of the input stream; it will show up in tracebacks. + + """ + InteractiveInterpreter.__init__(self, locals) + self.filename = filename + self.resetbuffer() + + def resetbuffer(self): + """Reset the input buffer.""" + self.buffer = [] + + def interact(self, banner=None): + """Closely emulate the interactive Python console. 
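+ # --- Editorial sketch: write() is the single funnel for error output, so a
+ # subclass can capture tracebacks instead of printing them (this holds as
+ # long as sys.excepthook has not been replaced; see the excepthook checks
+ # above).
+ from code import InteractiveInterpreter
+
+ class CapturingInterpreter(InteractiveInterpreter):
+     def __init__(self, locals=None):
+         super().__init__(locals)
+         self.errors = []
+
+     def write(self, data):
+         # collect error text rather than writing to sys.stderr
+         self.errors.append(data)
+
+ interp = CapturingInterpreter()
+ interp.runsource('1/0')      # the formatted traceback lands in interp.errors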
+ + The optional banner argument specifies the banner to print + before the first interaction; by default it prints a banner + similar to the one printed by the real Python interpreter, + followed by the current class name in parentheses (so as not + to confuse this with the real interpreter -- since it's so + close!). + + """ + try: + sys.ps1 + except AttributeError: + sys.ps1 = ">>> " + try: + sys.ps2 + except AttributeError: + sys.ps2 = "... " + cprt = 'Type "help", "copyright", "credits" or "license" for more information.' + if banner is None: + self.write("Python %s on %s\n%s\n(%s)\n" % + (sys.version, sys.platform, cprt, + self.__class__.__name__)) + elif banner: + self.write("%s\n" % str(banner)) + more = 0 + while 1: + try: + if more: + prompt = sys.ps2 + else: + prompt = sys.ps1 + try: + line = self.raw_input(prompt) + except EOFError: + self.write("\n") + break + else: + more = self.push(line) + except KeyboardInterrupt: + self.write("\nKeyboardInterrupt\n") + self.resetbuffer() + more = 0 + + def push(self, line): + """Push a line to the interpreter. + + The line should not have a trailing newline; it may have + internal newlines. The line is appended to a buffer and the + interpreter's runsource() method is called with the + concatenated contents of the buffer as source. If this + indicates that the command was executed or invalid, the buffer + is reset; otherwise, the command is incomplete, and the buffer + is left as it was after the line was appended. The return + value is 1 if more input is required, 0 if the line was dealt + with in some way (this is the same as runsource()). + + """ + self.buffer.append(line) + source = "\n".join(self.buffer) + more = self.runsource(source, self.filename) + if not more: + self.resetbuffer() + return more + + def raw_input(self, prompt=""): + """Write a prompt and read a line. + + The returned line does not include the trailing newline. + When the user enters the EOF key sequence, EOFError is raised. + + The base implementation uses the built-in function + input(); a subclass may replace this with a different + implementation. + + """ + return input(prompt) + + + +def interact(banner=None, readfunc=None, local=None): + """Closely emulate the interactive Python interpreter. + + This is a backwards compatible interface to the InteractiveConsole + class. When readfunc is not specified, it attempts to import the + readline module to enable GNU readline if it is available. + + Arguments (all optional, all default to None): + + banner -- passed to InteractiveConsole.interact() + readfunc -- if not None, replaces InteractiveConsole.raw_input() + local -- passed to InteractiveInterpreter.__init__() + + """ + console = InteractiveConsole(local) + if readfunc is not None: + console.raw_input = readfunc + else: + try: + import readline + except ImportError: + pass + console.interact(banner) + + +if __name__ == "__main__": + interact() diff --git a/ouroboros/codecs.py b/ouroboros/codecs.py new file mode 100644 index 0000000..c2065da --- /dev/null +++ b/ouroboros/codecs.py @@ -0,0 +1,1110 @@ +""" codecs -- Python Codec Registry, API and helpers. + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
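+ # --- Editorial sketch (for code.py above): the push() buffering protocol.
+ # Lines accumulate until they compile as a complete statement; the return
+ # value says whether more input is needed.
+ from code import InteractiveConsole
+
+ console = InteractiveConsole()
+ assert console.push('def f():') == 1        # incomplete: buffered, want more
+ assert console.push('    return 42') == 1   # still an open block
+ assert console.push('') == 0                # blank line closes it; f defined
+ assert console.push('f()') == 0             # executes and displays 42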
+ +"""#" + +import builtins, sys + +### Registry and builtin stateless codec functions + +try: + from _codecs import * +except ImportError as why: + raise SystemError('Failed to load the builtin codecs: %s' % why) + +__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE", + "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", + "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE", + "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE", + "strict_errors", "ignore_errors", "replace_errors", + "xmlcharrefreplace_errors", + "register_error", "lookup_error"] + +### Constants + +# +# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF) +# and its possible byte string values +# for UTF8/UTF16/UTF32 output and little/big endian machines +# + +# UTF-8 +BOM_UTF8 = b'\xef\xbb\xbf' + +# UTF-16, little endian +BOM_LE = BOM_UTF16_LE = b'\xff\xfe' + +# UTF-16, big endian +BOM_BE = BOM_UTF16_BE = b'\xfe\xff' + +# UTF-32, little endian +BOM_UTF32_LE = b'\xff\xfe\x00\x00' + +# UTF-32, big endian +BOM_UTF32_BE = b'\x00\x00\xfe\xff' + +if sys.byteorder == 'little': + + # UTF-16, native endianness + BOM = BOM_UTF16 = BOM_UTF16_LE + + # UTF-32, native endianness + BOM_UTF32 = BOM_UTF32_LE + +else: + + # UTF-16, native endianness + BOM = BOM_UTF16 = BOM_UTF16_BE + + # UTF-32, native endianness + BOM_UTF32 = BOM_UTF32_BE + +# Old broken names (don't use in new code) +BOM32_LE = BOM_UTF16_LE +BOM32_BE = BOM_UTF16_BE +BOM64_LE = BOM_UTF32_LE +BOM64_BE = BOM_UTF32_BE + + +### Codec base classes (defining the API) + +class CodecInfo(tuple): + """Codec details when looking up the codec registry""" + + # Private API to allow Python 3.4 to blacklist the known non-Unicode + # codecs in the standard library. A more general mechanism to + # reliably distinguish test encodings from other codecs will hopefully + # be defined for Python 3.5 + # + # See http://bugs.python.org/issue19619 + _is_text_encoding = True # Assume codecs are text encodings by default + + def __new__(cls, encode, decode, streamreader=None, streamwriter=None, + incrementalencoder=None, incrementaldecoder=None, name=None, + *, _is_text_encoding=None): + self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter)) + self.name = name + self.encode = encode + self.decode = decode + self.incrementalencoder = incrementalencoder + self.incrementaldecoder = incrementaldecoder + self.streamwriter = streamwriter + self.streamreader = streamreader + if _is_text_encoding is not None: + self._is_text_encoding = _is_text_encoding + return self + + def __repr__(self): + return "<%s.%s object for encoding %s at 0x%x>" % \ + (self.__class__.__module__, self.__class__.__name__, + self.name, id(self)) + +class Codec: + + """ Defines the interface for stateless encoders/decoders. + + The .encode()/.decode() methods may use different error + handling schemes by providing the errors argument. These + string values are predefined: + + 'strict' - raise a ValueError error (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace' - replace with a suitable replacement character; + Python will use the official U+FFFD REPLACEMENT + CHARACTER for the builtin Unicode codecs on + decoding and '?' on encoding. + 'surrogateescape' - replace with private codepoints U+DCnn. + 'xmlcharrefreplace' - Replace with the appropriate XML + character reference (only for encoding). + 'backslashreplace' - Replace with backslashed escape sequences + (only for encoding). + + The set of allowed values can be extended via register_error. 
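+ # --- Editorial sketch: the predefined error handlers listed above, applied
+ # through the builtin text codecs.
+ assert 'späm'.encode('ascii', 'ignore') == b'spm'
+ assert 'späm'.encode('ascii', 'replace') == b'sp?m'
+ assert 'späm'.encode('ascii', 'xmlcharrefreplace') == b'sp&#228;m'
+ assert 'späm'.encode('ascii', 'backslashreplace') == b'sp\\xe4m'
+ assert b'sp\xe4m'.decode('utf-8', 'replace') == 'sp\ufffdm'
+ # 'strict' (the default) raises UnicodeEncodeError/UnicodeDecodeError instead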
+ + """ + def encode(self, input, errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling. + + The method may not store state in the Codec instance. Use + StreamCodec for codecs which have to keep state in order to + make encoding/decoding efficient. + + The encoder must be able to handle zero length input and + return an empty object of the output object type in this + situation. + + """ + raise NotImplementedError + + def decode(self, input, errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling. + + The method may not store state in the Codec instance. Use + StreamCodec for codecs which have to keep state in order to + make encoding/decoding efficient. + + The decoder must be able to handle zero length input and + return an empty object of the output object type in this + situation. + + """ + raise NotImplementedError + +class IncrementalEncoder(object): + """ + An IncrementalEncoder encodes an input in multiple steps. The input can + be passed piece by piece to the encode() method. The IncrementalEncoder + remembers the state of the encoding process between calls to encode(). + """ + def __init__(self, errors='strict'): + """ + Creates an IncrementalEncoder instance. + + The IncrementalEncoder may use different error handling schemes by + providing the errors keyword argument. See the module docstring + for a list of possible values. + """ + self.errors = errors + self.buffer = "" + + def encode(self, input, final=False): + """ + Encodes input and returns the resulting object. + """ + raise NotImplementedError + + def reset(self): + """ + Resets the encoder to the initial state. + """ + + def getstate(self): + """ + Return the current state of the encoder. + """ + return 0 + + def setstate(self, state): + """ + Set the current state of the encoder. state must have been + returned by getstate(). + """ + +class BufferedIncrementalEncoder(IncrementalEncoder): + """ + This subclass of IncrementalEncoder can be used as the baseclass for an + incremental encoder if the encoder must keep some of the output in a + buffer between calls to encode(). + """ + def __init__(self, errors='strict'): + IncrementalEncoder.__init__(self, errors) + # unencoded input that is kept between calls to encode() + self.buffer = "" + + def _buffer_encode(self, input, errors, final): + # Overwrite this method in subclasses: It must encode input + # and return an (output, length consumed) tuple + raise NotImplementedError + + def encode(self, input, final=False): + # encode input (taking the buffer into account) + data = self.buffer + input + (result, consumed) = self._buffer_encode(data, self.errors, final) + # keep unencoded input until the next call + self.buffer = data[consumed:] + return result + + def reset(self): + IncrementalEncoder.reset(self) + self.buffer = "" + + def getstate(self): + return self.buffer or 0 + + def setstate(self, state): + self.buffer = state or "" + +class IncrementalDecoder(object): + """ + An IncrementalDecoder decodes an input in multiple steps. The input can + be passed piece by piece to the decode() method. 
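+ # --- Editorial sketch: feeding text to an IncrementalEncoder piece by piece;
+ # getincrementalencoder() (defined later in this module) returns the class.
+ import codecs
+
+ enc = codecs.getincrementalencoder('utf-8')()
+ chunks = [enc.encode('sp'), enc.encode('äm'), enc.encode('', final=True)]
+ assert b''.join(chunks) == 'späm'.encode('utf-8')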
The IncrementalDecoder + remembers the state of the decoding process between calls to decode(). + """ + def __init__(self, errors='strict'): + """ + Create a IncrementalDecoder instance. + + The IncrementalDecoder may use different error handling schemes by + providing the errors keyword argument. See the module docstring + for a list of possible values. + """ + self.errors = errors + + def decode(self, input, final=False): + """ + Decode input and returns the resulting object. + """ + raise NotImplementedError + + def reset(self): + """ + Reset the decoder to the initial state. + """ + + def getstate(self): + """ + Return the current state of the decoder. + + This must be a (buffered_input, additional_state_info) tuple. + buffered_input must be a bytes object containing bytes that + were passed to decode() that have not yet been converted. + additional_state_info must be a non-negative integer + representing the state of the decoder WITHOUT yet having + processed the contents of buffered_input. In the initial state + and after reset(), getstate() must return (b"", 0). + """ + return (b"", 0) + + def setstate(self, state): + """ + Set the current state of the decoder. + + state must have been returned by getstate(). The effect of + setstate((b"", 0)) must be equivalent to reset(). + """ + +class BufferedIncrementalDecoder(IncrementalDecoder): + """ + This subclass of IncrementalDecoder can be used as the baseclass for an + incremental decoder if the decoder must be able to handle incomplete + byte sequences. + """ + def __init__(self, errors='strict'): + IncrementalDecoder.__init__(self, errors) + # undecoded input that is kept between calls to decode() + self.buffer = b"" + + def _buffer_decode(self, input, errors, final): + # Overwrite this method in subclasses: It must decode input + # and return an (output, length consumed) tuple + raise NotImplementedError + + def decode(self, input, final=False): + # decode input (taking the buffer into account) + data = self.buffer + input + (result, consumed) = self._buffer_decode(data, self.errors, final) + # keep undecoded input until the next call + self.buffer = data[consumed:] + return result + + def reset(self): + IncrementalDecoder.reset(self) + self.buffer = b"" + + def getstate(self): + # additional state info is always 0 + return (self.buffer, 0) + + def setstate(self, state): + # ignore additional state info + self.buffer = state[0] + +# +# The StreamWriter and StreamReader class provide generic working +# interfaces which can be used to implement new encoding submodules +# very easily. See encodings/utf_8.py for an example on how this is +# done. +# + +class StreamWriter(Codec): + + def __init__(self, stream, errors='strict'): + + """ Creates a StreamWriter instance. + + stream must be a file-like object open for writing + (binary) data. + + The StreamWriter may use different error handling + schemes by providing the errors keyword argument. These + parameters are predefined: + + 'strict' - raise a ValueError (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace'- replace with a suitable replacement character + 'xmlcharrefreplace' - Replace with the appropriate XML + character reference. + 'backslashreplace' - Replace with backslashed escape + sequences (only for encoding). + + The set of allowed parameter values can be extended via + register_error. + """ + self.stream = stream + self.errors = errors + + def write(self, object): + + """ Writes the object's contents encoded to self.stream. 
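+ # --- Editorial sketch: the buffering contract in action. A UTF-8 sequence
+ # split across two feeds is held back until its continuation byte arrives.
+ import codecs
+
+ dec = codecs.getincrementaldecoder('utf-8')()
+ assert dec.decode(b'sp\xc3') == 'sp'             # lead byte stays buffered
+ assert dec.decode(b'\xa4m', final=True) == 'äm'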
+ """ + data, consumed = self.encode(object, self.errors) + self.stream.write(data) + + def writelines(self, list): + + """ Writes the concatenated list of strings to the stream + using .write(). + """ + self.write(''.join(list)) + + def reset(self): + + """ Flushes and resets the codec buffers used for keeping state. + + Calling this method should ensure that the data on the + output is put into a clean state, that allows appending + of new fresh data without having to rescan the whole + stream to recover state. + + """ + pass + + def seek(self, offset, whence=0): + self.stream.seek(offset, whence) + if whence == 0 and offset == 0: + self.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamReader(Codec): + + charbuffertype = str + + def __init__(self, stream, errors='strict'): + + """ Creates a StreamReader instance. + + stream must be a file-like object open for reading + (binary) data. + + The StreamReader may use different error handling + schemes by providing the errors keyword argument. These + parameters are predefined: + + 'strict' - raise a ValueError (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace'- replace with a suitable replacement character; + + The set of allowed parameter values can be extended via + register_error. + """ + self.stream = stream + self.errors = errors + self.bytebuffer = b"" + self._empty_charbuffer = self.charbuffertype() + self.charbuffer = self._empty_charbuffer + self.linebuffer = None + + def decode(self, input, errors='strict'): + raise NotImplementedError + + def read(self, size=-1, chars=-1, firstline=False): + + """ Decodes data from the stream self.stream and returns the + resulting object. + + chars indicates the number of characters to read from the + stream. read() will never return more than chars + characters, but it might return less, if there are not enough + characters available. + + size indicates the approximate maximum number of bytes to + read from the stream for decoding purposes. The decoder + can modify this setting as appropriate. The default value + -1 indicates to read and decode as much as possible. size + is intended to prevent having to decode huge files in one + step. + + If firstline is true, and a UnicodeDecodeError happens + after the first line terminator in the input only the first line + will be returned, the rest of the input will be kept until the + next call to read(). + + The method should use a greedy read strategy meaning that + it should read as much data as is allowed within the + definition of the encoding and the given size, e.g. if + optional encoding endings or state markers are available + on the stream, these should be read too. + """ + # If we have lines cached, first merge them back into characters + if self.linebuffer: + self.charbuffer = self._empty_charbuffer.join(self.linebuffer) + self.linebuffer = None + + # read until we get the required number of characters (if available) + while True: + # can the request be satisfied from the character buffer? 
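+             # (editorial note) Two buffers cooperate here: self.charbuffer
+             # holds characters already decoded but not yet returned, while
+             # self.bytebuffer holds raw bytes a previous decode() could not
+             # consume. The loop tops up charbuffer until the chars/size
+             # request is met or the underlying stream is exhausted.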
+ if chars >= 0: + if len(self.charbuffer) >= chars: + break + elif size >= 0: + if len(self.charbuffer) >= size: + break + # we need more data + if size < 0: + newdata = self.stream.read() + else: + newdata = self.stream.read(size) + # decode bytes (those remaining from the last call included) + data = self.bytebuffer + newdata + if not data: + break + try: + newchars, decodedbytes = self.decode(data, self.errors) + except UnicodeDecodeError as exc: + if firstline: + newchars, decodedbytes = \ + self.decode(data[:exc.start], self.errors) + lines = newchars.splitlines(keepends=True) + if len(lines)<=1: + raise + else: + raise + # keep undecoded bytes until the next call + self.bytebuffer = data[decodedbytes:] + # put new characters in the character buffer + self.charbuffer += newchars + # there was no data available + if not newdata: + break + if chars < 0: + # Return everything we've got + result = self.charbuffer + self.charbuffer = self._empty_charbuffer + else: + # Return the first chars characters + result = self.charbuffer[:chars] + self.charbuffer = self.charbuffer[chars:] + return result + + def readline(self, size=None, keepends=True): + + """ Read one line from the input stream and return the + decoded data. + + size, if given, is passed as size argument to the + read() method. + + """ + # If we have lines cached from an earlier read, return + # them unconditionally + if self.linebuffer: + line = self.linebuffer[0] + del self.linebuffer[0] + if len(self.linebuffer) == 1: + # revert to charbuffer mode; we might need more data + # next time + self.charbuffer = self.linebuffer[0] + self.linebuffer = None + if not keepends: + line = line.splitlines(keepends=False)[0] + return line + + readsize = size or 72 + line = self._empty_charbuffer + # If size is given, we call read() only once + while True: + data = self.read(readsize, firstline=True) + if data: + # If we're at a "\r" read one extra character (which might + # be a "\n") to get a proper line ending. If the stream is + # temporarily exhausted we return the wrong line ending. + if (isinstance(data, str) and data.endswith("\r")) or \ + (isinstance(data, bytes) and data.endswith(b"\r")): + data += self.read(size=1, chars=1) + + line += data + lines = line.splitlines(keepends=True) + if lines: + if len(lines) > 1: + # More than one line result; the first line is a full line + # to return + line = lines[0] + del lines[0] + if len(lines) > 1: + # cache the remaining lines + lines[-1] += self.charbuffer + self.linebuffer = lines + self.charbuffer = None + else: + # only one remaining line, put it back into charbuffer + self.charbuffer = lines[0] + self.charbuffer + if not keepends: + line = line.splitlines(keepends=False)[0] + break + line0withend = lines[0] + line0withoutend = lines[0].splitlines(keepends=False)[0] + if line0withend != line0withoutend: # We really have a line end + # Put the rest back together and keep it until the next call + self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \ + self.charbuffer + if keepends: + line = line0withend + else: + line = line0withoutend + break + # we didn't get anything or this was our only try + if not data or size is not None: + if line and not keepends: + line = line.splitlines(keepends=False)[0] + break + if readsize < 8000: + readsize *= 2 + return line + + def readlines(self, sizehint=None, keepends=True): + + """ Read all lines available on the input stream + and return them as list of lines. 
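+ # --- Editorial sketch: a StreamReader wrapping an in-memory byte stream; the
+ # reader decodes on demand and serves read()/readline()/iteration.
+ import codecs, io
+
+ raw = io.BytesIO('eggs\nspäm\n'.encode('utf-8'))
+ reader = codecs.getreader('utf-8')(raw)
+ assert reader.readline() == 'eggs\n'
+ assert reader.read() == 'späm\n'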
+ + Line breaks are implemented using the codec's decoder + method and are included in the list entries. + + sizehint, if given, is ignored since there is no efficient + way to finding the true end-of-line. + + """ + data = self.read() + return data.splitlines(keepends) + + def reset(self): + + """ Resets the codec buffers used for keeping state. + + Note that no stream repositioning should take place. + This method is primarily intended to be able to recover + from decoding errors. + + """ + self.bytebuffer = b"" + self.charbuffer = self._empty_charbuffer + self.linebuffer = None + + def seek(self, offset, whence=0): + """ Set the input stream's current position. + + Resets the codec buffers used for keeping state. + """ + self.stream.seek(offset, whence) + self.reset() + + def __next__(self): + + """ Return the next decoded line from the input stream.""" + line = self.readline() + if line: + return line + raise StopIteration + + def __iter__(self): + return self + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamReaderWriter: + + """ StreamReaderWriter instances allow wrapping streams which + work in both read and write modes. + + The design is such that one can use the factory functions + returned by the codec.lookup() function to construct the + instance. + + """ + # Optional attributes set by the file wrappers below + encoding = 'unknown' + + def __init__(self, stream, Reader, Writer, errors='strict'): + + """ Creates a StreamReaderWriter instance. + + stream must be a Stream-like object. + + Reader, Writer must be factory functions or classes + providing the StreamReader, StreamWriter interface resp. + + Error handling is done in the same way as defined for the + StreamWriter/Readers. + + """ + self.stream = stream + self.reader = Reader(stream, errors) + self.writer = Writer(stream, errors) + self.errors = errors + + def read(self, size=-1): + + return self.reader.read(size) + + def readline(self, size=None): + + return self.reader.readline(size) + + def readlines(self, sizehint=None): + + return self.reader.readlines(sizehint) + + def __next__(self): + + """ Return the next decoded line from the input stream.""" + return next(self.reader) + + def __iter__(self): + return self + + def write(self, data): + + return self.writer.write(data) + + def writelines(self, list): + + return self.writer.writelines(list) + + def reset(self): + + self.reader.reset() + self.writer.reset() + + def seek(self, offset, whence=0): + self.stream.seek(offset, whence) + self.reader.reset() + if whence == 0 and offset == 0: + self.writer.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + # these are needed to make "with codecs.open(...)" work properly + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamRecoder: + + """ StreamRecoder instances provide a frontend - backend + view of encoding data. + + They use the complete set of APIs returned by the + codecs.lookup() function to implement their task. 
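+ # --- Editorial sketch: assembling a StreamReaderWriter by hand from the
+ # factories in a CodecInfo, giving one object that reads and writes text
+ # over a single binary stream.
+ import codecs, io
+
+ info = codecs.lookup('utf-8')
+ buf = io.BytesIO()
+ srw = codecs.StreamReaderWriter(buf, info.streamreader, info.streamwriter)
+ srw.write('späm\n')
+ buf.seek(0)                      # rewind the raw stream before reading back
+ assert srw.read() == 'späm\n'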
+ + Data written to the stream is first decoded into an + intermediate format (which is dependent on the given codec + combination) and then written to the stream using an instance + of the provided Writer class. + + In the other direction, data is read from the stream using a + Reader instance and then return encoded data to the caller. + + """ + # Optional attributes set by the file wrappers below + data_encoding = 'unknown' + file_encoding = 'unknown' + + def __init__(self, stream, encode, decode, Reader, Writer, + errors='strict'): + + """ Creates a StreamRecoder instance which implements a two-way + conversion: encode and decode work on the frontend (the + input to .read() and output of .write()) while + Reader and Writer work on the backend (reading and + writing to the stream). + + You can use these objects to do transparent direct + recodings from e.g. latin-1 to utf-8 and back. + + stream must be a file-like object. + + encode, decode must adhere to the Codec interface, Reader, + Writer must be factory functions or classes providing the + StreamReader, StreamWriter interface resp. + + encode and decode are needed for the frontend translation, + Reader and Writer for the backend translation. Unicode is + used as intermediate encoding. + + Error handling is done in the same way as defined for the + StreamWriter/Readers. + + """ + self.stream = stream + self.encode = encode + self.decode = decode + self.reader = Reader(stream, errors) + self.writer = Writer(stream, errors) + self.errors = errors + + def read(self, size=-1): + + data = self.reader.read(size) + data, bytesencoded = self.encode(data, self.errors) + return data + + def readline(self, size=None): + + if size is None: + data = self.reader.readline() + else: + data = self.reader.readline(size) + data, bytesencoded = self.encode(data, self.errors) + return data + + def readlines(self, sizehint=None): + + data = self.reader.read() + data, bytesencoded = self.encode(data, self.errors) + return data.splitlines(keepends=True) + + def __next__(self): + + """ Return the next decoded line from the input stream.""" + data = next(self.reader) + data, bytesencoded = self.encode(data, self.errors) + return data + + def __iter__(self): + return self + + def write(self, data): + + data, bytesdecoded = self.decode(data, self.errors) + return self.writer.write(data) + + def writelines(self, list): + + data = ''.join(list) + data, bytesdecoded = self.decode(data, self.errors) + return self.writer.write(data) + + def reset(self): + + self.reader.reset() + self.writer.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### Shortcuts + +def open(filename, mode='rb', encoding=None, errors='strict', buffering=1): + + """ Open an encoded file using the given mode and return + a wrapped version providing transparent encoding/decoding. + + Note: The wrapped version will only accept the object format + defined by the codecs, i.e. Unicode objects for most builtin + codecs. Output is also codec dependent and will usually be + Unicode as well. + + Files are always opened in binary mode, even if no binary mode + was specified. This is done to avoid data loss due to encodings + using 8-bit values. The default file mode is 'rb' meaning to + open the file in binary read mode. 
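+ # --- Editorial sketch: EncodedFile() (defined below) builds a StreamRecoder;
+ # here the frontend speaks latin-1 while the backend stores utf-8.
+ import codecs, io
+
+ backend = io.BytesIO()
+ f = codecs.EncodedFile(backend, data_encoding='latin-1', file_encoding='utf-8')
+ f.write('späm'.encode('latin-1'))                     # frontend bytes
+ assert backend.getvalue() == 'späm'.encode('utf-8')   # stored transcoded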
+ + encoding specifies the encoding which is to be used for the + file. + + errors may be given to define the error handling. It defaults + to 'strict' which causes ValueErrors to be raised in case an + encoding error occurs. + + buffering has the same meaning as for the builtin open() API. + It defaults to line buffered. + + The returned wrapped file object provides an extra attribute + .encoding which allows querying the used encoding. This + attribute is only available if an encoding was specified as + parameter. + + """ + if encoding is not None and \ + 'b' not in mode: + # Force opening of the file in binary mode + mode = mode + 'b' + file = builtins.open(filename, mode, buffering) + if encoding is None: + return file + info = lookup(encoding) + srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors) + # Add attributes to simplify introspection + srw.encoding = encoding + return srw + +def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): + + """ Return a wrapped version of file which provides transparent + encoding translation. + + Strings written to the wrapped file are interpreted according + to the given data_encoding and then written to the original + file as string using file_encoding. The intermediate encoding + will usually be Unicode but depends on the specified codecs. + + Strings are read from the file using file_encoding and then + passed back to the caller as string using data_encoding. + + If file_encoding is not given, it defaults to data_encoding. + + errors may be given to define the error handling. It defaults + to 'strict' which causes ValueErrors to be raised in case an + encoding error occurs. + + The returned wrapped file object provides two extra attributes + .data_encoding and .file_encoding which reflect the given + parameters of the same name. The attributes can be used for + introspection by Python programs. + + """ + if file_encoding is None: + file_encoding = data_encoding + data_info = lookup(data_encoding) + file_info = lookup(file_encoding) + sr = StreamRecoder(file, data_info.encode, data_info.decode, + file_info.streamreader, file_info.streamwriter, errors) + # Add attributes to simplify introspection + sr.data_encoding = data_encoding + sr.file_encoding = file_encoding + return sr + +### Helpers for codec lookup + +def getencoder(encoding): + + """ Lookup up the codec for the given encoding and return + its encoder function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).encode + +def getdecoder(encoding): + + """ Lookup up the codec for the given encoding and return + its decoder function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).decode + +def getincrementalencoder(encoding): + + """ Lookup up the codec for the given encoding and return + its IncrementalEncoder class or factory function. + + Raises a LookupError in case the encoding cannot be found + or the codecs doesn't provide an incremental encoder. + + """ + encoder = lookup(encoding).incrementalencoder + if encoder is None: + raise LookupError(encoding) + return encoder + +def getincrementaldecoder(encoding): + + """ Lookup up the codec for the given encoding and return + its IncrementalDecoder class or factory function. + + Raises a LookupError in case the encoding cannot be found + or the codecs doesn't provide an incremental decoder. 
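+ # --- Editorial sketch: codecs.open() as defined above; the file is forced to
+ # binary mode and wrapped so reads and writes are transparently decoded and
+ # encoded. The path name is illustrative.
+ import codecs
+
+ with codecs.open('/tmp/spam.txt', 'w', encoding='utf-8') as f:
+     f.write('späm\n')
+     assert f.encoding == 'utf-8'
+ with codecs.open('/tmp/spam.txt', 'r', encoding='utf-8') as f:
+     assert f.read() == 'späm\n'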
+ + """ + decoder = lookup(encoding).incrementaldecoder + if decoder is None: + raise LookupError(encoding) + return decoder + +def getreader(encoding): + + """ Lookup up the codec for the given encoding and return + its StreamReader class or factory function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).streamreader + +def getwriter(encoding): + + """ Lookup up the codec for the given encoding and return + its StreamWriter class or factory function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).streamwriter + +def iterencode(iterator, encoding, errors='strict', **kwargs): + """ + Encoding iterator. + + Encodes the input strings from the iterator using a IncrementalEncoder. + + errors and kwargs are passed through to the IncrementalEncoder + constructor. + """ + encoder = getincrementalencoder(encoding)(errors, **kwargs) + for input in iterator: + output = encoder.encode(input) + if output: + yield output + output = encoder.encode("", True) + if output: + yield output + +def iterdecode(iterator, encoding, errors='strict', **kwargs): + """ + Decoding iterator. + + Decodes the input strings from the iterator using a IncrementalDecoder. + + errors and kwargs are passed through to the IncrementalDecoder + constructor. + """ + decoder = getincrementaldecoder(encoding)(errors, **kwargs) + for input in iterator: + output = decoder.decode(input) + if output: + yield output + output = decoder.decode(b"", True) + if output: + yield output + +### Helpers for charmap-based codecs + +def make_identity_dict(rng): + + """ make_identity_dict(rng) -> dict + + Return a dictionary where elements of the rng sequence are + mapped to themselves. + + """ + return {i:i for i in rng} + +def make_encoding_map(decoding_map): + + """ Creates an encoding map from a decoding map. + + If a target mapping in the decoding map occurs multiple + times, then that target is mapped to None (undefined mapping), + causing an exception when encountered by the charmap codec + during translation. + + One example where this happens is cp875.py which decodes + multiple character to \u001a. + + """ + m = {} + for k,v in decoding_map.items(): + if not v in m: + m[v] = k + else: + m[v] = None + return m + +### error handlers + +try: + strict_errors = lookup_error("strict") + ignore_errors = lookup_error("ignore") + replace_errors = lookup_error("replace") + xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") + backslashreplace_errors = lookup_error("backslashreplace") +except LookupError: + # In --disable-unicode builds, these error handler are missing + strict_errors = None + ignore_errors = None + replace_errors = None + xmlcharrefreplace_errors = None + backslashreplace_errors = None + +# Tell modulefinder that using codecs probably needs the encodings +# package +_false = 0 +if _false: + import encodings + +### Tests + +if __name__ == '__main__': + + # Make stdout translate Latin-1 output into UTF-8 output + sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8') + + # Have stdin translate Latin-1 input into UTF-8 input + sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1') diff --git a/ouroboros/codeop.py b/ouroboros/codeop.py new file mode 100644 index 0000000..fb759da --- /dev/null +++ b/ouroboros/codeop.py @@ -0,0 +1,168 @@ +r"""Utilities to compile possibly incomplete Python source code. 
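+ # --- Editorial sketch (for the codecs helpers above): iterencode() and
+ # iterdecode() wrap an incremental codec around any iterable.
+ import codecs
+
+ parts = ['sp', 'äm']
+ encoded = list(codecs.iterencode(parts, 'utf-8'))
+ assert b''.join(encoded) == b'sp\xc3\xa4m'
+ assert ''.join(codecs.iterdecode(encoded, 'utf-8')) == 'späm'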
+ +This module provides two interfaces, broadly similar to the builtin +function compile(), which take program text, a filename and a 'mode' +and: + +- Return code object if the command is complete and valid +- Return None if the command is incomplete +- Raise SyntaxError, ValueError or OverflowError if the command is a + syntax error (OverflowError and ValueError can be produced by + malformed literals). + +Approach: + +First, check if the source consists entirely of blank lines and +comments; if so, replace it with 'pass', because the built-in +parser doesn't always do the right thing for these. + +Compile three times: as is, with \n, and with \n\n appended. If it +compiles as is, it's complete. If it compiles with one \n appended, +we expect more. If it doesn't compile either way, we compare the +error we get when compiling with \n or \n\n appended. If the errors +are the same, the code is broken. But if the errors are different, we +expect more. Not intuitive; not even guaranteed to hold in future +releases; but this matches the compiler's behavior from Python 1.4 +through 2.2, at least. + +Caveat: + +It is possible (but not likely) that the parser stops parsing with a +successful outcome before reaching the end of the source; in this +case, trailing symbols may be ignored instead of causing an error. +For example, a backslash followed by two newlines may be followed by +arbitrary garbage. This will be fixed once the API for the parser is +better. + +The two interfaces are: + +compile_command(source, filename, symbol): + + Compiles a single command in the manner described above. + +CommandCompiler(): + + Instances of this class have __call__ methods identical in + signature to compile_command; the difference is that if the + instance compiles program text containing a __future__ statement, + the instance 'remembers' and compiles all subsequent program texts + with the statement in force. + +The module also provides another class: + +Compile(): + + Instances of this class act like the built-in function compile, + but with 'memory' in the sense described above. +""" + +import __future__ + +_features = [getattr(__future__, fname) + for fname in __future__.all_feature_names] + +__all__ = ["compile_command", "Compile", "CommandCompiler"] + +PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h + +def _maybe_compile(compiler, source, filename, symbol): + # Check for source consisting of only blank lines and comments + for line in source.split("\n"): + line = line.strip() + if line and line[0] != '#': + break # Leave it alone + else: + if symbol != "eval": + source = "pass" # Replace it with a 'pass' statement + + err = err1 = err2 = None + code = code1 = code2 = None + + try: + code = compiler(source, filename, symbol) + except SyntaxError as err: + pass + + try: + code1 = compiler(source + "\n", filename, symbol) + except SyntaxError as e: + err1 = e + + try: + code2 = compiler(source + "\n\n", filename, symbol) + except SyntaxError as e: + err2 = e + + if code: + return code + if not code1 and repr(err1) == repr(err2): + raise err1 + +def _compile(source, filename, symbol): + return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT) + +def compile_command(source, filename="", symbol="single"): + r"""Compile a command and determine whether it is incomplete. 
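+ # --- Editorial sketch: the three outcomes of the compile-three-times
+ # strategy described above.
+ from codeop import compile_command
+
+ assert compile_command('x = 1') is not None   # complete: a code object
+ assert compile_command('if x:') is None       # incomplete: ask for more
+ try:
+     compile_command('x ==')                   # broken either way: raises
+ except SyntaxError:
+     pass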
+ + Arguments: + + source -- the source string; may contain \n characters + filename -- optional filename from which source was read; default + "" + symbol -- optional grammar start symbol; "single" (default) or "eval" + + Return value / exceptions raised: + + - Return a code object if the command is complete and valid + - Return None if the command is incomplete + - Raise SyntaxError, ValueError or OverflowError if the command is a + syntax error (OverflowError and ValueError can be produced by + malformed literals). + """ + return _maybe_compile(_compile, source, filename, symbol) + +class Compile: + """Instances of this class behave much like the built-in compile + function, but if one is used to compile text containing a future + statement, it "remembers" and compiles all subsequent program texts + with the statement in force.""" + def __init__(self): + self.flags = PyCF_DONT_IMPLY_DEDENT + + def __call__(self, source, filename, symbol): + codeob = compile(source, filename, symbol, self.flags, 1) + for feature in _features: + if codeob.co_flags & feature.compiler_flag: + self.flags |= feature.compiler_flag + return codeob + +class CommandCompiler: + """Instances of this class have __call__ methods identical in + signature to compile_command; the difference is that if the + instance compiles program text containing a __future__ statement, + the instance 'remembers' and compiles all subsequent program texts + with the statement in force.""" + + def __init__(self,): + self.compiler = Compile() + + def __call__(self, source, filename="", symbol="single"): + r"""Compile a command and determine whether it is incomplete. + + Arguments: + + source -- the source string; may contain \n characters + filename -- optional filename from which source was read; + default "" + symbol -- optional grammar start symbol; "single" (default) or + "eval" + + Return value / exceptions raised: + + - Return a code object if the command is complete and valid + - Return None if the command is incomplete + - Raise SyntaxError, ValueError or OverflowError if the command is a + syntax error (OverflowError and ValueError can be produced by + malformed literals). + """ + return _maybe_compile(self.compiler, source, filename, symbol) diff --git a/ouroboros/collections/__init__.py b/ouroboros/collections/__init__.py new file mode 100644 index 0000000..d993fe0 --- /dev/null +++ b/ouroboros/collections/__init__.py @@ -0,0 +1,1132 @@ +__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList', + 'UserString', 'Counter', 'OrderedDict', 'ChainMap'] + +# For backwards compatibility, continue to make the collections ABCs +# available through the collections module. +from _collections_abc import * +import _collections_abc +__all__ += _collections_abc.__all__ + +from _collections import deque, defaultdict +from operator import itemgetter as _itemgetter, eq as _eq +from keyword import iskeyword as _iskeyword +import sys as _sys +import heapq as _heapq +from _weakref import proxy as _proxy +from itertools import repeat as _repeat, chain as _chain, starmap as _starmap +from reprlib import recursive_repr as _recursive_repr + +################################################################################ +### OrderedDict +################################################################################ + +class _Link(object): + __slots__ = 'prev', 'next', 'key', '__weakref__' + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. 
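+ # --- Editorial sketch (for codeop above): a CommandCompiler remembers future
+ # statements, so later inputs compile under the same feature flags.
+ # barry_as_FLUFL is the one __future__ feature with a visible syntax effect
+ # in Python 3: it makes the '<>' operator legal again.
+ from codeop import CommandCompiler
+
+ cc = CommandCompiler()
+ cc('from __future__ import barry_as_FLUFL')
+ assert cc('1 <> 2') is not None   # compiles because the flag was remembered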
+ # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as regular dictionaries. + + # The internal self.__map dict maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # The sentinel is in self.__hardroot with a weakref proxy in self.__root. + # The prev links are weakref proxies (to prevent circular references). + # Individual links are kept alive by the hard reference in self.__map. + # Those hard references disappear when a key is deleted from an OrderedDict. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. The signature is the same as + regular dictionaries, but keyword arguments are not recommended because + their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__hardroot = _Link() + self.__root = root = _proxy(self.__hardroot) + root.prev = root.next = root + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, + dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link at the end of the linked list, + # and the inherited dictionary is updated with the new key/value pair. + if key not in self: + self.__map[key] = link = Link() + root = self.__root + last = root.prev + link.prev, link.next, link.key = last, root, key + last.next = link + root.prev = proxy(link) + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which gets + # removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link = self.__map.pop(key) + link_prev = link.prev + link_next = link.next + link_prev.next = link_next + link_next.prev = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + # Traverse the linked list in order. + root = self.__root + curr = root.next + while curr is not root: + yield curr.key + curr = curr.next + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + # Traverse the linked list in reverse order. + root = self.__root + curr = root.prev + while curr is not root: + yield curr.key + curr = curr.prev + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + root = self.__root + root.prev = root.next = root + self.__map.clear() + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root.prev + link_prev = link.prev + link_prev.next = root + root.prev = link_prev + else: + link = root.next + link_next = link.next + root.next = link_next + link_next.prev = root + key = link.key + del self.__map[key] + value = dict.pop(self, key) + return key, value + + def move_to_end(self, key, last=True): + '''Move an existing element to the end (or beginning if last==False). + + Raises KeyError if the element does not exist. + When last=True, acts like a fast version of self[key]=self.pop(key). 
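+ # --- Editorial sketch: the reordering primitives built on the linked list.
+ from collections import OrderedDict
+
+ od = OrderedDict.fromkeys('abc')
+ od.move_to_end('a')                            # send to the right end
+ assert list(od) == ['b', 'c', 'a']
+ od.move_to_end('c', last=False)                # send to the left end
+ assert list(od) == ['c', 'b', 'a']
+ assert od.popitem() == ('a', None)             # LIFO by default
+ assert od.popitem(last=False) == ('c', None)   # FIFO when last is false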
+ + ''' + link = self.__map[key] + link_prev = link.prev + link_next = link.next + link_prev.next = link_next + link_next.prev = link_prev + root = self.__root + if last: + last = root.prev + link.prev = last + link.next = root + last.next = root.prev = link + else: + first = root.next + link.prev = root + link.next = first + root.next = first.prev = link + + def __sizeof__(self): + sizeof = _sys.getsizeof + n = len(self) + 1 # number of links including root + size = sizeof(self.__dict__) # instance dictionary + size += sizeof(self.__map) * 2 # internal dict and inherited dict + size += sizeof(self.__hardroot) * n # link objects + size += sizeof(self.__root) * n # proxy objects + return size + + update = __update = MutableMapping.update + keys = MutableMapping.keys + values = MutableMapping.values + items = MutableMapping.items + __ne__ = MutableMapping.__ne__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding + value. If key is not found, d is returned if given, otherwise KeyError + is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + @_recursive_repr() + def __repr__(self): + 'od.__repr__() <==> repr(od)' + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) + + def __reduce__(self): + 'Return state information for pickling' + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + return self.__class__, (), inst_dict or None, None, iter(self.items()) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S. + If not specified, the value defaults to None. + + ''' + self = cls() + for key in iterable: + self[key] = value + return self + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. 
+
+        '''
+        if isinstance(other, OrderedDict):
+            return dict.__eq__(self, other) and all(map(_eq, self, other))
+        return dict.__eq__(self, other)
+
+
+################################################################################
+### namedtuple
+################################################################################
+
+_class_template = """\
+from builtins import property as _property, tuple as _tuple
+from operator import itemgetter as _itemgetter
+from collections import OrderedDict
+
+class {typename}(tuple):
+    '{typename}({arg_list})'
+
+    __slots__ = ()
+
+    _fields = {field_names!r}
+
+    def __new__(_cls, {arg_list}):
+        'Create new instance of {typename}({arg_list})'
+        return _tuple.__new__(_cls, ({arg_list}))
+
+    @classmethod
+    def _make(cls, iterable, new=tuple.__new__, len=len):
+        'Make a new {typename} object from a sequence or iterable'
+        result = new(cls, iterable)
+        if len(result) != {num_fields:d}:
+            raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
+        return result
+
+    def _replace(_self, **kwds):
+        'Return a new {typename} object replacing specified fields with new values'
+        result = _self._make(map(kwds.pop, {field_names!r}, _self))
+        if kwds:
+            raise ValueError('Got unexpected field names: %r' % list(kwds))
+        return result
+
+    def __repr__(self):
+        'Return a nicely formatted representation string'
+        return self.__class__.__name__ + '({repr_fmt})' % self
+
+    @property
+    def __dict__(self):
+        'A new OrderedDict mapping field names to their values'
+        return OrderedDict(zip(self._fields, self))
+
+    def _asdict(self):
+        'Return a new OrderedDict which maps field names to their values.'
+        return self.__dict__
+
+    def __getnewargs__(self):
+        'Return self as a plain tuple.  Used by copy and pickle.'
+        return tuple(self)
+
+    def __getstate__(self):
+        'Exclude the OrderedDict from pickling'
+        return None
+
+{field_defs}
+"""
+
+_repr_template = '{name}=%r'
+
+_field_template = '''\
+    {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
+'''
+
+def namedtuple(typename, field_names, verbose=False, rename=False):
+    """Returns a new subclass of tuple with named fields.
+
+    >>> Point = namedtuple('Point', ['x', 'y'])
+    >>> Point.__doc__                   # docstring for the new class
+    'Point(x, y)'
+    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
+    >>> p[0] + p[1]                     # indexable like a plain tuple
+    33
+    >>> x, y = p                        # unpack like a regular tuple
+    >>> x, y
+    (11, 22)
+    >>> p.x + p.y                       # fields also accessible by name
+    33
+    >>> d = p._asdict()                 # convert to a dictionary
+    >>> d['x']
+    11
+    >>> Point(**d)                      # convert from a dictionary
+    Point(x=11, y=22)
+    >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
+    Point(x=100, y=22)
+
+    """
+
+    # Validate the field names.  At the user's option, either generate an error
+    # message or automatically replace the field name with a valid name.
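+    # Sketch of the rename behavior (illustrative, not from the original
+    # source): namedtuple('T', ['abc', 'def', 'abc'], rename=True) produces
+    # _fields of ('abc', '_1', '_2') -- 'def' is a keyword and the second
+    # 'abc' is a duplicate, so both are replaced with positional names.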
+ if isinstance(field_names, str): + field_names = field_names.replace(',', ' ').split() + field_names = list(map(str, field_names)) + typename = str(typename) + if rename: + seen = set() + for index, name in enumerate(field_names): + if (not name.isidentifier() + or _iskeyword(name) + or name.startswith('_') + or name in seen): + field_names[index] = '_%d' % index + seen.add(name) + for name in [typename] + field_names: + if type(name) != str: + raise TypeError('Type names and field names must be strings') + if not name.isidentifier(): + raise ValueError('Type names and field names must be valid ' + 'identifiers: %r' % name) + if _iskeyword(name): + raise ValueError('Type names and field names cannot be a ' + 'keyword: %r' % name) + seen = set() + for name in field_names: + if name.startswith('_') and not rename: + raise ValueError('Field names cannot start with an underscore: ' + '%r' % name) + if name in seen: + raise ValueError('Encountered duplicate field name: %r' % name) + seen.add(name) + + # Fill-in the class template + class_definition = _class_template.format( + typename = typename, + field_names = tuple(field_names), + num_fields = len(field_names), + arg_list = repr(tuple(field_names)).replace("'", "")[1:-1], + repr_fmt = ', '.join(_repr_template.format(name=name) + for name in field_names), + field_defs = '\n'.join(_field_template.format(index=index, name=name) + for index, name in enumerate(field_names)) + ) + + # Execute the template string in a temporary namespace and support + # tracing utilities by setting a value for frame.f_globals['__name__'] + namespace = dict(__name__='namedtuple_%s' % typename) + exec(class_definition, namespace) + result = namespace[typename] + result._source = class_definition + if verbose: + print(result._source) + + # For pickling to work, the __module__ variable needs to be set to the frame + # where the named tuple is created. Bypass this step in environments where + # sys._getframe is not defined (Jython for example) or sys._getframe is not + # defined for arguments greater than 0 (IronPython). + try: + result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + pass + + return result + + +######################################################################## +### Counter +######################################################################## + +def _count_elements(mapping, iterable): + 'Tally elements from the iterable.' + mapping_get = mapping.get + for elem in iterable: + mapping[elem] = mapping_get(elem, 0) + 1 + +try: # Load C helper function if available + from _collections import _count_elements +except ImportError: + pass + +class Counter(dict): + '''Dict subclass for counting hashable items. Sometimes called a bag + or multiset. Elements are stored as dictionary keys and their counts + are stored as dictionary values. + + >>> c = Counter('abcdeabcdabcaba') # count elements from a string + + >>> c.most_common(3) # three most common elements + [('a', 5), ('b', 4), ('c', 3)] + >>> sorted(c) # list all unique elements + ['a', 'b', 'c', 'd', 'e'] + >>> ''.join(sorted(c.elements())) # list elements with repetitions + 'aaaaabbbbcccdde' + >>> sum(c.values()) # total of all counts + 15 + + >>> c['a'] # count of letter 'a' + 5 + >>> for elem in 'shazam': # update counts from an iterable + ... 
c[elem] += 1 # by adding 1 to each element's count + >>> c['a'] # now there are seven 'a' + 7 + >>> del c['b'] # remove all 'b' + >>> c['b'] # now there are zero 'b' + 0 + + >>> d = Counter('simsalabim') # make another counter + >>> c.update(d) # add in the second counter + >>> c['a'] # now there are nine 'a' + 9 + + >>> c.clear() # empty the counter + >>> c + Counter() + + Note: If a count is set to zero or reduced to zero, it will remain + in the counter until the entry is deleted or the counter is cleared: + + >>> c = Counter('aaabbc') + >>> c['b'] -= 2 # reduce the count of 'b' by two + >>> c.most_common() # 'b' is still in, but its count is zero + [('a', 3), ('c', 1), ('b', 0)] + + ''' + # References: + # http://en.wikipedia.org/wiki/Multiset + # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html + # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm + # http://code.activestate.com/recipes/259174/ + # Knuth, TAOCP Vol. II section 4.6.3 + + def __init__(self, iterable=None, **kwds): + '''Create a new, empty Counter object. And if given, count elements + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. + + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args + + ''' + super().__init__() + self.update(iterable, **kwds) + + def __missing__(self, key): + 'The count of elements not in the Counter is zero.' + # Needed so that self[missing_item] does not raise KeyError + return 0 + + def most_common(self, n=None): + '''List the n most common elements and their counts from the most + common to the least. If n is None, then list all element counts. + + >>> Counter('abcdeabcdabcaba').most_common(3) + [('a', 5), ('b', 4), ('c', 3)] + + ''' + # Emulate Bag.sortedByCount from Smalltalk + if n is None: + return sorted(self.items(), key=_itemgetter(1), reverse=True) + return _heapq.nlargest(n, self.items(), key=_itemgetter(1)) + + def elements(self): + '''Iterator over elements repeating each as many times as its count. + + >>> c = Counter('ABCABC') + >>> sorted(c.elements()) + ['A', 'A', 'B', 'B', 'C', 'C'] + + # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 + >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) + >>> product = 1 + >>> for factor in prime_factors.elements(): # loop over factors + ... product *= factor # and multiply them + >>> product + 1836 + + Note, if an element's count has been set to zero or is a negative + number, elements() will ignore it. + + ''' + # Emulate Bag.do from Smalltalk and Multiset.begin from C++. + return _chain.from_iterable(_starmap(_repeat, self.items())) + + # Override dict methods where necessary + + @classmethod + def fromkeys(cls, iterable, v=None): + # There is no equivalent method for counters because setting v=1 + # means that no element can have a count greater than one. + raise NotImplementedError( + 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') + + def update(self, iterable=None, **kwds): + '''Like dict.update() but add counts instead of replacing them. + + Source can be an iterable, a dictionary, or another Counter instance. 
+
+        >>> c = Counter('which')
+        >>> c.update('witch')           # add elements from another iterable
+        >>> d = Counter('watch')
+        >>> c.update(d)                 # add elements from another counter
+        >>> c['h']                      # four 'h' in which, witch, and watch
+        4
+
+        '''
+        # The regular dict.update() operation makes no sense here because the
+        # replace behavior results in some of the original untouched counts
+        # being mixed-in with all of the other counts for a mishmash that
+        # doesn't have a straightforward interpretation in most counting
+        # contexts.  Instead, we implement straight-addition.  Both the inputs
+        # and outputs are allowed to contain zero and negative counts.
+
+        if iterable is not None:
+            if isinstance(iterable, Mapping):
+                if self:
+                    self_get = self.get
+                    for elem, count in iterable.items():
+                        self[elem] = count + self_get(elem, 0)
+                else:
+                    super().update(iterable) # fast path when counter is empty
+            else:
+                _count_elements(self, iterable)
+        if kwds:
+            self.update(kwds)
+
+    def subtract(self, iterable=None, **kwds):
+        '''Like dict.update() but subtracts counts instead of replacing them.
+        Counts can be reduced below zero.  Both the inputs and outputs are
+        allowed to contain zero and negative counts.
+
+        Source can be an iterable, a dictionary, or another Counter instance.
+
+        >>> c = Counter('which')
+        >>> c.subtract('witch')              # subtract elements from another iterable
+        >>> c.subtract(Counter('watch'))     # subtract elements from another counter
+        >>> c['h']                           # 2 in which, minus 1 in witch, minus 1 in watch
+        0
+        >>> c['w']                           # 1 in which, minus 1 in witch, minus 1 in watch
+        -1
+
+        '''
+        if iterable is not None:
+            self_get = self.get
+            if isinstance(iterable, Mapping):
+                for elem, count in iterable.items():
+                    self[elem] = self_get(elem, 0) - count
+            else:
+                for elem in iterable:
+                    self[elem] = self_get(elem, 0) - 1
+        if kwds:
+            self.subtract(kwds)
+
+    def copy(self):
+        'Return a shallow copy.'
+        return self.__class__(self)
+
+    def __reduce__(self):
+        return self.__class__, (dict(self),)
+
+    def __delitem__(self, elem):
+        'Like dict.__delitem__() but does not raise KeyError for missing values.'
+        if elem in self:
+            super().__delitem__(elem)
+
+    def __repr__(self):
+        if not self:
+            return '%s()' % self.__class__.__name__
+        try:
+            items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+            return '%s({%s})' % (self.__class__.__name__, items)
+        except TypeError:
+            # handle case where values are not orderable
+            return '{0}({1!r})'.format(self.__class__.__name__, dict(self))
+
+    # Multiset-style mathematical operations discussed in:
+    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
+    #       and at http://en.wikipedia.org/wiki/Multiset
+    #
+    # Outputs guaranteed to only include positive counts.
+    #
+    # To strip negative and zero counts, add-in an empty counter:
+    #       c += Counter()
+
+    def __add__(self, other):
+        '''Add counts from two counters.
+
+        >>> Counter('abbb') + Counter('bcc')
+        Counter({'b': 4, 'c': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        result = Counter()
+        for elem, count in self.items():
+            newcount = count + other[elem]
+            if newcount > 0:
+                result[elem] = newcount
+        for elem, count in other.items():
+            if elem not in self and count > 0:
+                result[elem] = count
+        return result
+
+    def __sub__(self, other):
+        ''' Subtract counts, but keep only results with positive counts.
+ + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + newcount = count - other[elem] + if newcount > 0: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count < 0: + result[elem] = 0 - count + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. + + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + other_count = other[elem] + newcount = other_count if count < other_count else count + if newcount > 0: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count > 0: + result[elem] = count + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. + + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + other_count = other[elem] + newcount = count if count < other_count else other_count + if newcount > 0: + result[elem] = newcount + return result + + def __pos__(self): + 'Adds an empty counter, effectively stripping negative and zero counts' + return self + Counter() + + def __neg__(self): + '''Subtracts from an empty counter. Strips positive and zero counts, + and flips the sign on negative counts. + + ''' + return Counter() - self + + def _keep_positive(self): + '''Internal method to strip elements with a negative or zero count''' + nonpositive = [elem for elem, count in self.items() if not count > 0] + for elem in nonpositive: + del self[elem] + return self + + def __iadd__(self, other): + '''Inplace add from another counter, keeping only positive counts. + + >>> c = Counter('abbb') + >>> c += Counter('bcc') + >>> c + Counter({'b': 4, 'c': 2, 'a': 1}) + + ''' + for elem, count in other.items(): + self[elem] += count + return self._keep_positive() + + def __isub__(self, other): + '''Inplace subtract counter, but keep only results with positive counts. + + >>> c = Counter('abbbc') + >>> c -= Counter('bccd') + >>> c + Counter({'b': 2, 'a': 1}) + + ''' + for elem, count in other.items(): + self[elem] -= count + return self._keep_positive() + + def __ior__(self, other): + '''Inplace union is the maximum of value from either counter. + + >>> c = Counter('abbb') + >>> c |= Counter('bcc') + >>> c + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + for elem, other_count in other.items(): + count = self[elem] + if other_count > count: + self[elem] = other_count + return self._keep_positive() + + def __iand__(self, other): + '''Inplace intersection is the minimum of corresponding counts. + + >>> c = Counter('abbb') + >>> c &= Counter('bcc') + >>> c + Counter({'b': 1}) + + ''' + for elem, count in self.items(): + other_count = other[elem] + if other_count < count: + self[elem] = other_count + return self._keep_positive() + + +######################################################################## +### ChainMap (helper for configparser and string.Template) +######################################################################## + +class ChainMap(MutableMapping): + ''' A ChainMap groups multiple dicts (or other mappings) together + to create a single, updateable view. + + The underlying mappings are stored in a list. 
That list is public and can be
+    accessed or updated using the *maps* attribute.  There is no other state.
+
+    Lookups search the underlying mappings successively until a key is found.
+    In contrast, writes, updates, and deletions only operate on the first
+    mapping.
+
+    '''
+
+    def __init__(self, *maps):
+        '''Initialize a ChainMap by setting *maps* to the given mappings.
+        If no mappings are provided, a single empty dictionary is used.
+
+        '''
+        self.maps = list(maps) or [{}]          # always at least one map
+
+    def __missing__(self, key):
+        raise KeyError(key)
+
+    def __getitem__(self, key):
+        for mapping in self.maps:
+            try:
+                return mapping[key]             # can't use 'key in mapping' with defaultdict
+            except KeyError:
+                pass
+        return self.__missing__(key)            # support subclasses that define __missing__
+
+    def get(self, key, default=None):
+        return self[key] if key in self else default
+
+    def __len__(self):
+        return len(set().union(*self.maps))     # reuses stored hash values if possible
+
+    def __iter__(self):
+        return iter(set().union(*self.maps))
+
+    def __contains__(self, key):
+        return any(key in m for m in self.maps)
+
+    def __bool__(self):
+        return any(self.maps)
+
+    @_recursive_repr()
+    def __repr__(self):
+        return '{0.__class__.__name__}({1})'.format(
+            self, ', '.join(map(repr, self.maps)))
+
+    @classmethod
+    def fromkeys(cls, iterable, *args):
+        'Create a ChainMap with a single dict created from the iterable.'
+        return cls(dict.fromkeys(iterable, *args))
+
+    def copy(self):
+        'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
+        return self.__class__(self.maps[0].copy(), *self.maps[1:])
+
+    __copy__ = copy
+
+    def new_child(self, m=None):                # like Django's Context.push()
+        '''
+        New ChainMap with a new map followed by all previous maps. If no
+        map is provided, an empty dict is used.
+        '''
+        if m is None:
+            m = {}
+        return self.__class__(m, *self.maps)
+
+    @property
+    def parents(self):                          # like Django's Context.pop()
+        'New ChainMap from maps[1:].'
+        return self.__class__(*self.maps[1:])
+
+    def __setitem__(self, key, value):
+        self.maps[0][key] = value
+
+    def __delitem__(self, key):
+        try:
+            del self.maps[0][key]
+        except KeyError:
+            raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+    def popitem(self):
+        'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
+        try:
+            return self.maps[0].popitem()
+        except KeyError:
+            raise KeyError('No keys found in the first mapping.')
+
+    def pop(self, key, *args):
+        'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
+        try:
+            return self.maps[0].pop(key, *args)
+        except KeyError:
+            raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+    def clear(self):
+        'Clear maps[0], leaving maps[1:] intact.'
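+        # For example (hypothetical values): with c = ChainMap({'a': 1}, {'a': 9, 'b': 2}),
+        # c.clear() empties only the first map; afterwards c['a'] resolves to 9
+        # from the parent mapping and c['b'] is still 2.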
+ self.maps[0].clear() + + +################################################################################ +### UserDict +################################################################################ + +class UserDict(MutableMapping): + + # Start by filling-out the abstract methods + def __init__(self, dict=None, **kwargs): + self.data = {} + if dict is not None: + self.update(dict) + if len(kwargs): + self.update(kwargs) + def __len__(self): return len(self.data) + def __getitem__(self, key): + if key in self.data: + return self.data[key] + if hasattr(self.__class__, "__missing__"): + return self.__class__.__missing__(self, key) + raise KeyError(key) + def __setitem__(self, key, item): self.data[key] = item + def __delitem__(self, key): del self.data[key] + def __iter__(self): + return iter(self.data) + + # Modify __contains__ to work correctly when __missing__ is present + def __contains__(self, key): + return key in self.data + + # Now, add the methods in dicts but not in MutableMapping + def __repr__(self): return repr(self.data) + def copy(self): + if self.__class__ is UserDict: + return UserDict(self.data.copy()) + import copy + data = self.data + try: + self.data = {} + c = copy.copy(self) + finally: + self.data = data + c.update(self) + return c + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + + + +################################################################################ +### UserList +################################################################################ + +class UserList(MutableSequence): + """A more or less complete user-defined wrapper around list objects.""" + def __init__(self, initlist=None): + self.data = [] + if initlist is not None: + # XXX should this accept an arbitrary sequence? 
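+            # For illustration (not from the original source): UserList([1, 2])
+            # copies the list via the slice assignment below, while
+            # UserList('abc') falls through to list('abc') -> ['a', 'b', 'c'].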
+ if type(initlist) == type(self.data): + self.data[:] = initlist + elif isinstance(initlist, UserList): + self.data[:] = initlist.data[:] + else: + self.data = list(initlist) + def __repr__(self): return repr(self.data) + def __lt__(self, other): return self.data < self.__cast(other) + def __le__(self, other): return self.data <= self.__cast(other) + def __eq__(self, other): return self.data == self.__cast(other) + def __ne__(self, other): return self.data != self.__cast(other) + def __gt__(self, other): return self.data > self.__cast(other) + def __ge__(self, other): return self.data >= self.__cast(other) + def __cast(self, other): + return other.data if isinstance(other, UserList) else other + def __contains__(self, item): return item in self.data + def __len__(self): return len(self.data) + def __getitem__(self, i): return self.data[i] + def __setitem__(self, i, item): self.data[i] = item + def __delitem__(self, i): del self.data[i] + def __add__(self, other): + if isinstance(other, UserList): + return self.__class__(self.data + other.data) + elif isinstance(other, type(self.data)): + return self.__class__(self.data + other) + return self.__class__(self.data + list(other)) + def __radd__(self, other): + if isinstance(other, UserList): + return self.__class__(other.data + self.data) + elif isinstance(other, type(self.data)): + return self.__class__(other + self.data) + return self.__class__(list(other) + self.data) + def __iadd__(self, other): + if isinstance(other, UserList): + self.data += other.data + elif isinstance(other, type(self.data)): + self.data += other + else: + self.data += list(other) + return self + def __mul__(self, n): + return self.__class__(self.data*n) + __rmul__ = __mul__ + def __imul__(self, n): + self.data *= n + return self + def append(self, item): self.data.append(item) + def insert(self, i, item): self.data.insert(i, item) + def pop(self, i=-1): return self.data.pop(i) + def remove(self, item): self.data.remove(item) + def clear(self): self.data.clear() + def copy(self): return self.__class__(self) + def count(self, item): return self.data.count(item) + def index(self, item, *args): return self.data.index(item, *args) + def reverse(self): self.data.reverse() + def sort(self, *args, **kwds): self.data.sort(*args, **kwds) + def extend(self, other): + if isinstance(other, UserList): + self.data.extend(other.data) + else: + self.data.extend(other) + + + +################################################################################ +### UserString +################################################################################ + +class UserString(Sequence): + def __init__(self, seq): + if isinstance(seq, str): + self.data = seq + elif isinstance(seq, UserString): + self.data = seq.data[:] + else: + self.data = str(seq) + def __str__(self): return str(self.data) + def __repr__(self): return repr(self.data) + def __int__(self): return int(self.data) + def __float__(self): return float(self.data) + def __complex__(self): return complex(self.data) + def __hash__(self): return hash(self.data) + + def __eq__(self, string): + if isinstance(string, UserString): + return self.data == string.data + return self.data == string + def __ne__(self, string): + if isinstance(string, UserString): + return self.data != string.data + return self.data != string + def __lt__(self, string): + if isinstance(string, UserString): + return self.data < string.data + return self.data < string + def __le__(self, string): + if isinstance(string, UserString): + return self.data <= 
string.data + return self.data <= string + def __gt__(self, string): + if isinstance(string, UserString): + return self.data > string.data + return self.data > string + def __ge__(self, string): + if isinstance(string, UserString): + return self.data >= string.data + return self.data >= string + + def __contains__(self, char): + if isinstance(char, UserString): + char = char.data + return char in self.data + + def __len__(self): return len(self.data) + def __getitem__(self, index): return self.__class__(self.data[index]) + def __add__(self, other): + if isinstance(other, UserString): + return self.__class__(self.data + other.data) + elif isinstance(other, str): + return self.__class__(self.data + other) + return self.__class__(self.data + str(other)) + def __radd__(self, other): + if isinstance(other, str): + return self.__class__(other + self.data) + return self.__class__(str(other) + self.data) + def __mul__(self, n): + return self.__class__(self.data*n) + __rmul__ = __mul__ + def __mod__(self, args): + return self.__class__(self.data % args) + + # the following methods are defined in alphabetical order: + def capitalize(self): return self.__class__(self.data.capitalize()) + def center(self, width, *args): + return self.__class__(self.data.center(width, *args)) + def count(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data + return self.data.count(sub, start, end) + def encode(self, encoding=None, errors=None): # XXX improve this? + if encoding: + if errors: + return self.__class__(self.data.encode(encoding, errors)) + return self.__class__(self.data.encode(encoding)) + return self.__class__(self.data.encode()) + def endswith(self, suffix, start=0, end=_sys.maxsize): + return self.data.endswith(suffix, start, end) + def expandtabs(self, tabsize=8): + return self.__class__(self.data.expandtabs(tabsize)) + def find(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data + return self.data.find(sub, start, end) + def format(self, *args, **kwds): + return self.data.format(*args, **kwds) + def index(self, sub, start=0, end=_sys.maxsize): + return self.data.index(sub, start, end) + def isalpha(self): return self.data.isalpha() + def isalnum(self): return self.data.isalnum() + def isdecimal(self): return self.data.isdecimal() + def isdigit(self): return self.data.isdigit() + def isidentifier(self): return self.data.isidentifier() + def islower(self): return self.data.islower() + def isnumeric(self): return self.data.isnumeric() + def isspace(self): return self.data.isspace() + def istitle(self): return self.data.istitle() + def isupper(self): return self.data.isupper() + def join(self, seq): return self.data.join(seq) + def ljust(self, width, *args): + return self.__class__(self.data.ljust(width, *args)) + def lower(self): return self.__class__(self.data.lower()) + def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars)) + def partition(self, sep): + return self.data.partition(sep) + def replace(self, old, new, maxsplit=-1): + if isinstance(old, UserString): + old = old.data + if isinstance(new, UserString): + new = new.data + return self.__class__(self.data.replace(old, new, maxsplit)) + def rfind(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data + return self.data.rfind(sub, start, end) + def rindex(self, sub, start=0, end=_sys.maxsize): + return self.data.rindex(sub, start, end) + def rjust(self, width, *args): + return self.__class__(self.data.rjust(width, 
*args))
+    def rpartition(self, sep):
+        return self.data.rpartition(sep)
+    def rstrip(self, chars=None):
+        return self.__class__(self.data.rstrip(chars))
+    def split(self, sep=None, maxsplit=-1):
+        return self.data.split(sep, maxsplit)
+    def rsplit(self, sep=None, maxsplit=-1):
+        return self.data.rsplit(sep, maxsplit)
+    def splitlines(self, keepends=False): return self.data.splitlines(keepends)
+    def startswith(self, prefix, start=0, end=_sys.maxsize):
+        return self.data.startswith(prefix, start, end)
+    def strip(self, chars=None): return self.__class__(self.data.strip(chars))
+    def swapcase(self): return self.__class__(self.data.swapcase())
+    def title(self): return self.__class__(self.data.title())
+    def translate(self, *args):
+        return self.__class__(self.data.translate(*args))
+    def upper(self): return self.__class__(self.data.upper())
+    def zfill(self, width): return self.__class__(self.data.zfill(width))
diff --git a/ouroboros/collections/__main__.py b/ouroboros/collections/__main__.py
new file mode 100644
index 0000000..763e38e
--- /dev/null
+++ b/ouroboros/collections/__main__.py
@@ -0,0 +1,38 @@
+################################################################################
+### Simple tests
+################################################################################
+
+# verify that instances can be pickled
+from collections import namedtuple
+from pickle import loads, dumps
+Point = namedtuple('Point', 'x, y', True)
+p = Point(x=10, y=20)
+assert p == loads(dumps(p))
+
+# test and demonstrate ability to override methods
+class Point(namedtuple('Point', 'x y')):
+    __slots__ = ()
+    @property
+    def hypot(self):
+        return (self.x ** 2 + self.y ** 2) ** 0.5
+    def __str__(self):
+        return 'Point: x=%6.3f  y=%6.3f  hypot=%6.3f' % (self.x, self.y, self.hypot)
+
+for p in Point(3, 4), Point(14, 5/7.):
+    print(p)
+
+class Point(namedtuple('Point', 'x y')):
+    'Point class with optimized _make() and _replace() without error-checking'
+    __slots__ = ()
+    _make = classmethod(tuple.__new__)
+    def _replace(self, _map=map, **kwds):
+        return self._make(_map(kwds.get, ('x', 'y'), self))
+
+print(Point(11, 22)._replace(x=100))
+
+Point3D = namedtuple('Point3D', Point._fields + ('z',))
+print(Point3D.__doc__)
+
+import doctest, collections
+TestResults = namedtuple('TestResults', 'failed attempted')
+print(TestResults(*doctest.testmod(collections)))
diff --git a/ouroboros/collections/abc.py b/ouroboros/collections/abc.py
new file mode 100644
index 0000000..891600d
--- /dev/null
+++ b/ouroboros/collections/abc.py
@@ -0,0 +1,2 @@
+from _collections_abc import *
+from _collections_abc import __all__
diff --git a/ouroboros/colorsys.py b/ouroboros/colorsys.py
new file mode 100644
index 0000000..b93e384
--- /dev/null
+++ b/ouroboros/colorsys.py
@@ -0,0 +1,164 @@
+"""Conversion functions between RGB and other color systems.
+
+This module provides two functions for each color system ABC:
+
+  rgb_to_abc(r, g, b) --> a, b, c
+  abc_to_rgb(a, b, c) --> r, g, b
+
+All inputs and outputs are triples of floats in the range [0.0...1.0]
+(with the exception of I and Q, which cover a slightly larger range).
+Inputs outside the valid range may cause exceptions or invalid outputs.
+ +Supported color systems: +RGB: Red, Green, Blue components +YIQ: Luminance, Chrominance (used by composite video signals) +HLS: Hue, Luminance, Saturation +HSV: Hue, Saturation, Value +""" + +# References: +# http://en.wikipedia.org/wiki/YIQ +# http://en.wikipedia.org/wiki/HLS_color_space +# http://en.wikipedia.org/wiki/HSV_color_space + +__all__ = ["rgb_to_yiq","yiq_to_rgb","rgb_to_hls","hls_to_rgb", + "rgb_to_hsv","hsv_to_rgb"] + +# Some floating point constants + +ONE_THIRD = 1.0/3.0 +ONE_SIXTH = 1.0/6.0 +TWO_THIRD = 2.0/3.0 + +# YIQ: used by composite video signals (linear combinations of RGB) +# Y: perceived grey level (0.0 == black, 1.0 == white) +# I, Q: color components +# +# There are a great many versions of the constants used in these formulae. +# The ones in this library uses constants from the FCC version of NTSC. + +def rgb_to_yiq(r, g, b): + y = 0.30*r + 0.59*g + 0.11*b + i = 0.74*(r-y) - 0.27*(b-y) + q = 0.48*(r-y) + 0.41*(b-y) + return (y, i, q) + +def yiq_to_rgb(y, i, q): + # r = y + (0.27*q + 0.41*i) / (0.74*0.41 + 0.27*0.48) + # b = y + (0.74*q - 0.48*i) / (0.74*0.41 + 0.27*0.48) + # g = y - (0.30*(r-y) + 0.11*(b-y)) / 0.59 + + r = y + 0.9468822170900693*i + 0.6235565819861433*q + g = y - 0.27478764629897834*i - 0.6356910791873801*q + b = y - 1.1085450346420322*i + 1.7090069284064666*q + + if r < 0.0: + r = 0.0 + if g < 0.0: + g = 0.0 + if b < 0.0: + b = 0.0 + if r > 1.0: + r = 1.0 + if g > 1.0: + g = 1.0 + if b > 1.0: + b = 1.0 + return (r, g, b) + + +# HLS: Hue, Luminance, Saturation +# H: position in the spectrum +# L: color lightness +# S: color saturation + +def rgb_to_hls(r, g, b): + maxc = max(r, g, b) + minc = min(r, g, b) + # XXX Can optimize (maxc+minc) and (maxc-minc) + l = (minc+maxc)/2.0 + if minc == maxc: + return 0.0, l, 0.0 + if l <= 0.5: + s = (maxc-minc) / (maxc+minc) + else: + s = (maxc-minc) / (2.0-maxc-minc) + rc = (maxc-r) / (maxc-minc) + gc = (maxc-g) / (maxc-minc) + bc = (maxc-b) / (maxc-minc) + if r == maxc: + h = bc-gc + elif g == maxc: + h = 2.0+rc-bc + else: + h = 4.0+gc-rc + h = (h/6.0) % 1.0 + return h, l, s + +def hls_to_rgb(h, l, s): + if s == 0.0: + return l, l, l + if l <= 0.5: + m2 = l * (1.0+s) + else: + m2 = l+s-(l*s) + m1 = 2.0*l - m2 + return (_v(m1, m2, h+ONE_THIRD), _v(m1, m2, h), _v(m1, m2, h-ONE_THIRD)) + +def _v(m1, m2, hue): + hue = hue % 1.0 + if hue < ONE_SIXTH: + return m1 + (m2-m1)*hue*6.0 + if hue < 0.5: + return m2 + if hue < TWO_THIRD: + return m1 + (m2-m1)*(TWO_THIRD-hue)*6.0 + return m1 + + +# HSV: Hue, Saturation, Value +# H: position in the spectrum +# S: color saturation ("purity") +# V: color brightness + +def rgb_to_hsv(r, g, b): + maxc = max(r, g, b) + minc = min(r, g, b) + v = maxc + if minc == maxc: + return 0.0, 0.0, v + s = (maxc-minc) / maxc + rc = (maxc-r) / (maxc-minc) + gc = (maxc-g) / (maxc-minc) + bc = (maxc-b) / (maxc-minc) + if r == maxc: + h = bc-gc + elif g == maxc: + h = 2.0+rc-bc + else: + h = 4.0+gc-rc + h = (h/6.0) % 1.0 + return h, s, v + +def hsv_to_rgb(h, s, v): + if s == 0.0: + return v, v, v + i = int(h*6.0) # XXX assume int() truncates! 
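+    # Worked example (illustrative): h=0.5, s=1.0, v=1.0 gives i=3 and f=0.0,
+    # so the function returns (p, q, v) == (0.0, 1.0, 1.0), i.e. pure cyan.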
+    f = (h*6.0) - i
+    p = v*(1.0 - s)
+    q = v*(1.0 - s*f)
+    t = v*(1.0 - s*(1.0-f))
+    i = i%6
+    if i == 0:
+        return v, t, p
+    if i == 1:
+        return q, v, p
+    if i == 2:
+        return p, v, t
+    if i == 3:
+        return p, q, v
+    if i == 4:
+        return t, p, v
+    if i == 5:
+        return v, p, q
+    # Cannot get here
diff --git a/ouroboros/compileall.py b/ouroboros/compileall.py
new file mode 100644
index 0000000..d957ee5
--- /dev/null
+++ b/ouroboros/compileall.py
@@ -0,0 +1,241 @@
+"""Module/script to byte-compile all .py files to .pyc (or .pyo) files.
+
+When called as a script with arguments, this compiles the directories
+given as arguments recursively; the -l option prevents it from
+recursing into directories.
+
+Without arguments, it compiles all modules on sys.path, without
+recursing into subdirectories.  (Even though it should do so for
+packages -- for now, you'll have to deal with packages separately.)
+
+See module py_compile for details of the actual byte-compilation.
+"""
+import os
+import sys
+import importlib.util
+import py_compile
+import struct
+
+__all__ = ["compile_dir","compile_file","compile_path"]
+
+def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
+                quiet=False, legacy=False, optimize=-1):
+    """Byte-compile all modules in the given directory tree.
+
+    Arguments (only dir is required):
+
+    dir:       the directory to byte-compile
+    maxlevels: maximum recursion level (default 10)
+    ddir:      the directory that will be prepended to the path to the
+               file as it is compiled into each byte-code file.
+    force:     if True, force compilation, even if timestamps are up-to-date
+    quiet:     if True, be quiet during compilation
+    legacy:    if True, produce legacy pyc paths instead of PEP 3147 paths
+    optimize:  optimization level or -1 for level of the interpreter
+    """
+    if not quiet:
+        print('Listing {!r}...'.format(dir))
+    try:
+        names = os.listdir(dir)
+    except OSError:
+        print("Can't list {!r}".format(dir))
+        names = []
+    names.sort()
+    success = 1
+    for name in names:
+        if name == '__pycache__':
+            continue
+        fullname = os.path.join(dir, name)
+        if ddir is not None:
+            dfile = os.path.join(ddir, name)
+        else:
+            dfile = None
+        if not os.path.isdir(fullname):
+            if not compile_file(fullname, ddir, force, rx, quiet,
+                                legacy, optimize):
+                success = 0
+        elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
+              os.path.isdir(fullname) and not os.path.islink(fullname)):
+            if not compile_dir(fullname, maxlevels - 1, dfile, force, rx,
+                               quiet, legacy, optimize):
+                success = 0
+    return success
+
+def compile_file(fullname, ddir=None, force=False, rx=None, quiet=False,
+                 legacy=False, optimize=-1):
+    """Byte-compile one file.
+
+    Arguments (only fullname is required):
+
+    fullname:  the file to byte-compile
+    ddir:      if given, the directory name compiled into the
+               byte-code file.
+    force:     if True, force compilation, even if timestamps are up-to-date
+    quiet:     if True, be quiet during compilation
+    legacy:    if True, produce legacy pyc paths instead of PEP 3147 paths
+    optimize:  optimization level or -1 for level of the interpreter
+    """
+    success = 1
+    name = os.path.basename(fullname)
+    if ddir is not None:
+        dfile = os.path.join(ddir, name)
+    else:
+        dfile = None
+    if rx is not None:
+        mo = rx.search(fullname)
+        if mo:
+            return success
+    if os.path.isfile(fullname):
+        if legacy:
+            cfile = fullname + ('c' if __debug__ else 'o')
+        else:
+            if optimize >= 0:
+                cfile = importlib.util.cache_from_source(
+                    fullname, debug_override=not optimize)
+            else:
+                cfile = importlib.util.cache_from_source(fullname)
+            cache_dir = os.path.dirname(cfile)
+        head, tail = name[:-3], name[-3:]
+        if tail == '.py':
+            if not force:
+                try:
+                    mtime = int(os.stat(fullname).st_mtime)
+                    expect = struct.pack('<4sl', importlib.util.MAGIC_NUMBER,
+                                         mtime)
+                    with open(cfile, 'rb') as chandle:
+                        actual = chandle.read(8)
+                    if expect == actual:
+                        return success
+                except OSError:
+                    pass
+            if not quiet:
+                print('Compiling {!r}...'.format(fullname))
+            try:
+                ok = py_compile.compile(fullname, cfile, dfile, True,
+                                        optimize=optimize)
+            except py_compile.PyCompileError as err:
+                if quiet:
+                    print('*** Error compiling {!r}...'.format(fullname))
+                else:
+                    print('*** ', end='')
+                # escape non-printable characters in msg
+                msg = err.msg.encode(sys.stdout.encoding,
+                                     errors='backslashreplace')
+                msg = msg.decode(sys.stdout.encoding)
+                print(msg)
+                success = 0
+            except (SyntaxError, UnicodeError, OSError) as e:
+                if quiet:
+                    print('*** Error compiling {!r}...'.format(fullname))
+                else:
+                    print('*** ', end='')
+                print(e.__class__.__name__ + ':', e)
+                success = 0
+            else:
+                if ok == 0:
+                    success = 0
+    return success
+
+def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=False,
+                 legacy=False, optimize=-1):
+    """Byte-compile all modules on sys.path.
+ + Arguments (all optional): + + skip_curdir: if true, skip current directory (default True) + maxlevels: max recursion level (default 0) + force: as for compile_dir() (default False) + quiet: as for compile_dir() (default False) + legacy: as for compile_dir() (default False) + optimize: as for compile_dir() (default -1) + """ + success = 1 + for dir in sys.path: + if (not dir or dir == os.curdir) and skip_curdir: + print('Skipping current directory') + else: + success = success and compile_dir(dir, maxlevels, None, + force, quiet=quiet, + legacy=legacy, optimize=optimize) + return success + + +def main(): + """Script main program.""" + import argparse + + parser = argparse.ArgumentParser( + description='Utilities to support installing Python libraries.') + parser.add_argument('-l', action='store_const', const=0, + default=10, dest='maxlevels', + help="don't recurse into subdirectories") + parser.add_argument('-f', action='store_true', dest='force', + help='force rebuild even if timestamps are up to date') + parser.add_argument('-q', action='store_true', dest='quiet', + help='output only error messages') + parser.add_argument('-b', action='store_true', dest='legacy', + help='use legacy (pre-PEP3147) compiled file locations') + parser.add_argument('-d', metavar='DESTDIR', dest='ddir', default=None, + help=('directory to prepend to file paths for use in ' + 'compile-time tracebacks and in runtime ' + 'tracebacks in cases where the source file is ' + 'unavailable')) + parser.add_argument('-x', metavar='REGEXP', dest='rx', default=None, + help=('skip files matching the regular expression; ' + 'the regexp is searched for in the full path ' + 'of each file considered for compilation')) + parser.add_argument('-i', metavar='FILE', dest='flist', + help=('add all the files and directories listed in ' + 'FILE to the list considered for compilation; ' + 'if "-", names are read from stdin')) + parser.add_argument('compile_dest', metavar='FILE|DIR', nargs='*', + help=('zero or more file and directory names ' + 'to compile; if no arguments given, defaults ' + 'to the equivalent of -l sys.path')) + args = parser.parse_args() + + compile_dests = args.compile_dest + + if (args.ddir and (len(compile_dests) != 1 + or not os.path.isdir(compile_dests[0]))): + parser.exit('-d destdir requires exactly one directory argument') + if args.rx: + import re + args.rx = re.compile(args.rx) + + # if flist is provided then load it + if args.flist: + try: + with (sys.stdin if args.flist=='-' else open(args.flist)) as f: + for line in f: + compile_dests.append(line.strip()) + except OSError: + print("Error reading file list {}".format(args.flist)) + return False + + success = True + try: + if compile_dests: + for dest in compile_dests: + if os.path.isfile(dest): + if not compile_file(dest, args.ddir, args.force, args.rx, + args.quiet, args.legacy): + success = False + else: + if not compile_dir(dest, args.maxlevels, args.ddir, + args.force, args.rx, args.quiet, + args.legacy): + success = False + return success + else: + return compile_path(legacy=args.legacy, force=args.force, + quiet=args.quiet) + except KeyboardInterrupt: + print("\n[interrupted]") + return False + return True + + +if __name__ == '__main__': + exit_status = int(not main()) + sys.exit(exit_status) diff --git a/ouroboros/concurrent/__init__.py b/ouroboros/concurrent/__init__.py new file mode 100644 index 0000000..196d378 --- /dev/null +++ b/ouroboros/concurrent/__init__.py @@ -0,0 +1 @@ +# This directory is a Python package. 
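+#
+# A minimal usage sketch for the futures API added below (illustrative only,
+# not part of the original sources; assumes a normal CPython threading setup):
+#
+#     from concurrent.futures import ThreadPoolExecutor
+#
+#     with ThreadPoolExecutor(max_workers=2) as executor:
+#         squares = list(executor.map(lambda n: n * n, range(5)))
+#     assert squares == [0, 1, 4, 9, 16]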
diff --git a/ouroboros/concurrent/futures/__init__.py b/ouroboros/concurrent/futures/__init__.py new file mode 100644 index 0000000..b5231f8 --- /dev/null +++ b/ouroboros/concurrent/futures/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2009 Brian Quinlan. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Execute computations asynchronously using threads or processes.""" + +__author__ = 'Brian Quinlan (brian@sweetapp.com)' + +from concurrent.futures._base import (FIRST_COMPLETED, + FIRST_EXCEPTION, + ALL_COMPLETED, + CancelledError, + TimeoutError, + Future, + Executor, + wait, + as_completed) +from concurrent.futures.process import ProcessPoolExecutor +from concurrent.futures.thread import ThreadPoolExecutor diff --git a/ouroboros/concurrent/futures/_base.py b/ouroboros/concurrent/futures/_base.py new file mode 100644 index 0000000..acd05d0 --- /dev/null +++ b/ouroboros/concurrent/futures/_base.py @@ -0,0 +1,575 @@ +# Copyright 2009 Brian Quinlan. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +__author__ = 'Brian Quinlan (brian@sweetapp.com)' + +import collections +import logging +import threading +import time + +FIRST_COMPLETED = 'FIRST_COMPLETED' +FIRST_EXCEPTION = 'FIRST_EXCEPTION' +ALL_COMPLETED = 'ALL_COMPLETED' +_AS_COMPLETED = '_AS_COMPLETED' + +# Possible future states (for internal use by the futures package). +PENDING = 'PENDING' +RUNNING = 'RUNNING' +# The future was cancelled by the user... +CANCELLED = 'CANCELLED' +# ...and _Waiter.add_cancelled() was called by a worker. +CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED' +FINISHED = 'FINISHED' + +_FUTURE_STATES = [ + PENDING, + RUNNING, + CANCELLED, + CANCELLED_AND_NOTIFIED, + FINISHED +] + +_STATE_TO_DESCRIPTION_MAP = { + PENDING: "pending", + RUNNING: "running", + CANCELLED: "cancelled", + CANCELLED_AND_NOTIFIED: "cancelled", + FINISHED: "finished" +} + +# Logger for internal use by the futures package. 
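+# State transitions, summarized from the code below: a normal call moves
+# PENDING -> RUNNING -> FINISHED; a successful Future.cancel() moves
+# PENDING -> CANCELLED, and CANCELLED -> CANCELLED_AND_NOTIFIED once a worker
+# observes the cancellation in set_running_or_notify_cancel().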
+LOGGER = logging.getLogger("concurrent.futures") + +class Error(Exception): + """Base class for all future-related exceptions.""" + pass + +class CancelledError(Error): + """The Future was cancelled.""" + pass + +class TimeoutError(Error): + """The operation exceeded the given deadline.""" + pass + +class _Waiter(object): + """Provides the event that wait() and as_completed() block on.""" + def __init__(self): + self.event = threading.Event() + self.finished_futures = [] + + def add_result(self, future): + self.finished_futures.append(future) + + def add_exception(self, future): + self.finished_futures.append(future) + + def add_cancelled(self, future): + self.finished_futures.append(future) + +class _AsCompletedWaiter(_Waiter): + """Used by as_completed().""" + + def __init__(self): + super(_AsCompletedWaiter, self).__init__() + self.lock = threading.Lock() + + def add_result(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_result(future) + self.event.set() + + def add_exception(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_exception(future) + self.event.set() + + def add_cancelled(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_cancelled(future) + self.event.set() + +class _FirstCompletedWaiter(_Waiter): + """Used by wait(return_when=FIRST_COMPLETED).""" + + def add_result(self, future): + super().add_result(future) + self.event.set() + + def add_exception(self, future): + super().add_exception(future) + self.event.set() + + def add_cancelled(self, future): + super().add_cancelled(future) + self.event.set() + +class _AllCompletedWaiter(_Waiter): + """Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED).""" + + def __init__(self, num_pending_calls, stop_on_exception): + self.num_pending_calls = num_pending_calls + self.stop_on_exception = stop_on_exception + self.lock = threading.Lock() + super().__init__() + + def _decrement_pending_calls(self): + with self.lock: + self.num_pending_calls -= 1 + if not self.num_pending_calls: + self.event.set() + + def add_result(self, future): + super().add_result(future) + self._decrement_pending_calls() + + def add_exception(self, future): + super().add_exception(future) + if self.stop_on_exception: + self.event.set() + else: + self._decrement_pending_calls() + + def add_cancelled(self, future): + super().add_cancelled(future) + self._decrement_pending_calls() + +class _AcquireFutures(object): + """A context manager that does an ordered acquire of Future conditions.""" + + def __init__(self, futures): + self.futures = sorted(futures, key=id) + + def __enter__(self): + for future in self.futures: + future._condition.acquire() + + def __exit__(self, *args): + for future in self.futures: + future._condition.release() + +def _create_and_install_waiters(fs, return_when): + if return_when == _AS_COMPLETED: + waiter = _AsCompletedWaiter() + elif return_when == FIRST_COMPLETED: + waiter = _FirstCompletedWaiter() + else: + pending_count = sum( + f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] for f in fs) + + if return_when == FIRST_EXCEPTION: + waiter = _AllCompletedWaiter(pending_count, stop_on_exception=True) + elif return_when == ALL_COMPLETED: + waiter = _AllCompletedWaiter(pending_count, stop_on_exception=False) + else: + raise ValueError("Invalid return condition: %r" % return_when) + + for f in fs: + f._waiters.append(waiter) + + return waiter + +def as_completed(fs, timeout=None): + """An iterator over the given futures that yields each as it completes. 
+ + Args: + fs: The sequence of Futures (possibly created by different Executors) to + iterate over. + timeout: The maximum number of seconds to wait. If None, then there + is no limit on the wait time. + + Returns: + An iterator that yields the given Futures as they complete (finished or + cancelled). If any given Futures are duplicated, they will be returned + once. + + Raises: + TimeoutError: If the entire result iterator could not be generated + before the given timeout. + """ + if timeout is not None: + end_time = timeout + time.time() + + fs = set(fs) + with _AcquireFutures(fs): + finished = set( + f for f in fs + if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) + pending = fs - finished + waiter = _create_and_install_waiters(fs, _AS_COMPLETED) + + try: + yield from finished + + while pending: + if timeout is None: + wait_timeout = None + else: + wait_timeout = end_time - time.time() + if wait_timeout < 0: + raise TimeoutError( + '%d (of %d) futures unfinished' % ( + len(pending), len(fs))) + + waiter.event.wait(wait_timeout) + + with waiter.lock: + finished = waiter.finished_futures + waiter.finished_futures = [] + waiter.event.clear() + + for future in finished: + yield future + pending.remove(future) + + finally: + for f in fs: + with f._condition: + f._waiters.remove(waiter) + +DoneAndNotDoneFutures = collections.namedtuple( + 'DoneAndNotDoneFutures', 'done not_done') +def wait(fs, timeout=None, return_when=ALL_COMPLETED): + """Wait for the futures in the given sequence to complete. + + Args: + fs: The sequence of Futures (possibly created by different Executors) to + wait upon. + timeout: The maximum number of seconds to wait. If None, then there + is no limit on the wait time. + return_when: Indicates when this function should return. The options + are: + + FIRST_COMPLETED - Return when any future finishes or is + cancelled. + FIRST_EXCEPTION - Return when any future finishes by raising an + exception. If no future raises an exception + then it is equivalent to ALL_COMPLETED. + ALL_COMPLETED - Return when all futures finish or are cancelled. + + Returns: + A named 2-tuple of sets. The first set, named 'done', contains the + futures that completed (is finished or cancelled) before the wait + completed. The second set, named 'not_done', contains uncompleted + futures. + """ + with _AcquireFutures(fs): + done = set(f for f in fs + if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) + not_done = set(fs) - done + + if (return_when == FIRST_COMPLETED) and done: + return DoneAndNotDoneFutures(done, not_done) + elif (return_when == FIRST_EXCEPTION) and done: + if any(f for f in done + if not f.cancelled() and f.exception() is not None): + return DoneAndNotDoneFutures(done, not_done) + + if len(done) == len(fs): + return DoneAndNotDoneFutures(done, not_done) + + waiter = _create_and_install_waiters(fs, return_when) + + waiter.event.wait(timeout) + for f in fs: + with f._condition: + f._waiters.remove(waiter) + + done.update(waiter.finished_futures) + return DoneAndNotDoneFutures(done, set(fs) - done) + +class Future(object): + """Represents the result of an asynchronous computation.""" + + def __init__(self): + """Initializes the future. 
Should not be called by clients."""
+        self._condition = threading.Condition()
+        self._state = PENDING
+        self._result = None
+        self._exception = None
+        self._waiters = []
+        self._done_callbacks = []
+
+    def _invoke_callbacks(self):
+        for callback in self._done_callbacks:
+            try:
+                callback(self)
+            except Exception:
+                LOGGER.exception('exception calling callback for %r', self)
+
+    def __repr__(self):
+        with self._condition:
+            if self._state == FINISHED:
+                if self._exception:
+                    return '<Future at %s state=%s raised %s>' % (
+                        hex(id(self)),
+                        _STATE_TO_DESCRIPTION_MAP[self._state],
+                        self._exception.__class__.__name__)
+                else:
+                    return '<Future at %s state=%s returned %s>' % (
+                        hex(id(self)),
+                        _STATE_TO_DESCRIPTION_MAP[self._state],
+                        self._result.__class__.__name__)
+            return '<Future at %s state=%s>' % (
+                hex(id(self)),
+                _STATE_TO_DESCRIPTION_MAP[self._state])
+
+    def cancel(self):
+        """Cancel the future if possible.
+
+        Returns True if the future was cancelled, False otherwise. A future
+        cannot be cancelled if it is running or has already completed.
+        """
+        with self._condition:
+            if self._state in [RUNNING, FINISHED]:
+                return False
+
+            if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
+                return True
+
+            self._state = CANCELLED
+            self._condition.notify_all()
+
+        self._invoke_callbacks()
+        return True
+
+    def cancelled(self):
+        """Return True if the future was cancelled."""
+        with self._condition:
+            return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]
+
+    def running(self):
+        """Return True if the future is currently executing."""
+        with self._condition:
+            return self._state == RUNNING
+
+    def done(self):
+        """Return True if the future was cancelled or finished executing."""
+        with self._condition:
+            return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]
+
+    def __get_result(self):
+        if self._exception:
+            raise self._exception
+        else:
+            return self._result
+
+    def add_done_callback(self, fn):
+        """Attaches a callable that will be called when the future finishes.
+
+        Args:
+            fn: A callable that will be called with this future as its only
+                argument when the future completes or is cancelled. The callable
+                will always be called by a thread in the same process in which
+                it was added. If the future has already completed or been
+                cancelled then the callable will be called immediately. These
+                callables are called in the order that they were added.
+        """
+        with self._condition:
+            if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]:
+                self._done_callbacks.append(fn)
+                return
+        fn(self)
+
+    def result(self, timeout=None):
+        """Return the result of the call that the future represents.
+
+        Args:
+            timeout: The number of seconds to wait for the result if the future
+                isn't done. If None, then there is no limit on the wait time.
+
+        Returns:
+            The result of the call that the future represents.
+
+        Raises:
+            CancelledError: If the future was cancelled.
+            TimeoutError: If the future didn't finish executing before the given
+                timeout.
+            Exception: If the call raised then that exception will be raised.
+        """
+        with self._condition:
+            if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
+                raise CancelledError()
+            elif self._state == FINISHED:
+                return self.__get_result()
+
+            self._condition.wait(timeout)
+
+            if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
+                raise CancelledError()
+            elif self._state == FINISHED:
+                return self.__get_result()
+            else:
+                raise TimeoutError()
+
+    def exception(self, timeout=None):
+        """Return the exception raised by the call that the future represents.
+
+class Executor(object):
+    """This is an abstract base class for concrete asynchronous executors."""
+
+    def submit(self, fn, *args, **kwargs):
+        """Submits a callable to be executed with the given arguments.
+
+        Schedules the callable to be executed as fn(*args, **kwargs) and returns
+        a Future instance representing the execution of the callable.
+
+        Returns:
+            A Future representing the given call.
+        """
+        raise NotImplementedError()
+
+    def map(self, fn, *iterables, timeout=None):
+        """Returns an iterator equivalent to map(fn, *iterables).
+
+        Args:
+            fn: A callable that will take as many arguments as there are
+                passed iterables.
+            timeout: The maximum number of seconds to wait. If None, then there
+                is no limit on the wait time.
+
+        Returns:
+            An iterator equivalent to: map(fn, *iterables) but the calls may
+            be evaluated out-of-order.
+
+        Raises:
+            TimeoutError: If the entire result iterator could not be generated
+                before the given timeout.
+            Exception: If fn(*args) raises for any values.
+        """
+        if timeout is not None:
+            end_time = timeout + time.time()
+
+        fs = [self.submit(fn, *args) for args in zip(*iterables)]
+
+        # Yield must be hidden in closure so that the futures are submitted
+        # before the first iterator value is required.
+        def result_iterator():
+            try:
+                for future in fs:
+                    if timeout is None:
+                        yield future.result()
+                    else:
+                        yield future.result(end_time - time.time())
+            finally:
+                for future in fs:
+                    future.cancel()
+        return result_iterator()
+
+    def shutdown(self, wait=True):
+        """Clean up the resources associated with the Executor.
+
+        It is safe to call this method several times. Otherwise, no other
+        methods can be called after this one.
+
+        Args:
+            wait: If True then shutdown will not return until all running
+                futures have finished executing and the resources used by the
+                executor have been reclaimed.
+        """
+        pass
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.shutdown(wait=True)
+        return False
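That completes the executor-agnostic machinery. Assuming a concrete executor such as the thread pool defined later in this patch, the module-level helpers compose like this (the worker function is illustrative):

    import concurrent.futures as cf

    def work(n):
        return n * n

    with cf.ThreadPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(work, n) for n in range(8)]

        # Consume results in completion order, bounded by a deadline.
        for fut in cf.as_completed(futures, timeout=30):
            print(fut.result())

        # Or block until everything settles and split done from not_done.
        done, not_done = cf.wait(futures, return_when=cf.ALL_COMPLETED)
        assert not not_done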
diff --git a/ouroboros/concurrent/futures/process.py b/ouroboros/concurrent/futures/process.py
new file mode 100644
index 0000000..07b5225
--- /dev/null
+++ b/ouroboros/concurrent/futures/process.py
@@ -0,0 +1,427 @@
+# Copyright 2009 Brian Quinlan. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Implements ProcessPoolExecutor.
+
+The following diagram and text describe the data-flow through the system:
+
+|======================= In-process =====================|== Out-of-process ==|
+
++----------+     +----------+       +--------+     +-----------+    +---------+
+|          | =>  | Work Ids |    => |        |  => | Call Q    | => |         |
+|          |     +----------+       |        |     +-----------+    |         |
+|          |     | ...      |       |        |     | ...       |    |         |
+|          |     | 6        |       |        |     | 5, call() |    |         |
+|          |     | 7        |       |        |     | ...       |    |         |
+| Process  |     | ...      |       | Local  |     +-----------+    | Process |
+|  Pool    |     +----------+       | Worker |                      |  #1..n  |
+| Executor |                        | Thread |                      |         |
+|          |     +----------- +     |        |     +-----------+    |         |
+|          | <=> | Work Items | <=> |        | <=  | Result Q  | <= |         |
+|          |     +------------+     |        |     +-----------+    |         |
+|          |     | 6: call()  |     |        |     | ...       |    |         |
+|          |     |    future  |     |        |     | 4, result |    |         |
+|          |     | ...        |     |        |     | 3, except |    |         |
++----------+     +------------+     +--------+     +-----------+    +---------+
+
+Executor.submit() called:
+- creates a uniquely numbered _WorkItem and adds it to the "Work Items" dict
+- adds the id of the _WorkItem to the "Work Ids" queue
+
+Local worker thread:
+- reads work ids from the "Work Ids" queue and looks up the corresponding
+  WorkItem from the "Work Items" dict: if the work item has been cancelled then
+  it is simply removed from the dict, otherwise it is repackaged as a
+  _CallItem and put in the "Call Q". New _CallItems are put in the "Call Q"
+  until "Call Q" is full. NOTE: the size of the "Call Q" is kept small because
+  calls placed in the "Call Q" can no longer be cancelled with Future.cancel().
+- reads _ResultItems from "Result Q", updates the future stored in the
+  "Work Items" dict and deletes the dict entry
+
+Process #1..n:
+- reads _CallItems from "Call Q", executes the calls, and puts the resulting
+  _ResultItems in "Result Q"
+"""
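None of that plumbing is visible to callers; usage mirrors any other executor. A minimal sketch (the callable is illustrative, and the __main__ guard matters on platforms where workers are spawned rather than forked, since children re-import the module):

    import concurrent.futures as cf

    def cpu_bound(n):
        return sum(i * i for i in range(n))

    if __name__ == '__main__':
        with cf.ProcessPoolExecutor(max_workers=2) as pool:
            for total in pool.map(cpu_bound, [10**5, 10**6]):
                print(total)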
+
+__author__ = 'Brian Quinlan (brian@sweetapp.com)'
+
+import atexit
+import os
+from concurrent.futures import _base
+import queue
+from queue import Full
+import multiprocessing
+from multiprocessing import SimpleQueue
+from multiprocessing.connection import wait
+import threading
+import weakref
+
+# Workers are created as daemon threads and processes. This is done to allow the
+# interpreter to exit when there are still idle processes in a
+# ProcessPoolExecutor's process pool (i.e. shutdown() was not called). However,
+# allowing workers to die with the interpreter has two undesirable properties:
+#   - The workers would still be running during interpreter shutdown,
+#     meaning that they would fail in unpredictable ways.
+#   - The workers could be killed while evaluating a work item, which could
+#     be bad if the callable being evaluated has external side-effects e.g.
+#     writing to a file.
+#
+# To work around this problem, an exit handler is installed which tells the
+# workers to exit when their work queues are empty and then waits until the
+# threads/processes finish.
+
+_threads_queues = weakref.WeakKeyDictionary()
+_shutdown = False
+
+def _python_exit():
+    global _shutdown
+    _shutdown = True
+    items = list(_threads_queues.items())
+    for t, q in items:
+        q.put(None)
+    for t, q in items:
+        t.join()
+
+# Controls how many more calls than processes will be queued in the call queue.
+# A smaller number will mean that processes spend more time idle waiting for
+# work while a larger number will make Future.cancel() succeed less frequently
+# (Futures in the call queue cannot be cancelled).
+EXTRA_QUEUED_CALLS = 1
+
+class _WorkItem(object):
+    def __init__(self, future, fn, args, kwargs):
+        self.future = future
+        self.fn = fn
+        self.args = args
+        self.kwargs = kwargs
+
+class _ResultItem(object):
+    def __init__(self, work_id, exception=None, result=None):
+        self.work_id = work_id
+        self.exception = exception
+        self.result = result
+
+class _CallItem(object):
+    def __init__(self, work_id, fn, args, kwargs):
+        self.work_id = work_id
+        self.fn = fn
+        self.args = args
+        self.kwargs = kwargs
+
+def _process_worker(call_queue, result_queue):
+    """Evaluates calls from call_queue and places the results in result_queue.
+
+    This worker is run in a separate process.
+
+    Args:
+        call_queue: A multiprocessing.Queue of _CallItems that will be read and
+            evaluated by the worker.
+        result_queue: A multiprocessing.Queue of _ResultItems that will be
+            written to by the worker.
+    """
+    while True:
+        call_item = call_queue.get(block=True)
+        if call_item is None:
+            # Wake up queue management thread
+            result_queue.put(os.getpid())
+            return
+        try:
+            r = call_item.fn(*call_item.args, **call_item.kwargs)
+        except BaseException as e:
+            result_queue.put(_ResultItem(call_item.work_id,
+                                         exception=e))
+        else:
+            result_queue.put(_ResultItem(call_item.work_id,
+                                         result=r))
+
+def _add_call_item_to_queue(pending_work_items,
+                            work_ids,
+                            call_queue):
+    """Fills call_queue with _WorkItems from pending_work_items.
+
+    This function never blocks.
+
+    Args:
+        pending_work_items: A dict mapping work ids to _WorkItems e.g.
+ {5: <_WorkItem...>, 6: <_WorkItem...>, ...} + work_ids: A queue.Queue of work ids e.g. Queue([5, 6, ...]). Work ids + are consumed and the corresponding _WorkItems from + pending_work_items are transformed into _CallItems and put in + call_queue. + call_queue: A multiprocessing.Queue that will be filled with _CallItems + derived from _WorkItems. + """ + while True: + if call_queue.full(): + return + try: + work_id = work_ids.get(block=False) + except queue.Empty: + return + else: + work_item = pending_work_items[work_id] + + if work_item.future.set_running_or_notify_cancel(): + call_queue.put(_CallItem(work_id, + work_item.fn, + work_item.args, + work_item.kwargs), + block=True) + else: + del pending_work_items[work_id] + continue + +def _queue_management_worker(executor_reference, + processes, + pending_work_items, + work_ids_queue, + call_queue, + result_queue): + """Manages the communication between this process and the worker processes. + + This function is run in a local thread. + + Args: + executor_reference: A weakref.ref to the ProcessPoolExecutor that owns + this thread. Used to determine if the ProcessPoolExecutor has been + garbage collected and that this function can exit. + process: A list of the multiprocessing.Process instances used as + workers. + pending_work_items: A dict mapping work ids to _WorkItems e.g. + {5: <_WorkItem...>, 6: <_WorkItem...>, ...} + work_ids_queue: A queue.Queue of work ids e.g. Queue([5, 6, ...]). + call_queue: A multiprocessing.Queue that will be filled with _CallItems + derived from _WorkItems for processing by the process workers. + result_queue: A multiprocessing.Queue of _ResultItems generated by the + process workers. + """ + executor = None + + def shutting_down(): + return _shutdown or executor is None or executor._shutdown_thread + + def shutdown_worker(): + # This is an upper bound + nb_children_alive = sum(p.is_alive() for p in processes.values()) + for i in range(0, nb_children_alive): + call_queue.put_nowait(None) + # Release the queue's resources as soon as possible. + call_queue.close() + # If .join() is not called on the created processes then + # some multiprocessing.Queue methods may deadlock on Mac OS X. + for p in processes.values(): + p.join() + + reader = result_queue._reader + + while True: + _add_call_item_to_queue(pending_work_items, + work_ids_queue, + call_queue) + + sentinels = [p.sentinel for p in processes.values()] + assert sentinels + ready = wait([reader] + sentinels) + if reader in ready: + result_item = reader.recv() + else: + # Mark the process pool broken so that submits fail right now. + executor = executor_reference() + if executor is not None: + executor._broken = True + executor._shutdown_thread = True + executor = None + # All futures in flight must be marked failed + for work_id, work_item in pending_work_items.items(): + work_item.future.set_exception( + BrokenProcessPool( + "A process in the process pool was " + "terminated abruptly while the future was " + "running or pending." + )) + # Delete references to object. See issue16284 + del work_item + pending_work_items.clear() + # Terminate remaining workers forcibly: the queues or their + # locks may be in a dirty state and block forever. 
+ for p in processes.values(): + p.terminate() + shutdown_worker() + return + if isinstance(result_item, int): + # Clean shutdown of a worker using its PID + # (avoids marking the executor broken) + assert shutting_down() + p = processes.pop(result_item) + p.join() + if not processes: + shutdown_worker() + return + elif result_item is not None: + work_item = pending_work_items.pop(result_item.work_id, None) + # work_item can be None if another process terminated (see above) + if work_item is not None: + if result_item.exception: + work_item.future.set_exception(result_item.exception) + else: + work_item.future.set_result(result_item.result) + # Delete references to object. See issue16284 + del work_item + # Check whether we should start shutting down. + executor = executor_reference() + # No more work items can be added if: + # - The interpreter is shutting down OR + # - The executor that owns this worker has been collected OR + # - The executor that owns this worker has been shutdown. + if shutting_down(): + try: + # Since no new work items can be added, it is safe to shutdown + # this thread if there are no pending work items. + if not pending_work_items: + shutdown_worker() + return + except Full: + # This is not a problem: we will eventually be woken up (in + # result_queue.get()) and be able to send a sentinel again. + pass + executor = None + +_system_limits_checked = False +_system_limited = None +def _check_system_limits(): + global _system_limits_checked, _system_limited + if _system_limits_checked: + if _system_limited: + raise NotImplementedError(_system_limited) + _system_limits_checked = True + try: + nsems_max = os.sysconf("SC_SEM_NSEMS_MAX") + except (AttributeError, ValueError): + # sysconf not available or setting not available + return + if nsems_max == -1: + # indetermined limit, assume that limit is determined + # by available memory only + return + if nsems_max >= 256: + # minimum number of semaphores available + # according to POSIX + return + _system_limited = "system provides too few semaphores (%d available, 256 necessary)" % nsems_max + raise NotImplementedError(_system_limited) + + +class BrokenProcessPool(RuntimeError): + """ + Raised when a process in a ProcessPoolExecutor terminated abruptly + while a future was in the running state. + """ + + +class ProcessPoolExecutor(_base.Executor): + def __init__(self, max_workers=None): + """Initializes a new ProcessPoolExecutor instance. + + Args: + max_workers: The maximum number of processes that can be used to + execute the given calls. If None or not given then as many + worker processes will be created as the machine has processors. + """ + _check_system_limits() + + if max_workers is None: + self._max_workers = os.cpu_count() or 1 + else: + self._max_workers = max_workers + + # Make the call queue slightly larger than the number of processes to + # prevent the worker processes from idling. But don't make it too big + # because futures in the call queue cannot be cancelled. + self._call_queue = multiprocessing.Queue(self._max_workers + + EXTRA_QUEUED_CALLS) + # Killed worker processes can produce spurious "broken pipe" + # tracebacks in the queue's own worker thread. But we detect killed + # processes anyway, so silence the tracebacks. + self._call_queue._ignore_epipe = True + self._result_queue = SimpleQueue() + self._work_ids = queue.Queue() + self._queue_management_thread = None + # Map of pids to processes + self._processes = {} + + # Shutdown is a two-step process. 
+        self._shutdown_thread = False
+        self._shutdown_lock = threading.Lock()
+        self._broken = False
+        self._queue_count = 0
+        self._pending_work_items = {}
+
+    def _start_queue_management_thread(self):
+        # When the executor gets lost, the weakref callback will wake up
+        # the queue management thread.
+        def weakref_cb(_, q=self._result_queue):
+            q.put(None)
+        if self._queue_management_thread is None:
+            # Start the processes so that their sentinels are known.
+            self._adjust_process_count()
+            self._queue_management_thread = threading.Thread(
+                    target=_queue_management_worker,
+                    args=(weakref.ref(self, weakref_cb),
+                          self._processes,
+                          self._pending_work_items,
+                          self._work_ids,
+                          self._call_queue,
+                          self._result_queue))
+            self._queue_management_thread.daemon = True
+            self._queue_management_thread.start()
+            _threads_queues[self._queue_management_thread] = self._result_queue
+
+    def _adjust_process_count(self):
+        for _ in range(len(self._processes), self._max_workers):
+            p = multiprocessing.Process(
+                    target=_process_worker,
+                    args=(self._call_queue,
+                          self._result_queue))
+            p.start()
+            self._processes[p.pid] = p
+
+    def submit(self, fn, *args, **kwargs):
+        with self._shutdown_lock:
+            if self._broken:
+                raise BrokenProcessPool('A child process terminated '
+                    'abruptly, the process pool is not usable anymore')
+            if self._shutdown_thread:
+                raise RuntimeError('cannot schedule new futures after shutdown')
+
+            f = _base.Future()
+            w = _WorkItem(f, fn, args, kwargs)
+
+            self._pending_work_items[self._queue_count] = w
+            self._work_ids.put(self._queue_count)
+            self._queue_count += 1
+            # Wake up queue management thread
+            self._result_queue.put(None)
+
+            self._start_queue_management_thread()
+            return f
+    submit.__doc__ = _base.Executor.submit.__doc__
+
+    def shutdown(self, wait=True):
+        with self._shutdown_lock:
+            self._shutdown_thread = True
+        if self._queue_management_thread:
+            # Wake up queue management thread
+            self._result_queue.put(None)
+            if wait:
+                self._queue_management_thread.join()
+        # To reduce the risk of opening too many files, remove references to
+        # objects that use file descriptors.
+        self._queue_management_thread = None
+        self._call_queue = None
+        self._result_queue = None
+        self._processes = None
+    shutdown.__doc__ = _base.Executor.shutdown.__doc__
+
+atexit.register(_python_exit)
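A property of the management logic above that deserves a demonstration: if a worker dies abruptly, every in-flight future fails with BrokenProcessPool and further submits are refused. A sketch, under the assumption that os._exit is a fair stand-in for a crashed or killed worker:

    import os
    import concurrent.futures as cf
    from concurrent.futures.process import BrokenProcessPool

    def die():
        os._exit(1)   # terminate the worker process without cleanup

    if __name__ == '__main__':
        pool = cf.ProcessPoolExecutor(max_workers=1)
        fut = pool.submit(die)
        try:
            fut.result()
        except BrokenProcessPool:
            print('pool marked broken; new submits now raise as well')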
diff --git a/ouroboros/concurrent/futures/thread.py b/ouroboros/concurrent/futures/thread.py
new file mode 100644
index 0000000..f9beb0f
--- /dev/null
+++ b/ouroboros/concurrent/futures/thread.py
@@ -0,0 +1,132 @@
+# Copyright 2009 Brian Quinlan. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Implements ThreadPoolExecutor."""
+
+__author__ = 'Brian Quinlan (brian@sweetapp.com)'
+
+import atexit
+from concurrent.futures import _base
+import queue
+import threading
+import weakref
+
+# Workers are created as daemon threads. This is done to allow the interpreter
+# to exit when there are still idle threads in a ThreadPoolExecutor's thread
+# pool (i.e. shutdown() was not called). However, allowing workers to die with
+# the interpreter has two undesirable properties:
+#   - The workers would still be running during interpreter shutdown,
+#     meaning that they would fail in unpredictable ways.
+#   - The workers could be killed while evaluating a work item, which could
+#     be bad if the callable being evaluated has external side-effects e.g.
+#     writing to a file.
+#
+# To work around this problem, an exit handler is installed which tells the
+# workers to exit when their work queues are empty and then waits until the
+# threads finish.
+
+_threads_queues = weakref.WeakKeyDictionary()
+_shutdown = False
+
+def _python_exit():
+    global _shutdown
+    _shutdown = True
+    items = list(_threads_queues.items())
+    for t, q in items:
+        q.put(None)
+    for t, q in items:
+        t.join()
+
+atexit.register(_python_exit)
+
+class _WorkItem(object):
+    def __init__(self, future, fn, args, kwargs):
+        self.future = future
+        self.fn = fn
+        self.args = args
+        self.kwargs = kwargs
+
+    def run(self):
+        if not self.future.set_running_or_notify_cancel():
+            return
+
+        try:
+            result = self.fn(*self.args, **self.kwargs)
+        except BaseException as e:
+            self.future.set_exception(e)
+        else:
+            self.future.set_result(result)
+
+def _worker(executor_reference, work_queue):
+    try:
+        while True:
+            work_item = work_queue.get(block=True)
+            if work_item is not None:
+                work_item.run()
+                # Delete references to object. See issue16284
+                del work_item
+                continue
+            executor = executor_reference()
+            # Exit if:
+            #   - The interpreter is shutting down OR
+            #   - The executor that owns the worker has been collected OR
+            #   - The executor that owns the worker has been shutdown.
+            if _shutdown or executor is None or executor._shutdown:
+                # Notify other workers
+                work_queue.put(None)
+                return
+            del executor
+    except BaseException:
+        _base.LOGGER.critical('Exception in worker', exc_info=True)
+
+class ThreadPoolExecutor(_base.Executor):
+    def __init__(self, max_workers):
+        """Initializes a new ThreadPoolExecutor instance.
+
+        Args:
+            max_workers: The maximum number of threads that can be used to
+                execute the given calls.
+        """
+        self._max_workers = max_workers
+        self._work_queue = queue.Queue()
+        self._threads = set()
+        self._shutdown = False
+        self._shutdown_lock = threading.Lock()
+
+    def submit(self, fn, *args, **kwargs):
+        with self._shutdown_lock:
+            if self._shutdown:
+                raise RuntimeError('cannot schedule new futures after shutdown')
+
+            f = _base.Future()
+            w = _WorkItem(f, fn, args, kwargs)
+
+            self._work_queue.put(w)
+            self._adjust_thread_count()
+            return f
+    submit.__doc__ = _base.Executor.submit.__doc__
+
+    def _adjust_thread_count(self):
+        # When the executor gets lost, the weakref callback will wake up
+        # the worker threads.
+        def weakref_cb(_, q=self._work_queue):
+            q.put(None)
+        # TODO(bquinlan): Should avoid creating new threads if there are more
+        # idle threads than items in the work queue.
+        if len(self._threads) < self._max_workers:
+            t = threading.Thread(target=_worker,
+                                 args=(weakref.ref(self, weakref_cb),
+                                       self._work_queue))
+            t.daemon = True
+            t.start()
+            self._threads.add(t)
+            _threads_queues[t] = self._work_queue
+
+    def shutdown(self, wait=True):
+        with self._shutdown_lock:
+            self._shutdown = True
+            self._work_queue.put(None)
+        if wait:
+            for t in self._threads:
+                t.join()
+    shutdown.__doc__ = _base.Executor.shutdown.__doc__
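Since threads share the parent's address space, this executor suits I/O-bound work. A typical pattern (the URLs and pool size here are illustrative only):

    import concurrent.futures as cf
    import urllib.request

    URLS = ['http://example.com/', 'http://example.org/']

    def fetch(url, timeout=10):
        with urllib.request.urlopen(url, timeout=timeout) as conn:
            return len(conn.read())

    with cf.ThreadPoolExecutor(max_workers=4) as pool:
        futures = {pool.submit(fetch, url): url for url in URLS}
        for fut in cf.as_completed(futures):
            print(futures[fut], '->', fut.result(), 'bytes')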
diff --git a/ouroboros/configparser.py b/ouroboros/configparser.py
new file mode 100644
index 0000000..4ee8307
--- /dev/null
+++ b/ouroboros/configparser.py
@@ -0,0 +1,1254 @@
+"""Configuration file parser.
+
+A configuration file consists of sections, led by a "[section]" header,
+and followed by "name: value" entries, with continuations and such in
+the style of RFC 822.
+
+Intrinsic defaults can be specified by passing them into the
+ConfigParser constructor as a dictionary.
+
+class:
+
+ConfigParser -- responsible for parsing a list of
+                configuration files, and managing the parsed database.
+
+    methods:
+
+    __init__(defaults=None, dict_type=_default_dict, allow_no_value=False,
+             delimiters=('=', ':'), comment_prefixes=('#', ';'),
+             inline_comment_prefixes=None, strict=True,
+             empty_lines_in_values=True):
+        Create the parser. When `defaults' is given, it is initialized into the
+        dictionary of intrinsic defaults. The keys must be strings, the values
+        must be appropriate for %()s string interpolation.
+
+        When `dict_type' is given, it will be used to create the dictionary
+        objects for the list of sections, for the options within a section, and
+        for the default values.
+
+        When `delimiters' is given, it will be used as the set of substrings
+        that divide keys from values.
+
+        When `comment_prefixes' is given, it will be used as the set of
+        substrings that prefix comments in empty lines. Comments can be
+        indented.
+
+        When `inline_comment_prefixes' is given, it will be used as the set of
+        substrings that prefix comments in non-empty lines.
+
+        When `strict` is True, the parser won't allow for any section or option
+        duplicates while reading from a single source (file, string or
+        dictionary). Default is True.
+
+        When `empty_lines_in_values' is False (default: True), each empty line
+        marks the end of an option. Otherwise, internal empty lines of
+        a multiline option are kept as part of the value.
+
+        When `allow_no_value' is True (default: False), options without
+        values are accepted; the value presented for these is None.
+
+    sections()
+        Return all the configuration section names, sans DEFAULT.
+
+    has_section(section)
+        Return whether the given section exists.
+
+    has_option(section, option)
+        Return whether the given option exists in the given section.
+
+    options(section)
+        Return list of configuration options for the named section.
+
+    read(filenames, encoding=None)
+        Read and parse the list of named configuration files, given by
+        name. A single filename is also allowed. Non-existing files
+        are ignored. Return list of successfully read files.
+
+    read_file(f, filename=None)
+        Read and parse one configuration file, given as a file object.
+        The filename defaults to f.name; it is only used in error
+        messages (if f has no `name' attribute, the string `<???>' is used).
+
+    read_string(string)
+        Read configuration from a given string.
+
+    read_dict(dictionary)
+        Read configuration from a dictionary. Keys are section names,
+        values are dictionaries with keys and values that should be present
+        in the section. If the used dictionary type preserves order, sections
+        and their keys will be added in order. Values are automatically
+        converted to strings.
+
+    get(section, option, raw=False, vars=None, fallback=_UNSET)
+        Return a string value for the named option. All % interpolations are
+        expanded in the return values, based on the defaults passed into the
+        constructor and the DEFAULT section. Additional substitutions may be
+        provided using the `vars' argument, which must be a dictionary whose
+        contents override any pre-existing defaults. If `option' is a key in
+        `vars', the value from `vars' is used.
+
+    getint(section, options, raw=False, vars=None, fallback=_UNSET)
+        Like get(), but convert value to an integer.
+
+    getfloat(section, options, raw=False, vars=None, fallback=_UNSET)
+        Like get(), but convert value to a float.
+ + getboolean(section, options, raw=False, vars=None, fallback=_UNSET) + Like get(), but convert value to a boolean (currently case + insensitively defined as 0, false, no, off for False, and 1, true, + yes, on for True). Returns False or True. + + items(section=_UNSET, raw=False, vars=None) + If section is given, return a list of tuples with (name, value) for + each option in the section. Otherwise, return a list of tuples with + (section_name, section_proxy) for each section, including DEFAULTSECT. + + remove_section(section) + Remove the given file section and all its options. + + remove_option(section, option) + Remove the given option from the given section. + + set(section, option, value) + Set the given option. + + write(fp, space_around_delimiters=True) + Write the configuration state in .ini format. If + `space_around_delimiters' is True (the default), delimiters + between keys and values are surrounded by spaces. +""" + +from collections.abc import MutableMapping +from collections import OrderedDict as _default_dict, ChainMap as _ChainMap +import functools +import io +import itertools +import re +import sys +import warnings + +__all__ = ["NoSectionError", "DuplicateOptionError", "DuplicateSectionError", + "NoOptionError", "InterpolationError", "InterpolationDepthError", + "InterpolationSyntaxError", "ParsingError", + "MissingSectionHeaderError", + "ConfigParser", "SafeConfigParser", "RawConfigParser", + "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH"] + +DEFAULTSECT = "DEFAULT" + +MAX_INTERPOLATION_DEPTH = 10 + + + +# exception classes +class Error(Exception): + """Base class for ConfigParser exceptions.""" + + def __init__(self, msg=''): + self.message = msg + Exception.__init__(self, msg) + + def __repr__(self): + return self.message + + __str__ = __repr__ + + +class NoSectionError(Error): + """Raised when no section matches a requested option.""" + + def __init__(self, section): + Error.__init__(self, 'No section: %r' % (section,)) + self.section = section + self.args = (section, ) + + +class DuplicateSectionError(Error): + """Raised when a section is repeated in an input source. + + Possible repetitions that raise this exception are: multiple creation + using the API or in strict parsers when a section is found more than once + in a single input file, string or dictionary. + """ + + def __init__(self, section, source=None, lineno=None): + msg = [repr(section), " already exists"] + if source is not None: + message = ["While reading from ", repr(source)] + if lineno is not None: + message.append(" [line {0:2d}]".format(lineno)) + message.append(": section ") + message.extend(msg) + msg = message + else: + msg.insert(0, "Section ") + Error.__init__(self, "".join(msg)) + self.section = section + self.source = source + self.lineno = lineno + self.args = (section, source, lineno) + + +class DuplicateOptionError(Error): + """Raised by strict parsers when an option is repeated in an input source. + + Current implementation raises this exception only when an option is found + more than once in a single file, string or dictionary. 
+ """ + + def __init__(self, section, option, source=None, lineno=None): + msg = [repr(option), " in section ", repr(section), + " already exists"] + if source is not None: + message = ["While reading from ", repr(source)] + if lineno is not None: + message.append(" [line {0:2d}]".format(lineno)) + message.append(": option ") + message.extend(msg) + msg = message + else: + msg.insert(0, "Option ") + Error.__init__(self, "".join(msg)) + self.section = section + self.option = option + self.source = source + self.lineno = lineno + self.args = (section, option, source, lineno) + + +class NoOptionError(Error): + """A requested option was not found.""" + + def __init__(self, option, section): + Error.__init__(self, "No option %r in section: %r" % + (option, section)) + self.option = option + self.section = section + self.args = (option, section) + + +class InterpolationError(Error): + """Base class for interpolation-related exceptions.""" + + def __init__(self, option, section, msg): + Error.__init__(self, msg) + self.option = option + self.section = section + self.args = (option, section, msg) + + +class InterpolationMissingOptionError(InterpolationError): + """A string substitution required a setting which was not available.""" + + def __init__(self, option, section, rawval, reference): + msg = ("Bad value substitution:\n" + "\tsection: [%s]\n" + "\toption : %s\n" + "\tkey : %s\n" + "\trawval : %s\n" + % (section, option, reference, rawval)) + InterpolationError.__init__(self, option, section, msg) + self.reference = reference + self.args = (option, section, rawval, reference) + + +class InterpolationSyntaxError(InterpolationError): + """Raised when the source text contains invalid syntax. + + Current implementation raises this exception when the source text into + which substitutions are made does not conform to the required syntax. + """ + + +class InterpolationDepthError(InterpolationError): + """Raised when substitutions are nested too deeply.""" + + def __init__(self, option, section, rawval): + msg = ("Value interpolation too deeply recursive:\n" + "\tsection: [%s]\n" + "\toption : %s\n" + "\trawval : %s\n" + % (section, option, rawval)) + InterpolationError.__init__(self, option, section, msg) + self.args = (option, section, rawval) + + +class ParsingError(Error): + """Raised when a configuration file does not follow legal syntax.""" + + def __init__(self, source=None, filename=None): + # Exactly one of `source'/`filename' arguments has to be given. + # `filename' kept for compatibility. + if filename and source: + raise ValueError("Cannot specify both `filename' and `source'. " + "Use `source'.") + elif not filename and not source: + raise ValueError("Required argument `source' not given.") + elif filename: + source = filename + Error.__init__(self, 'Source contains parsing errors: %r' % source) + self.source = source + self.errors = [] + self.args = (source, ) + + @property + def filename(self): + """Deprecated, use `source'.""" + warnings.warn( + "The 'filename' attribute will be removed in future versions. " + "Use 'source' instead.", + DeprecationWarning, stacklevel=2 + ) + return self.source + + @filename.setter + def filename(self, value): + """Deprecated, user `source'.""" + warnings.warn( + "The 'filename' attribute will be removed in future versions. 
" + "Use 'source' instead.", + DeprecationWarning, stacklevel=2 + ) + self.source = value + + def append(self, lineno, line): + self.errors.append((lineno, line)) + self.message += '\n\t[line %2d]: %s' % (lineno, line) + + +class MissingSectionHeaderError(ParsingError): + """Raised when a key-value pair is found before any section header.""" + + def __init__(self, filename, lineno, line): + Error.__init__( + self, + 'File contains no section headers.\nfile: %r, line: %d\n%r' % + (filename, lineno, line)) + self.source = filename + self.lineno = lineno + self.line = line + self.args = (filename, lineno, line) + + +# Used in parser getters to indicate the default behaviour when a specific +# option is not found it to raise an exception. Created to enable `None' as +# a valid fallback value. +_UNSET = object() + + +class Interpolation: + """Dummy interpolation that passes the value through with no changes.""" + + def before_get(self, parser, section, option, value, defaults): + return value + + def before_set(self, parser, section, option, value): + return value + + def before_read(self, parser, section, option, value): + return value + + def before_write(self, parser, section, option, value): + return value + + +class BasicInterpolation(Interpolation): + """Interpolation as implemented in the classic ConfigParser. + + The option values can contain format strings which refer to other values in + the same section, or values in the special default section. + + For example: + + something: %(dir)s/whatever + + would resolve the "%(dir)s" to the value of dir. All reference + expansions are done late, on demand. If a user needs to use a bare % in + a configuration file, she can escape it by writing %%. Other % usage + is considered a user error and raises `InterpolationSyntaxError'.""" + + _KEYCRE = re.compile(r"%\(([^)]+)\)s") + + def before_get(self, parser, section, option, value, defaults): + L = [] + self._interpolate_some(parser, option, L, value, section, defaults, 1) + return ''.join(L) + + def before_set(self, parser, section, option, value): + tmp_value = value.replace('%%', '') # escaped percent signs + tmp_value = self._KEYCRE.sub('', tmp_value) # valid syntax + if '%' in tmp_value: + raise ValueError("invalid interpolation syntax in %r at " + "position %d" % (value, tmp_value.find('%'))) + return value + + def _interpolate_some(self, parser, option, accum, rest, section, map, + depth): + if depth > MAX_INTERPOLATION_DEPTH: + raise InterpolationDepthError(option, section, rest) + while rest: + p = rest.find("%") + if p < 0: + accum.append(rest) + return + if p > 0: + accum.append(rest[:p]) + rest = rest[p:] + # p is no longer used + c = rest[1:2] + if c == "%": + accum.append("%") + rest = rest[2:] + elif c == "(": + m = self._KEYCRE.match(rest) + if m is None: + raise InterpolationSyntaxError(option, section, + "bad interpolation variable reference %r" % rest) + var = parser.optionxform(m.group(1)) + rest = rest[m.end():] + try: + v = map[var] + except KeyError: + raise InterpolationMissingOptionError( + option, section, rest, var) + if "%" in v: + self._interpolate_some(parser, option, accum, v, + section, map, depth + 1) + else: + accum.append(v) + else: + raise InterpolationSyntaxError( + option, section, + "'%%' must be followed by '%%' or '(', " + "found: %r" % (rest,)) + + +class ExtendedInterpolation(Interpolation): + """Advanced variant of interpolation, supports the syntax used by + `zc.buildout'. 
Enables interpolation between sections.""" + + _KEYCRE = re.compile(r"\$\{([^}]+)\}") + + def before_get(self, parser, section, option, value, defaults): + L = [] + self._interpolate_some(parser, option, L, value, section, defaults, 1) + return ''.join(L) + + def before_set(self, parser, section, option, value): + tmp_value = value.replace('$$', '') # escaped dollar signs + tmp_value = self._KEYCRE.sub('', tmp_value) # valid syntax + if '$' in tmp_value: + raise ValueError("invalid interpolation syntax in %r at " + "position %d" % (value, tmp_value.find('$'))) + return value + + def _interpolate_some(self, parser, option, accum, rest, section, map, + depth): + if depth > MAX_INTERPOLATION_DEPTH: + raise InterpolationDepthError(option, section, rest) + while rest: + p = rest.find("$") + if p < 0: + accum.append(rest) + return + if p > 0: + accum.append(rest[:p]) + rest = rest[p:] + # p is no longer used + c = rest[1:2] + if c == "$": + accum.append("$") + rest = rest[2:] + elif c == "{": + m = self._KEYCRE.match(rest) + if m is None: + raise InterpolationSyntaxError(option, section, + "bad interpolation variable reference %r" % rest) + path = m.group(1).split(':') + rest = rest[m.end():] + sect = section + opt = option + try: + if len(path) == 1: + opt = parser.optionxform(path[0]) + v = map[opt] + elif len(path) == 2: + sect = path[0] + opt = parser.optionxform(path[1]) + v = parser.get(sect, opt, raw=True) + else: + raise InterpolationSyntaxError( + option, section, + "More than one ':' found: %r" % (rest,)) + except (KeyError, NoSectionError, NoOptionError): + raise InterpolationMissingOptionError( + option, section, rest, ":".join(path)) + if "$" in v: + self._interpolate_some(parser, opt, accum, v, sect, + dict(parser.items(sect, raw=True)), + depth + 1) + else: + accum.append(v) + else: + raise InterpolationSyntaxError( + option, section, + "'$' must be followed by '$' or '{', " + "found: %r" % (rest,)) + + +class LegacyInterpolation(Interpolation): + """Deprecated interpolation used in old versions of ConfigParser. + Use BasicInterpolation or ExtendedInterpolation instead.""" + + _KEYCRE = re.compile(r"%\(([^)]*)\)s|.") + + def before_get(self, parser, section, option, value, vars): + rawval = value + depth = MAX_INTERPOLATION_DEPTH + while depth: # Loop through this until it's done + depth -= 1 + if value and "%(" in value: + replace = functools.partial(self._interpolation_replace, + parser=parser) + value = self._KEYCRE.sub(replace, value) + try: + value = value % vars + except KeyError as e: + raise InterpolationMissingOptionError( + option, section, rawval, e.args[0]) + else: + break + if value and "%(" in value: + raise InterpolationDepthError(option, section, rawval) + return value + + def before_set(self, parser, section, option, value): + return value + + @staticmethod + def _interpolation_replace(match, parser): + s = match.group(1) + if s is None: + return match.group() + else: + return "%%(%s)s" % parser.optionxform(s) + + +class RawConfigParser(MutableMapping): + """ConfigParser that does not do interpolation.""" + + # Regular expressions for parsing section headers and options + _SECT_TMPL = r""" + \[ # [ + (?P
<header>[^]]+)                  # very permissive!
+        \]                                 # ]
+        """
+    _OPT_TMPL = r"""
+        (?P