-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
744 additions
and
133 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.0.3 | ||
1.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
# -*- coding: UTF-8 -*- | ||
"""Generic baseN functions. | ||
""" | ||
from math import log | ||
from six import integer_types, string_types | ||
from string import printable | ||
from types import FunctionType | ||
|
||
from .__common__ import * | ||
|
||
|
||
# generic base en/decoding functions | ||
class BaseError(ValueError):
    """Root of the exception hierarchy of the generic base-N codecs.

    Derives from ValueError, so handlers catching ValueError also catch it.
    """


class BaseDecodeError(BaseError):
    """Error category for failures related to base-N decoding."""


class BaseEncodeError(BaseError):
    """Error category for failures related to base-N encoding."""
|
||
|
||
def _generate_charset(n): | ||
""" | ||
Generate a characters set. | ||
:param n: size of charset | ||
""" | ||
if 1 < n <= 100: | ||
return printable[:n] | ||
elif 100 < n < 256: | ||
return "".join(chr(i) for i in range(n)) | ||
raise ValueError("Bad size of character set") | ||
|
||
|
||
def _get_charset(charset, p=""): | ||
""" | ||
Charaters set selection function. It allows to define charsets in many | ||
different ways. | ||
:param charset: charset object, can be a string (the charset itself), a | ||
function (that chooses the right charset depending on the | ||
input parameter) or a dictionary (either by exact key or by | ||
pattern matching) | ||
:param p: the parameter for choosing the charset | ||
""" | ||
# case 1: charset is a function, so return its result | ||
if isinstance(charset, FunctionType): | ||
return charset(p) | ||
# case 2: charset is a string, so return it | ||
elif isinstance(charset, string_types): | ||
return charset | ||
# case 3: charset is a dict with keys '' and 'inv', typically for a charset | ||
# using lowercase and uppercase characters that can be inverted | ||
elif isinstance(charset, dict) and list(charset.keys()) == ["", "inv"]: | ||
return charset["inv" if re.match(r"[-_]inv(erted)?$", p) else ""] | ||
# case 4: charset is a dict, but not with the specific keys '' and 'inv', so | ||
# consider it as pattern-charset pairs | ||
elif isinstance(charset, dict): | ||
# try to handle [p]arameter as a simple key | ||
try: | ||
return charset[p] | ||
except KeyError: | ||
pass | ||
# or handle [p]arameter as a pattern | ||
default, n = None, None | ||
for pattern, cset in charset.items(): | ||
n = len(cset) | ||
if pattern == "": | ||
default = cset | ||
continue | ||
if re.match(pattern, p): | ||
return cset | ||
# special case: the given [p]arameter can be the charset itself if | ||
# it has the right length | ||
p = re.sub(r"^[-_]+", "", p) | ||
if len(p) == n: | ||
return p | ||
# or simply rely on key '' | ||
if default is not None: | ||
return default | ||
raise ValueError("Bad charset descriptor") | ||
|
||
|
||
def base_encode(input, charset, errors="strict", exc=BaseEncodeError):
    """
    Base-10 to base-N encoding.

    :param input:   input (str or int) to be encoded ; a non-integer input is
                     first converted with s2i (defined outside this function,
                     presumably a string-to-integer helper -- to be confirmed)
    :param charset: base-N characters set (any indexable sequence of at least
                     2 characters)
    :param errors:  errors handling marker (not used by this function)
    :param exc:     exception to be raised in case of error
    :return:        the base-N representation, most significant digit first ;
                     an empty string when the integer value is not positive
    :raise exc:     if the charset holds less than 2 characters
    """
    i = input if isinstance(input, integer_types) else s2i(input)
    n = len(charset)
    # a 1-character charset would never make the quotient decrease (endless
    #  loop) and an empty one would divide by zero
    if n < 2:
        raise exc("Bad charset ; at least 2 characters are required")
    # digits are produced least significant first, hence the final reversal
    digits = []
    while i > 0:
        i, c = divmod(i, n)
        digits.append(charset[c])
    return "".join(reversed(digits))
|
||
|
||
def base_decode(input, charset, errors="strict", exc=BaseDecodeError):
    """
    Base-N to base-10 decoding.

    :param input:   input to be decoded
    :param charset: base-N characters set
    :param errors:  errors handling marker ; "strict" raises on a character
                     that is not in the charset while "ignore" and "replace"
                     both skip it
    :param exc:     exception to be raised in case of error
    :return:        the decoded integer re-encoded with base_encode over the
                     256 characters with code points 0 to 255
    :raise exc:        in "strict" mode, on a character outside the charset
    :raise ValueError: on an unsupported errors handling marker (only checked
                        when a bad character is met)
    """
    i, n = 0, len(charset)
    for k, c in enumerate(input):
        try:
            i = i * n + charset.index(c)
        except ValueError:
            if errors == "strict":
                raise exc("'base' codec can't decode character '{}' in position"
                          " {}".format(c, k))
            elif errors in ["ignore", "replace"]:
                continue
            else:
                raise ValueError("Unsupported error handling {}".format(errors))
    # convert the accumulated integer back to a string of 8-bits characters
    return base_encode(i, [chr(j) for j in range(256)], errors, exc)
|
||
|
||
def base(charset, pattern=None, pow2=False,
         encode_template=base_encode, decode_template=base_decode):
    """
    Base-N codec factory.

    :param charset:         charset selection object (anything accepted by
                             _get_charset) or an integer, in which case the
                             charset is generated with _generate_charset
    :param pattern:         matching pattern for the codec name (first
                             capturing group is used as the parameter for
                             selecting the charset) ; defaults to "base<N>"
    :param pow2:            whether the base codec's N is a power of 2
    :param encode_template: function called as f(input, charset, errors) to
                             encode
    :param decode_template: function called as f(input, charset, errors) to
                             decode
    :raise BaseError: if pow2 is set and N is not a power of 2
    """
    is_n = isinstance(charset, int)
    if is_n:
        n = len(_generate_charset(charset))
    else:
        n = len(_get_charset(charset))
    nb = log(n, 2)
    if pow2 and nb != int(nb):
        raise BaseError("Bad charset ; {} is not a power of 2".format(n))

    def _select(param):
        # the charset is resolved when the codec function is built, not on
        #  every en/decoding call
        if is_n:
            return _generate_charset(n)
        return _get_charset(charset, param)

    def encode(param=""):
        a = _select(param)

        def _encode(input, errors="strict"):
            return encode_template(input, a, errors), len(input)
        return _encode

    def decode(param=""):
        a = _select(param)

        def _decode(input, errors="strict"):
            return decode_template(input, a, errors), len(input)
        return _decode

    # register the codec through add (not defined in this function)
    name = "base{}".format(n)
    add(name, encode, decode, name if pattern is None else pattern)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# -*- coding: UTF-8 -*- | ||
"""BaseN functions with N a power of 2. | ||
""" | ||
from math import ceil, log | ||
|
||
from .__common__ import * | ||
from ._base import base, _get_charset, BaseError | ||
|
||
|
||
# base en/decoding functions for N a power of 2 | ||
class Base2NError(BaseError):
    """Root of the exception hierarchy of the power-of-2 base-N codecs."""


class Base2NDecodeError(Base2NError):
    """Decoding error of a power-of-2 base-N codec.

    Derives from Base2NError (itself a BaseError), so that handlers catching
    either of them also catch this error.
    """


class Base2NEncodeError(Base2NError):
    """Encoding error of a power-of-2 base-N codec.

    Derives from Base2NError (itself a BaseError), so that handlers catching
    either of them also catch this error.
    """
|
||
|
||
def base2n(charset, pattern=None):
    """
    Base-N codec factory for N a power of 2.

    Thin wrapper around base() that enforces the power-of-2 check and plugs
     in the bit-oriented base2n_encode/base2n_decode templates.

    :param charset: charset selection function
    :param pattern: matching pattern for the codec name (first capturing group
                     is used as the parameter for selecting the charset)
    """
    base(charset, pattern, pow2=True,
         encode_template=base2n_encode, decode_template=base2n_decode)
|
||
|
||
def base2n_encode(string, charset, errors="strict", exc=Base2NEncodeError):
    """
    8-bits characters to base-N encoding for N a power of 2.

    :param string:  string to be encoded (items can be characters or integers)
    :param charset: base-N characters set
    :param errors:  errors handling marker (not used by this function)
    :param exc:     exception to be raised in case of error
    :return:        the encoded string, right-padded with "=" up to a whole
                     number of quanta
    :raise exc:     if the charset holds less than 2 characters
    """
    n = len(charset)
    # with less than 2 characters, no bit can be mapped to the charset
    if n < 2:
        raise exc("Bad charset ; at least 2 characters are required")
    # number of bits per output character (exact integer floor of log2(n))
    nb_out = n.bit_length() - 1
    # the quantum is the smallest multiple of nb_out that is a whole number of
    #  bytes
    q = nb_out
    while q % 8 != 0:
        q += nb_out
    # gather the bits of all the input characters, 8 bits each
    # NOTE: an item with a value over 255 yields more than 8 bits and thus
    #        shifts the following ones ; the input is expected to be 8-bits
    bits = "".join("{:0>8}".format(bin(c if isinstance(c, int) else ord(c))[2:])
                   for c in string)
    # right-pad with zeros the last, possibly incomplete, group of bits
    remainder = len(bits) % nb_out
    if remainder:
        bits += "0" * (nb_out - remainder)
    # map each group of nb_out bits to the charset
    r = "".join(charset[int(bits[i:i + nb_out], 2)]
                for i in range(0, len(bits), nb_out))
    # pad with "=" up to a whole number of quanta
    per_quantum = q // nb_out
    return r + (-len(r) % per_quantum) * "="
|
||
|
||
def base2n_decode(string, charset, errors="strict", exc=Base2NDecodeError):
    """
    Base-N to 8-bits characters decoding for N a power of 2.

    :param string:  string to be decoded
    :param charset: base-N characters set
    :param errors:  errors handling marker ; on a character outside the
                     charset, "strict" raises, "replace" substitutes zero bits
                     and "ignore" skips it
    :param exc:     exception to be raised in case of error
    :return:        the decoded string ; an empty input gives an empty string
    :raise exc:        if the charset holds less than 2 characters or, in
                        "strict" mode, on a bad character or a bad padding
    :raise ValueError: on an unsupported errors handling marker (only checked
                        when an error is met)
    """
    n = len(charset)
    if n < 2:
        raise exc("Bad charset ; at least 2 characters are required")
    # find the number of bits for the given character set (exact integer floor
    #  of log2(n)) and the number of padding characters
    nb_in = n.bit_length() - 1
    n_pad = len(string) - len(string.rstrip("="))
    zeros, fmt = "0" * nb_in, "{:0>%d}" % nb_in
    bs, chars = "", []
    # iterate over the characters, mapping them to the character set and
    #  converting the resulting bits to 8-bits characters
    for i, c in enumerate(string):
        if c == "=":
            bs += zeros
        else:
            try:
                bs += fmt.format(bin(charset.index(c))[2:])
            except ValueError:
                if errors == "strict":
                    raise exc("'base' codec can't decode character '{}' in "
                              "position {}".format(c, i))
                elif errors == "replace":
                    bs += zeros
                elif errors == "ignore":
                    continue
                else:
                    raise ValueError("Unsupported error handling {}"
                                     .format(errors))
        # flush every complete byte
        while len(bs) >= 8:
            chars.append(chr(int(bs[:8], 2)))
            bs = bs[8:]
    # if the number of bits is not multiple of 8 bits, it could mean a bad
    #  padding
    if bs:
        if errors == "strict":
            raise exc("Incorrect padding")
        elif errors not in ["replace", "ignore"]:
            raise ValueError("Unsupported error handling {}".format(errors))
        # best effort: keep the trailing incomplete group of bits
        chars.append(chr(int(bs, 2)))
    r = "".join(chars)
    # drop the characters that only come from the padding (integer ceiling of
    #  n_pad * nb_in / 8)
    n_strip = (n_pad * nb_in + 7) // 8
    return r[:-n_strip] if n_strip > 0 else r
Oops, something went wrong.