Skip to content

Commit

Permalink
Merge 244944f into 882582b
Browse files Browse the repository at this point in the history
  • Loading branch information
maurosilber committed Aug 21, 2020
2 parents 882582b + 244944f commit eab5434
Show file tree
Hide file tree
Showing 20 changed files with 299 additions and 243 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.*
*~
__pycache__
*egg-info*
Expand Down
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
repos:
- repo: https://github.com/ambv/black
rev: 19.10b0
hooks:
- id: black
- repo: https://github.com/timothycrosley/isort
rev: '5.4.2'
hooks:
- id: isort
additional_dependencies: ['toml']
- repo: https://gitlab.com/pycqa/flake8
rev: '3.8.3'
hooks:
- id: flake8
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
language: python

python:
- "3.3"
- "3.4"
- "3.5"
- "3.6"
- "3.7"
- "3.8"

env:
- EXTRAS="N"
- EXTRAS="Y"

install:
- if [ $EXTRAS == 'Y' ]; then pip install -r supported_pkgs.txt; fi
- if [ $EXTRAS == 'Y' ]; then pip install .[all]; fi
- pip install coveralls

script:
Expand Down
13 changes: 13 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[build-system]
requires = ["setuptools", "wheel", "setuptools_scm[toml]"]
build-backend = "setuptools.build_meta"

[tool.setuptools_scm]

[tool.isort]
line_length = 88 # Black default
multi_line_output = 3
include_trailing_comma = true

[tool.check-manifest]
ignore = [".pre-commit-config.yaml"]
34 changes: 16 additions & 18 deletions serialize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,26 @@
:license: BSD, see LICENSE for more details.
"""

from contextlib import suppress
from importlib import import_module

from .all import dump, dumps, load, loads, register_class

# Modules that help serialize use other packages.

_MODULES = ('bson',
'dill',
'json',
'msgpack',
'phpserialize',
'pickle',
'serpent',
'yaml')
_MODULES = (
"bson",
"dill",
"json",
"msgpack",
"phpserialize",
"pickle",
"serpent",
"yaml",
)

for name in _MODULES:
try:
import_module('.' + name, 'serialize')
except:
pass

# Others to consider in the future for specialized serialization:
# CSV, pandas.DATAFRAMES, hickle, hdf5

from .all import dump, dumps, load, loads, register_class
with suppress(ImportError):
import_module("." + name, "serialize")

__all__ = ['dump', 'dumps', 'load', 'loads', 'register_class']
__all__ = ["dump", "dumps", "load", "loads", "register_class"]
80 changes: 50 additions & 30 deletions serialize/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
from collections import namedtuple
from io import BytesIO
from os.path import splitext
from pathlib import Path

#: Stores the functions to convert custom classes to and from builtin types.
ClassHelper = namedtuple('ClassHelper', 'to_builtin from_builtin')
ClassHelper = namedtuple("ClassHelper", "to_builtin from_builtin")

#: Stores information and function about each format type.
Format = namedtuple('Format', 'extension dump dumps load loads')
UnavailableFormat = namedtuple('UnavailableFormat', 'extension msg')
Format = namedtuple("Format", "extension dump dumps load loads")
UnavailableFormat = namedtuple("UnavailableFormat", "extension msg")

#: Map unavailable formats to the corresponding error message.
# :type: str -> UnavailableFormat
Expand Down Expand Up @@ -52,9 +53,14 @@ def _get_format(fmt):
return FORMATS[fmt]

if fmt in UNAVAILABLE_FORMATS:
raise ValueError(("'%s' is an unavailable format. " % fmt) + UNAVAILABLE_FORMATS[fmt].msg)
raise ValueError(
("'%s' is an unavailable format. " % fmt) + UNAVAILABLE_FORMATS[fmt].msg
)

raise ValueError("'%s' is an unknown format. Valid options are %s" % (fmt, ', '.join(FORMATS.keys())))
raise ValueError(
"'%s' is an unknown format. Valid options are %s"
% (fmt, ", ".join(FORMATS.keys()))
)


def _get_format_from_ext(ext):
Expand All @@ -67,19 +73,19 @@ def _get_format_from_ext(ext):
if ext in FORMAT_BY_EXTENSION:
return FORMAT_BY_EXTENSION[ext]

valid = ', '.join(FORMAT_BY_EXTENSION.keys())
valid = ", ".join(FORMAT_BY_EXTENSION.keys())

raise ValueError("'%s' is an unknown extension. "
"Valid options are %s" % (ext, valid))
raise ValueError(
"'%s' is an unknown extension. " "Valid options are %s" % (ext, valid)
)


def encode_helper(obj, to_builtin):
"""Encode an object into a two element dict using a function
that can convert it to a builtin data type.
"""

return dict(__class_name__= str(obj.__class__),
__dumped_obj__=to_builtin(obj))
return dict(__class_name__=str(obj.__class__), __dumped_obj__=to_builtin(obj))


def encode(obj, defaultfunc=None):
Expand All @@ -98,8 +104,10 @@ def encode(obj, defaultfunc=None):


def _traverse_dict_ec(obj, ef, td):
return {traverse_and_encode(k, ef, td): traverse_and_encode(v, ef, td)
for k, v in obj.items()}
return {
traverse_and_encode(k, ef, td): traverse_and_encode(v, ef, td)
for k, v in obj.items()
}


def _traverse_list_ec(obj, ef, td):
Expand Down Expand Up @@ -145,30 +153,34 @@ def decode(dct, classes_by_name=None):
if not isinstance(dct, dict):
return dct

s = dct.get('__class_name__', None)
s = dct.get("__class_name__", None)
if s is None:
return dct

classes_by_name = classes_by_name or CLASSES_BY_NAME
try:
_, from_builtin = classes_by_name[s]
c = dct['__dumped_obj__']
c = dct["__dumped_obj__"]
except KeyError:
return dct

return from_builtin(c)


def _traverse_dict_dc(obj, df, td):
if '__class_name__' in obj:
if "__class_name__" in obj:
return df(obj)

return {traverse_and_decode(k, df, td): traverse_and_decode(v, df, td)
for k, v in obj.items()}
return {
traverse_and_decode(k, df, td): traverse_and_decode(v, df, td)
for k, v in obj.items()
}


def _traverse_list_dc(obj, df, td):
return [traverse_and_decode(el, df, td) for el in obj]


def _traverse_tuple_dc(obj, df, td):
return tuple(traverse_and_decode(el, df, td) for el in obj)

Expand Down Expand Up @@ -203,7 +215,9 @@ def traverse_and_decode(obj, decode_func=None, trav_dict=None):
MISSING = object()


def register_format(fmt, dumpser=None, loadser=None, dumper=None, loader=None, extension=MISSING):
def register_format(
fmt, dumpser=None, loadser=None, dumper=None, loader=None, extension=MISSING
):
"""Register an available serialization format.
`fmt` is a unique string identifying the format, such as `json`. Use a colon (`:`) to
Expand All @@ -228,57 +242,63 @@ def register_format(fmt, dumpser=None, loadser=None, dumper=None, loader=None, e

# Here we generate dumper/dumpser if they are not present.
if dumper and not dumpser:

def dumpser(obj):
buf = BytesIO()
dumper(obj, buf)
return buf.getvalue()

elif not dumper and dumpser:

def dumper(obj, fp):
fp.write(dumpser(obj))

elif not dumper and not dumpser:

def raiser(*args, **kwargs):
raise ValueError('dump/dumps is not defined for %s' % fmt)
raise ValueError("dump/dumps is not defined for %s" % fmt)

dumper = dumpser = raiser

# Here we generate loader/loadser if they are not present.
if loader and not loadser:

def loadser(serialized):
return loader(BytesIO(serialized))

elif not loader and loadser:

def loader(fp):
return loadser(fp.read())

elif not loader and not loadser:

def raiser(*args, **kwargs):
raise ValueError('load/loads is not defined for %s' % fmt)
raise ValueError("load/loads is not defined for %s" % fmt)

loader = loadser = raiser

if extension is MISSING:
extension = fmt.split(':', 1)[0]
extension = fmt.split(":", 1)[0]

FORMATS[fmt] = Format(extension, dumper, dumpser, loader, loadser)

if extension and extension not in FORMAT_BY_EXTENSION:
FORMAT_BY_EXTENSION[extension.lower()] = fmt


def register_unavailable(fmt, msg='', pkg='', extension=MISSING):
def register_unavailable(fmt, msg="", pkg="", extension=MISSING):
"""Register an unavailable serialization format.
Unavailable formats are those known by Serialize but that cannot be used
due to a missing requirement (e.g. the package that does the work).
"""
if pkg:
msg = 'This serialization format requires the %s package.' % pkg
msg = "This serialization format requires the %s package." % pkg

if extension is MISSING:
extension = fmt.split(':', 1)[0]
extension = fmt.split(":", 1)[0]

UNAVAILABLE_FORMATS[fmt] = UnavailableFormat(extension, msg)

Expand All @@ -300,11 +320,11 @@ def dump(obj, filename_or_file, fmt=None):
In the latter case, the fmt is not needed if it can be guessed from the extension.
"""

if isinstance(filename_or_file, str):
if isinstance(filename_or_file, (str, Path)):
if fmt is None:
_, ext = splitext(filename_or_file)
fmt = _get_format_from_ext(ext.strip('.'))
with open(filename_or_file, 'wb') as fp:
fmt = _get_format_from_ext(ext.strip("."))
with open(filename_or_file, "wb") as fp:
dump(obj, fp, fmt)
else:
_get_format(fmt).dump(obj, filename_or_file)
Expand All @@ -324,11 +344,11 @@ def load(filename_or_file, fmt=None):
In the latter case, the fmt is not needed if it can be guessed from the extension.
"""

if isinstance(filename_or_file, str):
if isinstance(filename_or_file, (str, Path)):
if fmt is None:
_, ext = splitext(filename_or_file)
fmt = _get_format_from_ext(ext.strip('.'))
with open(filename_or_file, 'rb') as fp:
fmt = _get_format_from_ext(ext.strip("."))
with open(filename_or_file, "rb") as fp:
return load(fp, fmt)

return _get_format(fmt).load(filename_or_file)
Expand Down
8 changes: 5 additions & 3 deletions serialize/bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@
try:
import bson
except ImportError:
all.register_unavailable('bson', pkg='bson')
all.register_unavailable("bson", pkg="bson")
raise


# In the BSON format the top level object must be a dictionary.
# If necessary, we put the object in a dummy dictionary
# under the key __bson_follow__


def dumps(obj):
if not isinstance(obj, dict):
obj = dict(__bson_follow__=obj)
Expand All @@ -32,6 +33,7 @@ def dumps(obj):

def loads(content):
obj = all.traverse_and_decode(bson.loads(content))
return obj.get('__bson_follow__', obj)
return obj.get("__bson_follow__", obj)


all.register_format('bson', dumps, loads)
all.register_format("bson", dumps, loads)
9 changes: 5 additions & 4 deletions serialize/dill.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
:license: BSD, see LICENSE for more details.
"""

from . import all
from . import pickle
from . import all, pickle

try:
import dill
except ImportError:
all.register_unavailable('dill', pkg='dill')
all.register_unavailable("dill", pkg="dill")
raise


class MyPickler(dill.Pickler):

dispatch_table = pickle.DispatchTable()
Expand All @@ -32,4 +32,5 @@ def dump(obj, fp):
def load(fp):
return dill.Unpickler(fp).load()

all.register_format('dill', dumper=dump, loader=load)

all.register_format("dill", dumper=dump, loader=load)

0 comments on commit eab5434

Please sign in to comment.