custom json (de)serialisation (pydantic#823)
* custom json (d)encoders, fix pydantic#714

* add docs
samuelcolvin authored and andreshndz committed Jan 17, 2020
1 parent 7ea07b8 commit 802cd85
Showing 11 changed files with 125 additions and 29 deletions.
1 change: 1 addition & 0 deletions changes/714-samuelcolvin.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow custom JSON decoding and encoding via ``json_loads`` and ``json_dumps`` ``Config`` properties.
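
The new ``Config`` hooks can be sketched with the standard library alone. ``BaseConfig``, ``DollarConfig`` and ``parse_raw`` below are illustrative stand-ins for pydantic's internals, not its public API — the point is only the dispatch pattern this commit introduces:

```python
import json

class BaseConfig:
    # defaults mirror the attributes added to pydantic's BaseConfig
    json_loads = staticmethod(json.loads)
    json_dumps = staticmethod(json.dumps)

class DollarConfig(BaseConfig):
    # custom decoder: strip a '$' wrapper before standard decoding
    json_loads = staticmethod(lambda s: json.loads(s.strip('$')))

def parse_raw(config, raw):
    # pydantic-style dispatch: always go through the configured loader
    return config.json_loads(raw)

print(parse_raw(DollarConfig, '${"id": 123}$'))
#> {'id': 123}
```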
21 changes: 21 additions & 0 deletions docs/examples/json_orjson.py
@@ -0,0 +1,21 @@
from datetime import datetime
import orjson
from pydantic import BaseModel

def orjson_dumps(v, *, default):
    # orjson.dumps returns bytes, to match standard json.dumps we need to decode
    return orjson.dumps(v, default=default).decode()

class User(BaseModel):
    id: int
    name = 'John Doe'
    signup_ts: datetime = None

    class Config:
        json_loads = orjson.loads
        json_dumps = orjson_dumps


user = User.parse_raw('{"id": 123, "signup_ts": 1234567890, "name": "John Doe"}')
print(user.json())
#> {"id":123,"signup_ts":"2009-02-13T23:31:30+00:00","name":"John Doe"}
15 changes: 15 additions & 0 deletions docs/examples/json_ujson.py
@@ -0,0 +1,15 @@
from datetime import datetime
import ujson
from pydantic import BaseModel

class User(BaseModel):
    id: int
    name = 'John Doe'
    signup_ts: datetime = None

    class Config:
        json_loads = ujson.loads

user = User.parse_raw('{"id": 123, "signup_ts": 1234567890, "name": "John Doe"}')
print(user)
#> User id=123 signup_ts=datetime.datetime(2009, 2, 13, 23, 31, 30, tzinfo=datetime.timezone.utc) name='John Doe'
48 changes: 38 additions & 10 deletions docs/index.rst
@@ -91,19 +91,20 @@ To test if *pydantic* is compiled run::
     import pydantic
     print('compiled:', pydantic.compiled)
 
-If you want *pydantic* to parse json faster you can add `ujson <https://pypi.python.org/pypi/ujson>`_
-as an optional dependency. Similarly *pydantic's* email validation relies on
-`email-validator <https://github.com/JoshData/python-email-validator>`_ ::
+If you require email validation you can add `email-validator <https://github.com/JoshData/python-email-validator>`_
+as an optional dependency. Similarly, use of ``Literal`` relies on
+`typing-extensions <https://pypi.org/project/typing-extensions/>`_::
 
-   pip install pydantic[ujson]
-   # or
    pip install pydantic[email]
    # or
+   pip install pydantic[typing_extensions]
+   # or just
-   pip install pydantic[ujson,email]
+   pip install pydantic[email,typing_extensions]
 
 Of course you can also install these requirements manually with ``pip install ...``.
 
-Pydantic is also available on `conda <https://www.anaconda.com>`_ under the `conda-forge <https://conda-forge.org>`_ channel::
+Pydantic is also available on `conda <https://www.anaconda.com>`_ under the `conda-forge <https://conda-forge.org>`_
+channel::
 
     conda install pydantic -c conda-forge

@@ -945,13 +946,14 @@ Options:
     Pass in a dictionary with keys matching the error messages you want to override (default: ``{}``)
 :arbitrary_types_allowed: whether to allow arbitrary user types for fields (they are validated simply by checking if the
     value is instance of that type). If ``False`` - ``RuntimeError`` will be raised on model declaration (default: ``False``)
-:json_encoders: customise the way types are encoded to json, see :ref:`JSON Serialisation <json_dump>` for more
-    details.
 :orm_mode: allows usage of :ref:`ORM mode <orm_mode>`
 :alias_generator: callable that takes field name and returns alias for it
 :keep_untouched: tuple of types (e. g. descriptors) that won't change during model creation and won't be
-    included in the model schemas.
+    included in the model schemas
 :schema_extra: takes a ``dict`` to extend/update the generated JSON Schema
+:json_loads: custom function for decoding JSON, see :ref:`custom JSON (de)serialisation <json_encode_decode>`
+:json_dumps: custom function for encoding JSON, see :ref:`custom JSON (de)serialisation <json_encode_decode>`
+:json_encoders: customise the way types are encoded to JSON, see :ref:`JSON Serialisation <json_dump>`

.. warning::

@@ -1186,6 +1188,9 @@ Example:
 By default, timedeltas are encoded as a simple float of total seconds. The ``timedelta_isoformat`` is provided
 as an optional alternative which implements ISO 8601 time diff encoding.
 
+See :ref:`below <json_encode_decode>` for details on how to use other libraries for more performant JSON encoding
+and decoding.

``pickle.dumps(model)``
~~~~~~~~~~~~~~~~~~~~~~~

@@ -1213,6 +1218,29 @@ Of course same can be done on any depth level:

Same goes for ``json`` and ``copy`` methods.

.. _json_encode_decode:

Custom JSON (de)serialisation
.............................

To improve the performance of encoding and decoding JSON, alternative JSON implementations can be used via the
``json_loads`` and ``json_dumps`` properties of ``Config``, e.g. `ujson <https://pypi.python.org/pypi/ujson>`_.

.. literalinclude:: examples/json_ujson.py

(This script is complete, it should run "as is")

``ujson`` generally cannot be used to dump JSON, since it doesn't support encoding objects like datetimes and
doesn't accept a ``default`` fallback function argument. For that you can use another library such as
`orjson <https://github.com/ijl/orjson>`_.

.. literalinclude:: examples/json_orjson.py

(This script is complete, it should run "as is")

Note that ``orjson`` takes care of ``datetime`` encoding natively, making it faster than ``json.dumps`` but
meaning you cannot always customise encoding using ``Config.json_encoders``.
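
The ``default`` hook discussed above is what pydantic relies on to serialise non-native types, which is why a replacement ``json_dumps`` must accept it. A minimal stdlib illustration (``dumps_with_default`` is a hypothetical helper, not part of pydantic):

```python
import json
from datetime import datetime, timezone

def dumps_with_default(v):
    # json.dumps invokes `default` for any object it cannot encode
    # natively; pydantic passes its own encoder through this hook
    return json.dumps(v, default=lambda o: o.isoformat())

print(dumps_with_default({'ts': datetime(2009, 2, 13, 23, 31, 30, tzinfo=timezone.utc)}))
#> {"ts": "2009-02-13T23:31:30+00:00"}
```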

Abstract Base Classes
.....................

3 changes: 1 addition & 2 deletions pydantic/env_settings.py
@@ -1,4 +1,3 @@
-import json
 import os
 from typing import Any, Dict, Optional, cast

@@ -47,7 +46,7 @@ def _build_environ(self) -> Dict[str, Optional[str]]:
             if env_val:
                 if field.is_complex():
                     try:
-                        env_val = json.loads(env_val)
+                        env_val = self.__config__.json_loads(env_val)  # type: ignore
                     except ValueError as e:
                         raise SettingsError(f'error parsing JSON for "{env_name}"') from e
                 d[field.alias] = env_val
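
The change above means "complex" settings fields are decoded with the model's configured loader rather than hard-coded ``json.loads``. A stdlib sketch of the idea — ``build_environ``, the ``APP_`` prefix and the field names are illustrative, not pydantic's API:

```python
import json
import os

def build_environ(complex_fields, prefix='APP_', json_loads=json.loads):
    # decode environment values for "complex" fields (lists, dicts, ...)
    # with a pluggable loader, mirroring the configurable hook
    d = {}
    for name in complex_fields:
        env_val = os.environ.get(prefix + name.upper())
        if env_val:
            try:
                d[name] = json_loads(env_val)
            except ValueError as e:
                raise ValueError(f'error parsing JSON for "{name}"') from e
    return d

os.environ['APP_HOSTS'] = '["a.example", "b.example"]'
print(build_environ(['hosts']))
#> {'hosts': ['a.example', 'b.example']}
```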
15 changes: 11 additions & 4 deletions pydantic/main.py
@@ -63,11 +63,13 @@ class BaseConfig:
     validate_assignment = False
     error_msg_templates: Dict[str, str] = {}
     arbitrary_types_allowed = False
-    json_encoders: Dict[AnyType, AnyCallable] = {}
     orm_mode: bool = False
     alias_generator: Optional[Callable[[str], str]] = None
     keep_untouched: Tuple[type, ...] = ()
     schema_extra: Dict[str, Any] = {}
+    json_loads: Callable[[str], Any] = json.loads
+    json_dumps: Callable[..., str] = json.dumps
+    json_encoders: Dict[AnyType, AnyCallable] = {}
 
     @classmethod
     def get_field_schema(cls, name: str) -> Dict[str, str]:
@@ -307,7 +309,7 @@ def json(
         data = self.dict(include=include, exclude=exclude, by_alias=by_alias, skip_defaults=skip_defaults)
         if self._custom_root_type:
             data = data['__root__']
-        return json.dumps(data, default=encoder, **dumps_kwargs)
+        return self.__config__.json_dumps(data, default=encoder, **dumps_kwargs)
 
     @classmethod
     def parse_obj(cls: Type['Model'], obj: Any) -> 'Model':
@@ -334,7 +336,12 @@ def parse_raw(
     ) -> 'Model':
         try:
             obj = load_str_bytes(
-                b, proto=proto, content_type=content_type, encoding=encoding, allow_pickle=allow_pickle
+                b,
+                proto=proto,
+                content_type=content_type,
+                encoding=encoding,
+                allow_pickle=allow_pickle,
+                json_loads=cls.__config__.json_loads,
             )
         except (ValueError, TypeError, UnicodeDecodeError) as e:
             raise ValidationError([ErrorWrapper(e, loc='__obj__')], cls)
@@ -437,7 +444,7 @@ def schema(cls, by_alias: bool = True) -> 'DictStrAny':
     def schema_json(cls, *, by_alias: bool = True, **dumps_kwargs: Any) -> str:
         from .json import pydantic_encoder
 
-        return json.dumps(cls.schema(by_alias=by_alias), default=pydantic_encoder, **dumps_kwargs)
+        return cls.__config__.json_dumps(cls.schema(by_alias=by_alias), default=pydantic_encoder, **dumps_kwargs)
 
     @classmethod
     def __get_validators__(cls) -> 'CallableGenerator':
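
The two call sites in this file show the contract a replacement ``json_dumps`` must satisfy: it is always invoked with a ``default`` keyword. A stdlib mock of the dispatch — ``Config`` and ``model_json`` are illustrative names, not pydantic's API:

```python
import json
from datetime import datetime, timezone

class Config:
    # a wrapper like this is needed whenever the replacement dumps
    # function has a different signature from json.dumps
    json_dumps = staticmethod(
        lambda data, *, default, **kw: json.dumps(data, default=default, **kw)
    )

def model_json(data, encoder=str, **dumps_kwargs):
    # mirrors BaseModel.json(): always routes through Config.json_dumps
    return Config.json_dumps(data, default=encoder, **dumps_kwargs)

print(model_json({'ts': datetime(2009, 2, 13, tzinfo=timezone.utc)}))
#> {"ts": "2009-02-13 00:00:00+00:00"}
```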
18 changes: 10 additions & 8 deletions pydantic/parse.py
@@ -1,23 +1,25 @@
+import json
 import pickle
 from enum import Enum
 from pathlib import Path
-from typing import Any, Union
+from typing import Any, Callable, Union
 
 from .types import StrBytes
 
-try:
-    import ujson as json
-except ImportError:
-    import json  # type: ignore
-
 
 class Protocol(str, Enum):
     json = 'json'
     pickle = 'pickle'
 
 
 def load_str_bytes(
-    b: StrBytes, *, content_type: str = None, encoding: str = 'utf8', proto: Protocol = None, allow_pickle: bool = False
+    b: StrBytes,
+    *,
+    content_type: str = None,
+    encoding: str = 'utf8',
+    proto: Protocol = None,
+    allow_pickle: bool = False,
+    json_loads: Callable[[str], Any] = json.loads,
 ) -> Any:
     if proto is None and content_type:
         if content_type.endswith(('json', 'javascript')):
@@ -32,7 +34,7 @@ def load_str_bytes(
     if proto == Protocol.json:
         if isinstance(b, bytes):
             b = b.decode(encoding)
-        return json.loads(b)
+        return json_loads(b)
     elif proto == Protocol.pickle:
         if not allow_pickle:
             raise RuntimeError('Trying to decode with pickle with allow_pickle=False')
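
The new ``load_str_bytes`` behaviour can be sketched self-contained, simplified to the JSON path only (the ``Protocol`` handling and pickle branch are omitted here):

```python
import json

def load_str_bytes(b, *, encoding='utf8', json_loads=json.loads):
    # simplified JSON-only version of pydantic's helper: decode bytes to
    # str, then hand off to whatever loader the caller configured
    if isinstance(b, bytes):
        b = b.decode(encoding)
    return json_loads(b)

# default loader
print(load_str_bytes(b'{"a": 1}'))
#> {'a': 1}

# custom loader, e.g. one that tolerates a '$' wrapper around the payload
print(load_str_bytes('$ {"a": 2} $', json_loads=lambda s: json.loads(s.strip('$ '))))
#> {'a': 2}
```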
5 changes: 2 additions & 3 deletions pydantic/validators.py
@@ -1,4 +1,3 @@
-import json
 import re
 import sys
 from collections import OrderedDict
@@ -424,9 +423,9 @@ def constr_strip_whitespace(v: 'StrBytes', field: 'Field', config: 'BaseConfig')
     return v
 
 
-def validate_json(v: Any) -> Any:
+def validate_json(v: Any, config: 'BaseConfig') -> Any:
     try:
-        return json.loads(v)
+        return config.json_loads(v)  # type: ignore
     except ValueError:
         raise errors.JsonError()
     except TypeError:
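
The ``Json`` field validator now also respects the configured loader. A rough stand-alone equivalent, with pydantic's error classes simplified to a built-in ``ValueError``:

```python
import json

class BaseConfig:
    # illustrative stand-in for the model config carrying the loader
    json_loads = staticmethod(json.loads)

def validate_json(v, config):
    # decode with the model's configured loader; normalise decode and
    # type failures into a single error type, as the real validator does
    try:
        return config.json_loads(v)
    except (ValueError, TypeError) as e:
        raise ValueError(f'invalid JSON: {e}') from e

print(validate_json('[1, 2, 3]', BaseConfig))
#> [1, 2, 3]
```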
1 change: 0 additions & 1 deletion requirements.txt
@@ -2,7 +2,6 @@
 -r docs/requirements.txt
 -r tests/requirements.txt
 
-ujson==1.35
 email-validator==1.0.4
 dataclasses==0.6; python_version < '3.7'
 typing-extensions==3.7.4
1 change: 0 additions & 1 deletion setup.py
@@ -99,7 +99,6 @@ def extra(self):
         'dataclasses>=0.6;python_version<"3.7"'
     ],
     extras_require={
-        'ujson': ['ujson>=1.35'],
         'email': ['email-validator>=1.0.3'],
         'typing_extensions': ['typing-extensions>=3.7.2']
     },
26 changes: 26 additions & 0 deletions tests/test_json.py
@@ -167,3 +167,29 @@ class Model(BaseModel):
         __root__: List[str]
 
     assert Model(__root__=['a', 'b']).json() == '["a", "b"]'
 
 
+def test_custom_decode_encode():
+    load_calls, dump_calls = 0, 0
+
+    def custom_loads(s):
+        nonlocal load_calls
+        load_calls += 1
+        return json.loads(s.strip('$'))
+
+    def custom_dumps(s, default=None, **kwargs):
+        nonlocal dump_calls
+        dump_calls += 1
+        return json.dumps(s, default=default, indent=2)
+
+    class Model(BaseModel):
+        a: int
+        b: str
+
+        class Config:
+            json_loads = custom_loads
+            json_dumps = custom_dumps
+
+    m = Model.parse_raw('${"a": 1, "b": "foo"}$$')
+    assert m.dict() == {'a': 1, 'b': 'foo'}
+    assert m.json() == '{\n  "a": 1,\n  "b": "foo"\n}'
