Switch from only evaluating the value schema from the first matching …

…key schema to evaluating all of them under an Any context in Object.
dgilland · Jul 24, 2018 · 3cf89e2 · 3cf89e2
1 parent d5f6bb8
commit 3cf89e2
Show file tree

Hide file tree

Showing 3 changed files with 115 additions and 40 deletions.
diff --git a/README.rst b/README.rst
@@ -273,7 +273,7 @@ There are two ways to set strict mode:
 1. Set ``strict=True`` when creating a ``Schema`` object (i.e., ``Schema(..., strict=True)``)
 2. Set ``strict=True`` when evaluating a schema (i.e. ``schema(..., strict=True)``)
 
-TIP: If ``Schema()`` was created with ``strict=True``, use ``schema(..., strict=False)`` to evaulate the schema in non-strict mode.
+**TIP:** If ``Schema()`` was created with ``strict=True``, use ``schema(..., strict=False)`` to evaluate the schema in non-strict mode.
 
 .. code-block:: python
 
@@ -560,7 +560,7 @@ Optional keys can define a default using the ``default`` argument:
     # SchemaResult(data={'b': 5, 'c': {}}, errors={})
 
 
-TIP: For mutable defaults, always use a callable that returns a new instance. For example, for ``{}`` use ``dict``, for ``[]`` use ``list``, etc. This prevents bugs where the same object is used for separate schema results that results in changes to one affecting all the others.
+**TIP:** For mutable defaults, always use a callable that returns a new instance. For example, for ``{}`` use ``dict``, for ``[]`` use ``list``, etc. This prevents bugs where the same object is used for separate schema results that results in changes to one affecting all the others.
 
 When determining how to handle extra keys (i.e. keys in the data but not matched in the schema), there are three modes:
 
@@ -588,7 +588,7 @@ The "extra" mode is set via ``Schema(..., extra=ALLOW_EXTRA|DENY_EXTRA|IGNORE_EX
     # SchemaResult(data={1: 1}, errors={'a': "bad key: not in [<class 'int'>]"})
 
 
-For some schemas, data keys may logically match multiple schema keys (e.g. ``{'a': int, str: str, (str, int): bool}``). However, value-based key schemas are treated differently than type-based key schemas when it comes to validation resolution. The value-based key schemas will take precedence over type-based and will essentially "swallow" a key-value pair so that the value-based key schema must pass (while other key-schemas are ignored for a particular data key):
+For some schemas, data keys may logically match multiple schema keys (e.g. ``{'a': int, str: str, (str, int): bool}``). However, value-based key schemas are treated differently than type-based or other key schemas when it comes to validation resolution. The value-based key schemas will take precedence over all others and will essentially "swallow" a key-value pair so that the value-based key schema must pass (while other key-schemas are ignored for a particular data key):
 
 .. code-block:: python
 
@@ -608,47 +608,31 @@ For some schemas, data keys may logically match multiple schema keys (e.g. ``{'a
     schema({'a': 1, 'x': 'y'})
     # SchemaResult(data={'a': 1, 'x': 'y'}, errors={})
 
-For the type-based key schemas (in the absence of a value-based key match) *all* key schemas will be checked against a data key in order of key schemas with the least number of tyeps (i.e. ``int`` before ``(int, str)``). However, once a data key validates against a key schema, that key schema "wins" and the data value will then need to validate against the corresponding key schema's value schema; all other key schemas will be ignored. In these situations, though, the schema can usually be rewritten to avoid the key schema conflicts altogether:
+
+For non-value-based key schemas (in the absence of a value-based key match) *all* key schemas will be checked. Each matching key schema's value schema will then be used with ``Any()`` when evaluating the data value. As long as at least one of the data-value schemas matches, the data key-value will validate. However, be aware that multiple matching key schemas likely indicate that the schema can be rewritten so that keys will only match a single key schema. Generally, this is preferable since it makes the schema more deterministic and probably more "correct".
 
 .. code-block:: python
 
     from schemable import Schema
 
+    item = {'a': 1, 'x': 'y', 1: False, 2.5: 10.0, 'b': True}
 
-    # Instead of this which gives bad results.
+    # Instead of this.
     Schema({
         'a': int,
         str: str,
         (str, int): bool,
         (int, float): float
-    })({'a': 1, 'x': 'y', 1: False, 2.5: 10.0, 'b': True})
-    # SchemaResult(
-    #    data={'a': 1, 1: False, 2.5: 10.0, 'b': True},
-    #    errors={'x': 'bad value: type error, expected bool but found str',
-    #            <class 'str'>: 'missing required key'})
-
-
-
-    # which can vary based on schema definition ordering.
-    Schema({
-        'a': int,
-        (int, float): float,
-        (str, int): bool,
-        str: str
-    })({'a': 1, 'x': 'y', 1: False, 'b': True})
-    # SchemaResult(
-    #    data={'a': 1, 2.5: 10.0, 'b': True},
-    #    errors={'x': 'bad value: type error, expected bool but found str',
-    #            1: 'bad value: type error, expected float but found bool',
-    #            <class 'str'>: 'missing required key'}
+    })(item)
+    # SchemaResult(data={'a': 1, 'x': 'y', 1: False, 2.5: 10.0, 'b': True}, errors={})
 
-    # Rewrite the schema to fix it.
+    # Rewrite the schema to this.
     Schema({
         'a': int,
         str: (str, bool),
         int: (bool, float),
         float: float
-    })({'a': 1, 'x': 'y', 1: False, 2.5: 10.0, 'b': True})
+    })(item)
     # SchemaResult(data={'a': 1, 'x': 'y', 1: False, 2.5: 10.0, 'b': True}, errors={})
 
 

diff --git a/src/schemable/schemable.py b/src/schemable/schemable.py
@@ -296,25 +296,35 @@ def __call__(self, obj):
         seen = set()
 
         for key, value in obj.items():
-            value_schema = None
+            # It's possible that a key may apply to multiple key schemas (e.g.
+            # {str: str, (str, int): int}). In most cases, these schemas should
+            # be rewritten so that the schema key types are exclusive but we
+            # can still handle this scenario by keeping track of all value
+            # schemas whose key schema matches. We can then check each value
+            # schema and if any of the value schemas match, then the key/value
+            # will be considered valid.
+            value_schemas = ()
 
-            # Try to find the most relevant schema to evaulate for a key since
-            # multiple key schemas could be a candidate (e.g. schemas 'a' and
-            # str would both apply to key 'a' but we want to use the most
-            # specific one).
             if key in self.schema:
-                value_schema = self.schema[key]
+                # The exception to the above about trying multiple value
+                # schemas is when there is a named key schema
+                # (e.g. {'a': str, str: int}) where only the named key schema
+                # should apply (in this case, only check that key 'a' has type
+                # `str` while ignoring the key `str` with type `int`).
+                value_schemas += (self.schema[key],)
             else:
-                # TODO: Warn/error if multiple key schemas match? Generally,
-                # indicates schema may need to be rewritten to to only match a
-                # single key schema.
+                # For all other key schemas, we'll compose a list of value
+                # schemas to validate against. Basically, we'll treat it like
+                # an Any() schema (e.g. {str: str, (str, int): int} would be
+                # like {(str, int): Any(str, int)}).
                 for key_schema in self.schema:
                     if not key_schema(key).errors:
-                        value_schema = self.schema[key_schema]
+                        # Don't add duplicate value schemas.
+                        if self.schema[key_schema] not in value_schemas:
+                            value_schemas += (self.schema[key_schema],)
                         seen.add(key_schema)
-                        break
 
-            if value_schema is None:
+            if not value_schemas:
                 # None of the key schemas match this obj key so need to check
                 # the "extra" policy to determine what to do with it. If the
                 # extra policy is anything other than ALLOW or DENY, then we
@@ -334,17 +344,31 @@ def __call__(self, obj):
             # key violations.
             seen.add(key)
 
+            # In the event that we have multiple value schemas due to `key`
+            # matching multiple key schemas, we will apply the Any() validator
+            # and return its results; otherwise, we'll just validate against
+            # the one value schema.
+            # NOTE: We could just apply Any() in all cases but we'll get a
+            # slight performance improvement by not wrapping it. Generally, the
+            # multiple value schemas should be a rarity so better to use the
+            # more direct route since it applies in most cases.
+            if len(value_schemas) == 1:
+                value_schema = value_schemas[0]
+            else:
+                value_schema = Any(*value_schemas)
+
             value_result = value_schema(value)
 
             if value_result.errors:
-                # If errors is a string, then we want to wrap it with custom
+                # If errors is a string, then we want to wrap it with a custom
                 # message; otherwise, errors is a dict of other errors so we
                 # just assign it.
                 error = value_result.errors
                 if isinstance(value_result.errors, str):
                     error = 'bad value: {}'.format(error)
                 errors[key] = error
 
+            # Ensure data is partially/fully loaded.
             if value_result.data is not None or not value_result.errors:
                 data[key] = value_result.data
 
@@ -358,6 +382,9 @@ def __call__(self, obj):
             for key, default in self.defaults.items():
                 data.setdefault(key, default)
 
+        # Set data to None when it's empty and either there are errors or it
+        # doesn't equal `obj` (the second check keeps data=={} when
+        # obj=={} and there are no errors).
         if not data and (errors or data != obj):
             data = None
 

diff --git a/tests/test_schemable.py b/tests/test_schemable.py
@@ -574,10 +574,74 @@ def test_dict(case):
 ])
 def test_dict_extra(case):
     assert_schema_case(case)
+
+
+@parametrize('case', [
+    dict(
+        schema=OrderedDict([
+            ('a', int),
+            (str, str)
+        ]),
+        data={'a': 1, 'b': 'c'},
+        expected_data={'a': 1, 'b': 'c'},
+        expected_errors={}
+    ),
+    dict(
+        schema=OrderedDict([
+            ('a', int),
+            (str, str)
+        ]),
+        data={'a': 'd', 'b': 'c'},
+        expected_data={'b': 'c'},
+        expected_errors={'a': ('bad value: type error, '
+                               'expected int but found str')}
+    ),
+    dict(
+        schema=OrderedDict([
+            (str, str),
+            ((str, int), int),
+            ((int, str), bool),
+            ((float, str), float)
+        ]),
+        data={
+            'str': 'a',
+            '(str, int)': 1,
+            '(int, str)': True,
+            '(float, str)': 2.5
+        },
+        expected_data={
+            'str': 'a',
+            '(str, int)': 1,
+            '(int, str)': True,
+            '(float, str)': 2.5
+        },
+        expected_errors={}
+    ),
+    dict(
+        schema=OrderedDict([
+            (str, str),
+            ((str, int), int),
+            ((int, str), bool),
+            ((float, str), float)
+        ]),
+        data={'str': None},
+        expected_data=None,
+        expected_errors={'str': ('bad value: type error, '
+                                 'expected float but found NoneType')}
+    ),
     dict(
+        schema=OrderedDict([
+            ((str, int), int),
+            ((int, str), bool),
+            ((float, str), float),
+            (str, {'a': int})
+        ]),
+        data={'str': {}},
         expected_data=None,
+        expected_errors={'str': {'a': 'missing required key'}}
     ),
 ])
+def test_dict_resolution_order(case):
     assert_schema_case(case)