Skip to content

Commit

Permalink
Implement dict type detection (semi-manual)
Browse files Browse the repository at this point in the history
  • Loading branch information
bogdandm committed Nov 18, 2018
1 parent a784a63 commit 4cf6697
Show file tree
Hide file tree
Showing 5 changed files with 91,228 additions and 11 deletions.
2 changes: 1 addition & 1 deletion json_to_models/dynamic_typing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .base import (
BaseType, ImportPathList, MetaData, NoneType, Unknown, UnknownType, get_hash_string
)
from .complex import ComplexType, DList, DOptional, DTuple, DUnion, SingleType
from .complex import ComplexType, DDict, DList, DOptional, DTuple, DUnion, SingleType
from .models_meta import AbsoluteModelRef, ModelMeta, ModelPtr
from .string_datetime import IsoDateString, IsoDatetimeString, IsoTimeString, register_datetime_classes
from .string_serializable import (
Expand Down
12 changes: 11 additions & 1 deletion json_to_models/dynamic_typing/complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ def __init__(self, *types: Union[type, BaseType, dict]):
else:
h = get_hash_string(t)
if h not in hashes:
hashes.add(h)
unique_types.append(t)
hashes.add(h)
super().__init__(*unique_types)

def _extract_nested_types(self):
Expand Down Expand Up @@ -188,3 +188,13 @@ def to_typing_code(self) -> Tuple[ImportPathList, str]:
[*imports, ('typing', 'List')],
f"List[{nested}]"
)


class DDict(SingleType):
    """
    Dict meta type rendered as ``Dict[str, <value type>]``.

    It is a single-type container because keys of a JSON dict are always strings,
    so only the value type has to be tracked.
    """

    def to_typing_code(self) -> Tuple[ImportPathList, str]:
        """
        Build the typing representation of this dict field.

        :return: Pair of (imports required by the annotation, annotation source string)
        """
        # Resolve the value type first; its imports are kept ahead of the Dict import.
        value_imports, value_code = metadata_to_typing(self.type)
        required_imports = [*value_imports, ('typing', 'Dict')]
        return required_imports, f"Dict[str, {value_code}]"
52 changes: 43 additions & 9 deletions json_to_models/generator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import keyword
import re
from collections import OrderedDict
from enum import Enum
from typing import Any, Callable, List, Optional, Union
from typing import Any, Callable, List, Optional, Pattern, Union

from unidecode import unidecode

from .dynamic_typing import (ComplexType, DList, DOptional, DUnion, MetaData, ModelPtr, NoneType, SingleType,
from .dynamic_typing import (ComplexType, DDict, DList, DOptional, DUnion, MetaData, ModelPtr, NoneType, SingleType,
StringSerializable, StringSerializableRegistry, Unknown, registry)


Expand All @@ -27,11 +29,29 @@ def __str__(self):
return self.value


keywords_set = set(keyword.kwlist)


class MetadataGenerator:
CONVERTER_TYPE = Optional[Callable[[str], Any]]

def __init__(self, str_types_registry: StringSerializableRegistry = None):
def __init__(
        self,
        str_types_registry: StringSerializableRegistry = None,
        dict_keys_regex: List[Union[Pattern, str]] = None,
        dict_keys_fields: List[str] = None
):
    """
    :param str_types_registry: StringSerializableRegistry instance.
        The default registry is used if None is passed.
    :param dict_keys_regex: List of regular expressions (compiled or not).
        If all keys of a dict match one of them, the dict is marked as a dict field
        rather than a nested model.
    :param dict_keys_fields: List of model field names that are marked as dict fields.
    """
    if str_types_registry is None:
        str_types_registry = registry
    self.str_types_registry = str_types_registry

    # Every entry is passed through re.compile so the list holds pattern objects only.
    compiled_patterns = []
    if dict_keys_regex:
        compiled_patterns = [re.compile(pattern) for pattern in dict_keys_regex]
    self.dict_keys_regex = compiled_patterns

    # Stored as a set; an omitted or empty argument yields an empty set.
    self.dict_keys_fields = set(dict_keys_fields) if dict_keys_fields else set()

def generate(self, *data_variants: dict) -> dict:
"""
Expand All @@ -51,7 +71,10 @@ def _convert(self, data: dict):
# ! The _detect_type function can crash on some complex data sets if a value is a unicode string containing certain characters (possibly German)
# The crash does not produce any useful logs and can occur at any time after the bad string has been processed
# It can be reproduced with the real_apis tests (openlibrary API)
fields[key] = self._detect_type(value if not isinstance(value, str) else unidecode(value))
convert_dict = key not in self.dict_keys_fields
if key in keywords_set:
key += "_"
fields[key] = self._detect_type(value if not isinstance(value, str) else unidecode(value), convert_dict)
return fields

def _detect_type(self, value, convert_dict=True) -> MetaData:
Expand All @@ -69,10 +92,7 @@ def _detect_type(self, value, convert_dict=True) -> MetaData:
# List trying to yield nested type
elif isinstance(value, list):
if value:
types = []
for item in value:
t = self._detect_type(item, convert_dict)
types.append(t)
types = [self._detect_type(item) for item in value]
if len(types) > 1:
union = DUnion(*types)
if len(union.types) == 1:
Expand All @@ -85,10 +105,24 @@ def _detect_type(self, value, convert_dict=True) -> MetaData:

# Dict should be processed as another model if convert_dict is enabled
elif isinstance(value, dict):
for reg in self.dict_keys_regex:
if all(map(reg.match, value.keys())):
convert_dict = False
break

if convert_dict:
return self._convert(value)
else:
return dict
types = [self._detect_type(item) for item in value.values()]
if len(types) > 1:
union = DUnion(*types)
if len(union.types) == 1:
return DDict(*union.types)
return DDict(union)
elif types:
return DDict(*types)
else:
return DDict(Unknown)

# null interpreted as is and will be processed later on Union merge stage
elif value is None:
Expand Down
Loading

0 comments on commit 4cf6697

Please sign in to comment.