Skip to content

Commit

Permalink
feat: use record type cache registry
Browse files Browse the repository at this point in the history
  • Loading branch information
bpiwowar committed Mar 4, 2024
1 parent c621ec6 commit 21b31ee
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
35 changes: 23 additions & 12 deletions src/datamaestro/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class Record:

items: Items

def __init__(self, *items: Union[Items, T], override=False, pickled=False):
def __init__(self, *items: Union[Items, T], override=False):
self.items = {}

if len(items) == 1 and isinstance(items[0], dict):
Expand All @@ -48,10 +48,7 @@ def __init__(self, *items: Union[Items, T], override=False, pickled=False):
)
self.items[base] = entry

if pickled:
self.itemtypes = None
else:
self.validate()
self.validate()

def __str__(self):
return (
Expand Down Expand Up @@ -133,7 +130,7 @@ def update(self, *items: T) -> "Record":
# --- Class methods and variables

itemtypes: ClassVar[Optional[Set[Type[T]]]] = []
"""For specific records, this is the list of types. The list is empty when
"""For specific records, this is the list of types. The value is null when
no validation is used (e.g. pickled records created on the fly)"""

__trueclass__: ClassVar[Optional[Type["Record"]]] = None
Expand Down Expand Up @@ -169,11 +166,25 @@ def from_types(cls, name: str, *itemtypes: Type[T], module: str = None):
(cls,),
{
**extra_dict,
"itemtypes": cls._subclass(*itemtypes),
"itemtypes": frozenset(cls._subclass(*itemtypes)),
"__trueclass__": cls.__trueclass__ or cls,
},
)

__RECORD_TYPES_CACHE__: Dict[frozenset, Type["Record"]] = {}

@staticmethod
def fromitemtypes(itemtypes: frozenset) -> Type["Record"]:
    """Return the record class associated with a set of item types.

    Record classes are created on demand with ``Record.from_types`` and
    memoized in ``Record.__RECORD_TYPES_CACHE__``, so that the same set of
    item types always maps to the same record class.

    :param itemtypes: the item types of the record; the value is used as a
        dictionary key and must therefore be hashable (i.e. a ``frozenset``)
    :return: the cached record class, or a newly created (and cached) one
    """
    cache = Record.__RECORD_TYPES_CACHE__
    if (recordtype := cache.get(itemtypes)) is not None:
        return recordtype

    # Cache miss: build a new record class whose name joins the item type
    # names (in the iteration order of the set)
    recordtype = Record.from_types(
        "_".join(itemtype.__name__ for itemtype in itemtypes), *itemtypes
    )
    cache[itemtypes] = recordtype
    return recordtype


def recordtypes(*types: List[Type[T]]):
"""Adds types for a new record class"""
Expand Down Expand Up @@ -211,12 +222,12 @@ def get(self, record_type: Type[Record]):
return updated_type

def update(self, record: Record, *items: Item):
cls = record.__class__
if record.is_pickled() and not self._warning:
logging.warning(
"Updating unpickled records is not recommended"
" (no more record checking, and potential speed issues)"
"Updating unpickled records is not recommended" " (speed issues)"
)
itemtypes = frozenset(record.items.keys())
cls = Record.fromitemtypes(itemtypes)

return self.get(record.__class__)(
*record.items.values(), *items, override=True, pickled=record.is_pickled()
)
return self.get(cls)(*record.items.values(), *items, override=True)
4 changes: 2 additions & 2 deletions src/datamaestro/test/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,5 @@ def test_record_pickled():

r = cache.update(r, CItem(4))

# The result should still be not pickled
assert r.is_pickled()
# The result should not be pickled
assert not r.is_pickled()

0 comments on commit 21b31ee

Please sign in to comment.