Skip to content

Commit

Permalink
refactor: improve document update method (#3112)
Browse files Browse the repository at this point in the history
  • Loading branch information
bwanglzu committed Aug 6, 2021
1 parent c69d97b commit 2aee836
Showing 1 changed file with 19 additions and 41 deletions.
60 changes: 19 additions & 41 deletions jina/types/document/__init__.py
Expand Up @@ -345,64 +345,42 @@ def tags(self, value: Union[Dict, StructView]):
else:
raise TypeError(f'{value!r} is not supported.')

def _update(
def update(
self,
source: 'Document',
destination: 'Document',
fields: Optional[List[str]] = None,
) -> None:
"""Merge fields specified in ``fields`` from source to destination.
"""Updates fields specified in ``fields`` from the source to current Document.
:param source: source :class:`Document` object.
:param destination: the destination :class:`Document` object to be merged into.
:param fields: a list of field names to be merged from the source into the destination document
:param source: The :class:`Document` we want to update from as source. The current
:class:`Document` is referred to as the destination.
:param fields: a list of field names that we want to update, if not specified,
use all present fields in source.
.. note::
*. if ``fields`` is empty, then destination is overridden by the source completely.
*. ``destination`` will be modified in place, ``source`` will be unchanged.
*. the ``fields`` has value in destination while not in source will be preserved.
*. if ``fields`` is empty, then all present fields in source will be merged into the current document.
* `tags` will be updated like a python :attr:`dict`.
*. the current :class:`Document` will be modified in place, ``source`` will be unchanged.
*. if the current document has more fields than :attr:`source`, these extra fields will be preserved.
"""
# We do a safe update: only update existent (value being set) fields from source.
fields_can_be_updated = []
# ListFields returns a list of (FieldDescriptor, value) tuples for present fields.
present_fields = source._pb_body.ListFields()
for field_descriptor, _ in present_fields:
fields_can_be_updated.append(field_descriptor.name)
present_fields = [
field_descriptor.name
for field_descriptor, _ in source._pb_body.ListFields()
]
if not fields:
fields = fields_can_be_updated # if `fields` empty, update all fields.
fields = present_fields # if `fields` empty, update all present fields.
for field in fields:
if (
field == 'tags'
): # For the tags, stay consistent with the python update method.
destination._pb_body.tags.update(source.tags)
self._pb_body.tags.update(source.tags)
else:
destination._pb_body.ClearField(field)
self._pb_body.ClearField(field)
try:
setattr(destination, field, getattr(source, field))
setattr(self, field, getattr(source, field))
except AttributeError:
setattr(destination._pb_body, field, getattr(source, field))

def update(
self,
source: 'Document',
fields: Optional[List[str]] = None,
) -> None:
"""Updates fields specified in ``fields`` from the source to current Document.
:param source: source :class:`Document` object.
:param fields: a list of field names to update in the current document;
if not specified, merge all fields.
.. note::
*. ``destination`` will be modified in place, ``source`` will be unchanged
"""
if fields and not isinstance(fields, list):
raise TypeError('Parameter `fields` must be list of str')
self._update(
source,
self,
fields=fields,
)
setattr(self._pb_body, field, getattr(source, field))

@property
def content_hash(self) -> str:
Expand Down

0 comments on commit 2aee836

Please sign in to comment.