Skip to content

Commit

Permalink
Merge branch 'master' into ci-lint-docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
cristianmtr committed Feb 23, 2021
2 parents 91e2621 + 74c68c5 commit 64f0507
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 32 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Expand Up @@ -88,13 +88,14 @@ jobs:
echo ====================================================================================
echo DOCSTRINGS LINT: checking ${{ steps.file_changes.outputs.all }}
echo ------------------------------------------------------------------------------------
echo 'removing files under /tests...'
arrVar=()
# we ignore test files (anything under tests/)
for changed_file in ${{ steps.file_changes.outputs.all }}; do
echo ${changed_file}
if [[ ${changed_file}} != tests/* ]]; then
echo keeping ${changed_file}
arrVar+=(${changed_file})
done
fi
done
# required in order to get the status of all the files at once
darglint -v 2 -s sphinx "${arrVar[@]}" & pydocstyle --select=D101,D102,D103 "${arrVar[@]}"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -129,7 +129,7 @@ This downloads [people image dataset](https://www.kaggle.com/ahmadahmadzada/imag
### 🥚 Fundamentals

#### CRUD Functions
<a href="https://mybinder.org/v2/gh/jina-ai/jupyter-notebooks/main?filepath=basic-crud-functions.ipynb"><img align="right" src="https://github.com/jina-ai/jina/blob/master/.github/badges/run-badge.svg?raw=true"/></a>
<a href="https://mybinder.org/v2/gh/jina-ai/jupyter-notebooks/main?filepath=basic-basic-crud-functions.ipynb"><img align="right" src="https://github.com/jina-ai/jina/blob/master/.github/badges/run-badge.svg?raw=true"/></a>

First we look at basic CRUD operations. In Jina, CRUD corresponds to four functions: `index` (create), `search` (read), `update`, and `delete`. With Documents below as an example:
```python
Expand Down
34 changes: 6 additions & 28 deletions jina/types/document/__init__.py
Expand Up @@ -7,7 +7,7 @@
import urllib.request
import warnings
from hashlib import blake2b
from typing import Union, Dict, Optional, TypeVar, Any, Callable, Sequence, Tuple
from typing import Union, Dict, Optional, TypeVar, Any, Tuple

import numpy as np
from google.protobuf import json_format
Expand All @@ -16,15 +16,15 @@
from .converters import png_to_buffer, to_datauri, guess_mime, to_image_blob
from ..mixin import ProtoTypeMixin
from ..ndarray.generic import NdArray
from ..querylang.queryset.dunderkey import dunder_get
from ..score import NamedScore
from ..sets.chunk import ChunkSet
from ..sets.match import MatchSet
from ..querylang.queryset.dunderkey import dunder_get
from ...excepts import BadDocType
from ...helper import is_url, typename, random_identity, download_mermaid_url
from ...importer import ImportExtensions
from ...proto import jina_pb2
from ...logging import default_logger
from ...proto import jina_pb2

__all__ = ['Document', 'DocumentContentType', 'DocumentSourceType']
DIGEST_SIZE = 8
Expand All @@ -36,6 +36,8 @@
_document_fields = set(list(jina_pb2.DocumentProto().DESCRIPTOR.fields_by_camelcase_name) + list(
jina_pb2.DocumentProto().DESCRIPTOR.fields_by_name))

_all_mime_types = set(mimetypes.types_map.values())


class Document(ProtoTypeMixin):
"""
Expand Down Expand Up @@ -571,7 +573,7 @@ def mime_type(self, value: str):
:param value: the acceptable MIME type, raise ``ValueError`` when MIME type is not
recognizable.
"""
if value in mimetypes.types_map.values():
if value in _all_mime_types:
self._pb_body.mime_type = value
elif value:
# given but not recognizable, do best guess
Expand Down Expand Up @@ -780,30 +782,6 @@ def CopyFrom(self, doc: 'Document'):
"""Copy the content of target :param:doc into current document."""
self._pb_body.CopyFrom(doc.proto)

def traverse(self, traversal_path: str, callback_fn: Callable, *args, **kwargs) -> None:
    """Walk the document along ``traversal_path`` and invoke ``callback_fn`` on the reached documents.

    The current document is wrapped into a one-element ``DocumentSet`` and handed to
    :meth:`_traverse_rec` as the starting level, with no parent document and no parent edge.

    :param traversal_path: the path to follow, consumed one character per level by
        :meth:`_traverse_rec`
    :param callback_fn: the callable applied to every document reached at the end of the path
    :param args: extra positional arguments forwarded to ``callback_fn``
    :param kwargs: extra keyword arguments forwarded to ``callback_fn``
    """
    # imported lazily inside the method, as in the original implementation
    from ..sets import DocumentSet

    root_level = DocumentSet([self])
    self._traverse_rec(root_level, None, None, traversal_path, callback_fn, *args, **kwargs)

def _traverse_rec(self, docs: Sequence['Document'], parent_doc: Optional['Document'],
parent_edge_type: Optional[str], traversal_path: str, callback_fn: Callable, *args, **kwargs):
if traversal_path:
next_edge = traversal_path[0]
for doc in docs:
if next_edge == 'm':
self._traverse_rec(
doc.matches, doc, 'matches', traversal_path[1:], callback_fn, *args, **kwargs
)
elif next_edge == 'c':
self._traverse_rec(
doc.chunks, doc, 'chunks', traversal_path[1:], callback_fn, *args, **kwargs
)
else:
raise ValueError(f'"{next_edge}" in "{traversal_path}" is not a valid traversal path')
else:
for d in docs:
callback_fn(d, parent_doc, parent_edge_type, *args, **kwargs)

def __mermaid_str__(self):
results = []
from google.protobuf.json_format import MessageToDict
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_helloworld.py
Expand Up @@ -33,7 +33,7 @@ def check_hello_world_results(html_path: str):
assert len(evaluation_results) == 2
# not exact, to avoid instability, but close enough to the current results to raise some alarms
assert float(evaluation_results[0]) > 50.0
assert float(evaluation_results[1]) > 0.5
assert float(evaluation_results[1]) >= 0.5


@pytest.mark.timeout(360)
Expand Down

0 comments on commit 64f0507

Please sign in to comment.