Merge branch 'master' into ci-lint-docstring

jina-ai · Feb 23, 2021 · 64f0507 · 64f0507
2 parents 91e2621 + 74c68c5
commit 64f0507
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 32 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -88,13 +88,14 @@ jobs:
           echo ====================================================================================
           echo DOCSTRINGS LINT: checking ${{ steps.file_changes.outputs.all }}
           echo ------------------------------------------------------------------------------------
+          echo 'removing files under /tests...'
           arrVar=()
           # we ignore tests files
           for changed_file in ${{ steps.file_changes.outputs.all }}; do
-            echo ${changed_file}
             if [[ ${changed_file} != tests/* ]]; then
+              echo keeping ${changed_file}
               arrVar+=(${changed_file})
-            done
+            fi
           done
           # required in order to get the status of all the files at once
           darglint -v 2 -s sphinx "${arrVar[@]}" & pydocstyle --select=D101,D102,D103 "${arrVar[@]}"

diff --git a/README.md b/README.md
@@ -129,7 +129,7 @@ This downloads [people image dataset](https://www.kaggle.com/ahmadahmadzada/imag
 ### 🥚 Fundamentals
 
 #### CRUD Functions
-<a href="https://mybinder.org/v2/gh/jina-ai/jupyter-notebooks/main?filepath=basic-crud-functions.ipynb"><img align="right" src="https://github.com/jina-ai/jina/blob/master/.github/badges/run-badge.svg?raw=true"/></a>
+<a href="https://mybinder.org/v2/gh/jina-ai/jupyter-notebooks/main?filepath=basic-basic-crud-functions.ipynb"><img align="right" src="https://github.com/jina-ai/jina/blob/master/.github/badges/run-badge.svg?raw=true"/></a>
 
 First we look at basic CRUD operations. In Jina, CRUD corresponds to four functions: `index` (create), `search` (read), `update`, and `delete`. With Documents below as an example:
 ```python

diff --git a/jina/types/document/__init__.py b/jina/types/document/__init__.py
@@ -7,7 +7,7 @@
 import urllib.request
 import warnings
 from hashlib import blake2b
-from typing import Union, Dict, Optional, TypeVar, Any, Callable, Sequence, Tuple
+from typing import Union, Dict, Optional, TypeVar, Any, Tuple
 
 import numpy as np
 from google.protobuf import json_format
@@ -16,15 +16,15 @@
 from .converters import png_to_buffer, to_datauri, guess_mime, to_image_blob
 from ..mixin import ProtoTypeMixin
 from ..ndarray.generic import NdArray
+from ..querylang.queryset.dunderkey import dunder_get
 from ..score import NamedScore
 from ..sets.chunk import ChunkSet
 from ..sets.match import MatchSet
-from ..querylang.queryset.dunderkey import dunder_get
 from ...excepts import BadDocType
 from ...helper import is_url, typename, random_identity, download_mermaid_url
 from ...importer import ImportExtensions
-from ...proto import jina_pb2
 from ...logging import default_logger
+from ...proto import jina_pb2
 
 __all__ = ['Document', 'DocumentContentType', 'DocumentSourceType']
 DIGEST_SIZE = 8
@@ -36,6 +36,8 @@
 _document_fields = set(list(jina_pb2.DocumentProto().DESCRIPTOR.fields_by_camelcase_name) + list(
     jina_pb2.DocumentProto().DESCRIPTOR.fields_by_name))
 
+_all_mime_types = set(mimetypes.types_map.values())
+
 
 class Document(ProtoTypeMixin):
     """
@@ -571,7 +573,7 @@ def mime_type(self, value: str):
         :param value: the acceptable MIME type, raise ``ValueError`` when MIME type is not
                 recognizable.
         """
-        if value in mimetypes.types_map.values():
+        if value in _all_mime_types:
             self._pb_body.mime_type = value
         elif value:
             # given but not recognizable, do best guess
@@ -780,30 +782,6 @@ def CopyFrom(self, doc: 'Document'):
         """Copy the content of target :param:doc into current document."""
         self._pb_body.CopyFrom(doc.proto)
 
-    def traverse(self, traversal_path: str, callback_fn: Callable, *args, **kwargs) -> None:
-        """Traverse leaves of the document."""
-        from ..sets import DocumentSet
-        self._traverse_rec(DocumentSet([self]), None, None, traversal_path, callback_fn, *args, **kwargs)
-
-    def _traverse_rec(self, docs: Sequence['Document'], parent_doc: Optional['Document'],
-                      parent_edge_type: Optional[str], traversal_path: str, callback_fn: Callable, *args, **kwargs):
-        if traversal_path:
-            next_edge = traversal_path[0]
-            for doc in docs:
-                if next_edge == 'm':
-                    self._traverse_rec(
-                        doc.matches, doc, 'matches', traversal_path[1:], callback_fn, *args, **kwargs
-                    )
-                elif next_edge == 'c':
-                    self._traverse_rec(
-                        doc.chunks, doc, 'chunks', traversal_path[1:], callback_fn, *args, **kwargs
-                    )
-                else:
-                    raise ValueError(f'"{next_edge}" in "{traversal_path}" is not a valid traversal path')
-        else:
-            for d in docs:
-                callback_fn(d, parent_doc, parent_edge_type, *args, **kwargs)
-
     def __mermaid_str__(self):
         results = []
         from google.protobuf.json_format import MessageToDict

diff --git a/tests/integration/test_helloworld.py b/tests/integration/test_helloworld.py
@@ -33,7 +33,7 @@ def check_hello_world_results(html_path: str):
     assert len(evaluation_results) == 2
     # not exact, to avoid instability, but close enough to the current results to raise some alarms
     assert float(evaluation_results[0]) > 50.0
-    assert float(evaluation_results[1]) > 0.5
+    assert float(evaluation_results[1]) >= 0.5
 
 
 @pytest.mark.timeout(360)