Skip to content

Commit

Permalink
⚠️ Refactor rename length attribute to siblings - breaking change (#2198)
Browse files Browse the repository at this point in the history

* refactor: rename length attribute to siblings

* refactor: delete length from document

* fix: black exclude reformat

* fix: black exclude reformat

* fix: black exclude reformat
  • Loading branch information
florian-hoenicke committed Mar 19, 2021
1 parent f7e3355 commit 762f619
Show file tree
Hide file tree
Showing 14 changed files with 1,438 additions and 2,780 deletions.
7 changes: 5 additions & 2 deletions jina/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@

# do not change this line manually
# this is managed by proto/build-proto.sh and updated on every execution
__proto_version__ = '0.0.78'
__proto_version__ = '0.0.79'

__uptime__ = _datetime.datetime.now().isoformat()

Expand Down Expand Up @@ -143,9 +143,12 @@

def _set_nofile(nofile_atleast=4096):
"""
sets nofile soft limit to at least 4096, useful for running matlplotlib/seaborn on
Set nofile soft limit to at least 4096, useful for running matlplotlib/seaborn on
parallel executing plot generators vs. Ubuntu default ulimit -n 1024 or OS X El Captian 256
temporary setting extinguishing with Python session.
:param nofile_atleast: nofile soft limit
:return: nofile soft limit and nofile hard limit
"""

try:
Expand Down
2 changes: 1 addition & 1 deletion jina/clients/request/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def request_generator(
:param kwargs: additional arguments
:yield: request
"""
_kwargs = dict(mime_type=mime_type, length=request_size, weight=1.0)
_kwargs = dict(mime_type=mime_type, weight=1.0)

try:
if not isinstance(data, Iterable):
Expand Down
2 changes: 1 addition & 1 deletion jina/clients/request/asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ async def request_generator(
:param kwargs: additional key word arguments
:yield: request
"""
_kwargs = dict(mime_type=mime_type, length=request_size, weight=1.0)
_kwargs = dict(mime_type=mime_type, weight=1.0)

try:
with ImportExtensions(required=True):
Expand Down
3 changes: 1 addition & 2 deletions jina/drivers/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@ def _apply_all(self, docs: 'DocumentSet', *args, **kwargs):

@staticmethod
def _add_chunks(doc, chunks):
num_siblings = len(chunks)
for chunk in chunks:
with Document(length=num_siblings, **chunk) as c:
with Document(**chunk) as c:
if not c.mime_type:
c.mime_type = doc.mime_type
doc.chunks.append(c)
3 changes: 0 additions & 3 deletions jina/proto/jina.proto
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,6 @@ message DocumentProto {
// the weight of this document
float weight = 5;

// total number of siblings of this document (docs that are in the same granularity and parent_id)
uint32 length = 6;

// the top-k matched Docs on the same level (recursive structure)
repeated DocumentProto matches = 8;

Expand Down
4,051 changes: 1,381 additions & 2,670 deletions jina/proto/jina_pb2.py

Large diffs are not rendered by default.

62 changes: 25 additions & 37 deletions jina/proto/jina_pb2_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ def __init__(self, channel):
channel: A grpc.Channel.
"""
self.Call = channel.stream_stream(
'/jina.JinaRPC/Call',
request_serializer=jina__pb2.RequestProto.SerializeToString,
response_deserializer=jina__pb2.RequestProto.FromString,
)
'/jina.JinaRPC/Call',
request_serializer=jina__pb2.RequestProto.SerializeToString,
response_deserializer=jina__pb2.RequestProto.FromString,
)


class JinaRPCServicer(object):
Expand All @@ -29,57 +29,45 @@ class JinaRPCServicer(object):
"""

def Call(self, request_iterator, context):
"""Pass in a Request and a filled Request with matches will be returned."""
"""Pass in a Request and a filled Request with matches will be returned.
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')


def add_JinaRPCServicer_to_server(servicer, server):
rpc_method_handlers = {
'Call': grpc.stream_stream_rpc_method_handler(
servicer.Call,
request_deserializer=jina__pb2.RequestProto.FromString,
response_serializer=jina__pb2.RequestProto.SerializeToString,
),
'Call': grpc.stream_stream_rpc_method_handler(
servicer.Call,
request_deserializer=jina__pb2.RequestProto.FromString,
response_serializer=jina__pb2.RequestProto.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'jina.JinaRPC', rpc_method_handlers
)
'jina.JinaRPC', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))


# This class is part of an EXPERIMENTAL API.
# This class is part of an EXPERIMENTAL API.
class JinaRPC(object):
"""*
jina gRPC service.
"""

@staticmethod
def Call(
request_iterator,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None,
):
return grpc.experimental.stream_stream(
request_iterator,
def Call(request_iterator,
target,
'/jina.JinaRPC/Call',
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.stream_stream(request_iterator, target, '/jina.JinaRPC/Call',
jina__pb2.RequestProto.SerializeToString,
jina__pb2.RequestProto.FromString,
options,
channel_credentials,
insecure,
call_credentials,
compression,
wait_for_ready,
timeout,
metadata,
)
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
18 changes: 0 additions & 18 deletions jina/types/document/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,24 +204,6 @@ def __init__(
self.set_attrs(**kwargs)
self._mermaid_id = random_identity() #: for mermaid visualize id

@property
def length(self) -> int:
"""
The number of siblings of the :class:``Document``
.. # noqa: DAR201
:getter: number of siblings
:setter: number of siblings
:type: int
"""
# TODO(Han): rename this to siblings as this shadows the built-in `length`

return self._pb_body.length

@length.setter
def length(self, value: int):
self._pb_body.length = value

@property
def weight(self) -> float:
"""
Expand Down
9 changes: 0 additions & 9 deletions tests/unit/clients/python/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ def random_lines(num_lines):
request = next(req)
assert len(request.index.docs) == 100
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 100
assert doc.mime_type == 'text/plain'
assert doc.text == f'i\'m dummy doc {index}'

Expand All @@ -102,7 +101,6 @@ def random_lines(num_lines):
request = next(req)
assert len(request.index.docs) == 100
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 100
assert doc.mime_type == 'text/plain'
assert doc.text == f'i\'m dummy doc {index}'

Expand All @@ -117,7 +115,6 @@ def random_lines(num_lines):
request = next(req)
assert len(request.index.docs) == 100
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 100
assert doc.mime_type == 'text/plain'
assert doc.text == f'https://github.com i\'m dummy doc {index}'

Expand All @@ -132,7 +129,6 @@ def random_lines(num_lines):
request = next(req)
assert len(request.index.docs) == 100
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 100
assert doc.text == f'i\'m dummy doc {index}'
assert doc.mime_type == 'text/plain'

Expand All @@ -152,7 +148,6 @@ def random_docs(num_docs):
request = next(req)
assert len(request.index.docs) == 100
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 100
assert doc.mime_type == 'mime_type'
assert doc.text == f'i\'m dummy doc {index}'
assert doc.offset == 1000
Expand Down Expand Up @@ -226,13 +221,11 @@ def test_request_generate_numpy_arrays():
request = next(req)
assert len(request.index.docs) == 5
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 5
assert NdArray(doc.blob).value.shape == (10,)

request = next(req)
assert len(request.index.docs) == 5
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 5
assert NdArray(doc.blob).value.shape == (10,)


Expand All @@ -248,11 +241,9 @@ def generator():
request = next(req)
assert len(request.index.docs) == 5
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 5
assert NdArray(doc.blob).value.shape == (10,)

request = next(req)
assert len(request.index.docs) == 5
for index, doc in enumerate(request.index.docs, 1):
assert doc.length == 5
assert NdArray(doc.blob).value.shape == (10,)
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ def docs(self):
class MockLengthRanker(Chunk2DocRanker):
def __init__(self, *args, **kwargs):
super().__init__(
query_required_keys=('length',),
match_required_keys=('length',),
query_required_keys=('weight',),
match_required_keys=('weight',),
*args,
**kwargs
)

def score(self, match_idx, query_chunk_meta, match_chunk_meta, *args, **kwargs):
return match_chunk_meta[match_idx[0][self.COL_DOC_CHUNK_ID]]['length']
return match_chunk_meta[match_idx[0][self.COL_DOC_CHUNK_ID]]['weight']


def create_document_to_score_same_depth_level():
Expand All @@ -70,7 +70,7 @@ def create_document_to_score_same_depth_level():
doc = Document()
doc.id = 1

for match_id, parent_id, match_score, match_length in [
for match_id, parent_id, match_score, weight in [
(2, 20, 30, 3),
(3, 20, 40, 4),
(4, 30, 20, 2),
Expand All @@ -79,7 +79,7 @@ def create_document_to_score_same_depth_level():
match = Document()
match.id = match_id
match.parent_id = parent_id
match.length = match_length
match.weight = weight
match.score = NamedScore(value=match_score, ref_id=doc.id)
doc.matches.append(match)
return doc
Expand All @@ -94,9 +94,9 @@ def test_collect_matches2doc_ranker_driver_mock_ranker():
dm = list(doc.matches)
assert len(dm) == 2
assert dm[0].id == '20'
assert dm[0].score.value == 3
assert dm[0].score.value == 3.0
assert dm[1].id == '30'
assert dm[1].score.value == 2
assert dm[1].score.value == 2.0
for match in dm:
# match score is computed w.r.t to doc.id
assert match.score.ref_id == doc.id
Expand Down
10 changes: 4 additions & 6 deletions tests/unit/drivers/rank/aggregate/test_chunk2doc_rank_drivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ def score(self, match_idx, query_chunk_meta, match_chunk_meta, *args, **kwargs):
class MockLengthRanker(Chunk2DocRanker):
def __init__(self, *args, **kwargs):
super().__init__(
query_required_keys=['length'],
match_required_keys=['length'],
query_required_keys=['weight'],
match_required_keys=['weight'],
*args,
**kwargs
)

def score(self, match_idx, query_chunk_meta, match_chunk_meta, *args, **kwargs):
return match_chunk_meta[match_idx[0][self.COL_DOC_CHUNK_ID]]['length']
return match_chunk_meta[match_idx[0][self.COL_DOC_CHUNK_ID]]['weight']


class MockPriceDiscountRanker(Chunk2DocRanker):
Expand Down Expand Up @@ -84,11 +84,11 @@ def create_document_to_score():
match.id = str(match_id)
parent_id = 10 * int(match_id)
match.parent_id = str(parent_id)
match.length = int(match_id)
# to be used by MaxRanker and MinRanker
match.score = NamedScore(value=int(match_id), ref_id=chunk.id)
match.tags['price'] = match.score.value
match.tags['discount'] = DISCOUNT_VAL
match.weight = 2 * int(chunk_id) + m
chunk.matches.append(match)
doc.chunks.append(chunk)
return doc
Expand Down Expand Up @@ -117,7 +117,6 @@ def create_chunk_matches_to_score():
match.parent_id = str(parent_id)
match.score = NamedScore(value=score_value, ref_id=chunk.id)
match.id = str(10 * int(parent_id) + score_value)
match.length = 4
chunk.matches.append(match)
doc.chunks.append(chunk)
return doc
Expand Down Expand Up @@ -150,7 +149,6 @@ def create_chunk_chunk_matches_to_score():
match.parent_id = str(parent_id)
match.score = NamedScore(value=score_value, ref_id=chunk_chunk.id)
match.id = str(10 * parent_id + score_value)
match.length = 4
chunk_chunk.matches.append(match)
chunk.chunks.append(chunk_chunk)
doc.chunks.append(chunk)
Expand Down

0 comments on commit 762f619

Please sign in to comment.