Skip to content

Commit

Permalink
feat(document): add data_uri2blob converter (#1982)
Browse files Browse the repository at this point in the history
* feat(document): add data_uri2blob converter
  • Loading branch information
hanxiao committed Feb 19, 2021
1 parent cb32309 commit b2c2a97
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
16 changes: 16 additions & 0 deletions jina/drivers/convertdriver.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,54 +33,70 @@ def _apply_all(

class URI2Buffer(ConvertDriver):
    """Driver that converts a URI to a buffer.

    It only preselects ``'convert_uri_to_buffer'`` as the conversion
    function handed to :class:`ConvertDriver`.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_uri_to_buffer',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)


class URI2DataURI(ConvertDriver):
    """Driver that converts a URI to a data URI.

    It only preselects ``'convert_uri_to_data_uri'`` as the conversion
    function handed to :class:`ConvertDriver`.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_uri_to_data_uri',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)


class Buffer2URI(ConvertDriver):
    """Driver that converts a buffer to a URI.

    It only preselects ``'convert_buffer_to_uri'`` as the conversion
    function handed to :class:`ConvertDriver`.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_buffer_to_uri',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)


class BufferImage2Blob(ConvertDriver):
    """Driver that converts an image buffer to a blob.

    It only preselects ``'convert_buffer_image_to_blob'`` as the conversion
    function handed to :class:`ConvertDriver`.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_buffer_image_to_blob',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)


class URI2Blob(ConvertDriver):
    """Driver that converts a URI to a blob.

    It only preselects ``'convert_uri_to_blob'`` as the conversion
    function handed to :class:`ConvertDriver`.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_uri_to_blob',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)


class DataURI2Blob(ConvertDriver):
    """Driver that converts a data URI to an image blob.

    It only preselects ``'convert_data_uri_to_blob'`` as the conversion
    function handed to :class:`ConvertDriver`.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_data_uri_to_blob',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)


class Text2URI(ConvertDriver):
    """Driver that converts text to a URI.

    It only preselects ``'convert_text_to_uri'`` as the conversion
    function handed to :class:`ConvertDriver`.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_text_to_uri',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)


class URI2Text(ConvertDriver):
    """Driver that converts a URI to text.

    It only preselects ``'convert_uri_to_text'`` as the conversion
    function handed to :class:`ConvertDriver`.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_uri_to_text',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)


class Blob2PngURI(ConvertDriver):
    """Driver that converts a blob to a URI.

    It only preselects ``'convert_blob_to_uri'`` as the conversion
    function handed to :class:`ConvertDriver`.

    NOTE(review): the class name suggests the resulting URI encodes a PNG
    image -- confirm against the ``convert_blob_to_uri`` implementation.
    """

    def __init__(
        self,
        convert_fn: str = 'convert_blob_to_uri',
        *args,
        **kwargs
    ):
        """Forward the conversion name and all extra arguments to the parent driver.

        :param convert_fn: name of the conversion to apply (presumably a
            ``Document`` method name -- confirm against ``ConvertDriver``)
        :param args: extra positional arguments passed through unchanged
        :param kwargs: extra keyword arguments passed through unchanged
        """
        super().__init__(convert_fn, *args, **kwargs)
13 changes: 12 additions & 1 deletion jina/types/document/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ def buffer(self) -> bytes:
def buffer(self, value: bytes):
"""Set the ``buffer`` to :param:`value`."""
self._pb_body.buffer = value
if value:
if value and not self._pb_body.mime_type:
with ImportExtensions(required=False,
pkg_name='python-magic',
help_text=f'can not sniff the MIME type '
Expand Down Expand Up @@ -685,6 +685,17 @@ def convert_uri_to_blob(self, color_axis: int = -1, uri_prefix: str = None, **kw
"""
self.blob = to_image_blob((uri_prefix + self.uri) if uri_prefix else self.uri, color_axis)

def convert_data_uri_to_blob(self, color_axis: int = -1, **kwargs):
    """Read the data URI held in :attr:`uri` and store the decoded image in :attr:`blob`.

    The URI is opened through ``urllib``, its raw bytes are read fully into
    memory, and those bytes are handed to ``to_image_blob`` as an in-memory
    file object.

    :param color_axis: the axis id of the color channel, ``-1`` indicates the color channel info at the last axis
    :param kwargs: reserved for maximum compatibility when using with ConvertDriver
    """
    request = urllib.request.Request(
        self.uri,
        headers={'User-Agent': 'Mozilla/5.0'},
    )
    with urllib.request.urlopen(request) as response:
        raw_bytes = response.read()
    image_stream = io.BytesIO(raw_bytes)
    self.blob = to_image_blob(image_stream, color_axis)

def convert_uri_to_buffer(self, **kwargs):
"""Convert uri to buffer
Internally it downloads from the URI and set :attr:`buffer`.
Expand Down
9 changes: 8 additions & 1 deletion tests/unit/types/document/test_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import numpy as np
import pytest

from jina import Document

cur_dir = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -15,6 +14,14 @@ def test_uri_to_blob():
assert doc.blob.shape == (85, 152, 3) # h,w,c


def test_datauri_to_blob():
    """Round-trip ``test.png`` through a data URI and check the decoded blob."""
    image_path = os.path.join(cur_dir, 'test.png')
    expected_shape = (85, 152, 3)  # h,w,c

    doc = Document(uri=image_path)
    # First turn the file URI into a data URI, then decode that data URI.
    doc.convert_uri_to_data_uri()
    doc.convert_data_uri_to_blob()

    assert isinstance(doc.blob, np.ndarray)
    assert doc.blob.shape == expected_shape


def test_buffer_to_blob():
doc = Document(uri=os.path.join(cur_dir, 'test.png'))
doc.convert_uri_to_buffer()
Expand Down

0 comments on commit b2c2a97

Please sign in to comment.