Skip to content

Commit

Permalink
feat(array): add jpg support to converters (#140)
Browse files Browse the repository at this point in the history
  • Loading branch information
hanxiao committed Feb 24, 2022
1 parent 8bb3dc0 commit e5a4ed5
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 16 deletions.
52 changes: 40 additions & 12 deletions docarray/document/mixins/image.py
@@ -1,6 +1,7 @@
import base64
import io
import struct
import warnings
from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING

import numpy as np
Expand Down Expand Up @@ -48,25 +49,34 @@ def convert_blob_to_image_tensor(
self.tensor = tensor
return self

def convert_image_tensor_to_uri(self: 'T', channel_axis: int = -1) -> 'T':
def convert_image_tensor_to_uri(
self: 'T', channel_axis: int = -1, image_format: str = 'png'
) -> 'T':
"""Assuming :attr:`.tensor` is a _valid_ image, set :attr:`uri` accordingly
:param channel_axis: the axis id of the color channel, ``-1`` indicates the color channel info at the last axis
:param image_format: the output image format: `png`, `jpeg`, or `jpg` (`jpg` is encoded as `jpeg`)
:return: itself after processed
"""

tensor = _move_channel_axis(self.tensor, original_channel_axis=channel_axis)
png_bytes = _to_png_buffer(tensor)
self.uri = 'data:image/png;base64,' + base64.b64encode(png_bytes).decode()
_bytes = _to_image_buffer(tensor, image_format)
self.uri = (
f'data:image/{image_format};base64,' + base64.b64encode(_bytes).decode()
)
return self

def convert_image_tensor_to_blob(self: 'T', channel_axis: int = -1) -> 'T':
def convert_image_tensor_to_blob(
self: 'T', channel_axis: int = -1, image_format: str = 'png'
) -> 'T':
"""Assuming :attr:`.tensor` is a _valid_ image, set :attr:`blob` accordingly
:param channel_axis: the axis id of the color channel, ``-1`` indicates the color channel info at the last axis
:param image_format: the output image format: `png`, `jpeg`, or `jpg` (`jpg` is encoded as `jpeg`)
:return: itself after processed
"""
tensor = _move_channel_axis(self.tensor, original_channel_axis=channel_axis)
self.blob = _to_png_buffer(tensor)
self.blob = _to_image_buffer(tensor, image_format)
return self

def set_image_tensor_shape(
Expand Down Expand Up @@ -102,18 +112,27 @@ def save_image_tensor_to_file(
self: 'T',
file: Union[str, BinaryIO],
channel_axis: int = -1,
image_format: str = 'png',
) -> 'T':
"""Save :attr:`.tensor` into a file
:param file: File or filename to which the data is saved.
:param channel_axis: the axis id of the color channel, ``-1`` indicates the color channel info at the last axis
:param image_format: the output image format: `png`, `jpeg`, or `jpg` (`jpg` is encoded as `jpeg`)
:return: itself after processed
"""

if isinstance(file, str) and not file.endswith(image_format.lower()):
warnings.warn(
f'your output file extension `{file}` does not match your output image format `{image_format}`. '
f'This may result unreadable image file. Please double check your file name or `image_format`.'
)

fp = _get_file_context(file)
with fp:
tensor = _move_channel_axis(self.tensor, channel_axis, -1)
buffer = _to_png_buffer(tensor)
buffer = _to_image_buffer(tensor, image_format)
fp.write(buffer)
return self

Expand Down Expand Up @@ -322,39 +341,48 @@ def _to_image_tensor(
return np.array(raw_img)


def _to_png_buffer(arr: 'np.ndarray') -> bytes:
def _to_image_buffer(arr: 'np.ndarray', image_format: str) -> bytes:
"""
Convert png to buffer bytes.
Convert image-ndarray to buffer bytes.
:param arr: the image data as an ndarray.
:param image_format: `png`, `jpeg`, or `jpg` (`jpg` is normalized to `jpeg`)
:return: the encoded image as bytes.
..note::
if both :attr:`width` and :attr:`height` were provided, will not resize. Otherwise, will get image size
by :attr:`arr` shape and apply resize method :attr:`resize_method`.
"""

if image_format not in ('png', 'jpeg', 'jpg'):
raise ValueError(
f'image_format must be either `png` or `jpeg`, receiving `{image_format}`'
)
if image_format == 'jpg':
image_format = 'jpeg' # unify it to ISO standard

arr = arr.astype(np.uint8).squeeze()

if arr.ndim == 1:
# NOTE: this branch should only be used for the MNIST/FashionMNIST datasets, because of the nature of these two
# datasets; no other image data should be flattened into a 1-dim array.
png_bytes = _png_to_buffer_1d(arr, 28, 28)
image_bytes = _png_to_buffer_1d(arr, 28, 28)
elif arr.ndim == 2:
from PIL import Image

im = Image.fromarray(arr).convert('L')
png_bytes = _pillow_image_to_buffer(im, image_format='PNG')
image_bytes = _pillow_image_to_buffer(im, image_format=image_format.upper())
elif arr.ndim == 3:
from PIL import Image

im = Image.fromarray(arr).convert('RGB')
png_bytes = _pillow_image_to_buffer(im, image_format='PNG')
image_bytes = _pillow_image_to_buffer(im, image_format=image_format.upper())
else:
raise ValueError(
f'{arr.shape} ndarray can not be converted into an image buffer.'
)

return png_bytes
return image_bytes


def _png_to_buffer_1d(arr: 'np.ndarray', width: int, height: int) -> bytes:
Expand Down
9 changes: 5 additions & 4 deletions tests/unit/document/test_converters.py
Expand Up @@ -100,15 +100,16 @@ def test_image_normalize(shape, channel_axis):
([32, 28, 1], -1, 32, 28), # h, w, c, (greyscale)
],
)
def test_convert_image_tensor_to_uri(arr_size, channel_axis, width, height):
@pytest.mark.parametrize('format', ['png', 'jpeg'])
def test_convert_image_tensor_to_uri(arr_size, channel_axis, width, height, format):
doc = Document(content=np.random.randint(0, 255, arr_size))
assert doc.tensor.any()
assert not doc.uri
doc.set_image_tensor_shape(channel_axis=channel_axis, shape=(width, height))

doc.convert_image_tensor_to_uri(channel_axis=channel_axis)
assert doc.uri.startswith('data:image/png;base64,')
assert doc.mime_type == 'image/png'
doc.convert_image_tensor_to_uri(channel_axis=channel_axis, image_format=format)
assert doc.uri.startswith(f'data:image/{format};base64,')
assert doc.mime_type == f'image/{format}'
assert doc.tensor.any() # assure after conversion tensor still exist.


Expand Down

0 comments on commit e5a4ed5

Please sign in to comment.