In [1]:
import base64
import io
import json
import numpy as np
import numpy.testing as npt
import pandas as pd
import tempfile
import time
import uuid
import pyarrow as pa

In [2]:
# make the image

# for testing different scenarios.
X_RES = 6144
Y_RES = 4096


def mk_image():
    """Create a random image of size X_RES x Y_RES."""
    return np.random.randint(0, 256, (X_RES, Y_RES, 3), dtype=np.uint8)

In [3]:
# fixed shape tensor array

fixed_arr_input_schema = pa.schema([
    pa.field("image", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
])

fixed_arr_output_schema = pa.schema([
    pa.field("res1", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
    pa.field("res2", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
    pa.field("res3", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
    pa.field("res4", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
])

image = mk_image()
image.shape = (1,) + image.shape
print(f"img.shape: {image.shape}")
data = pa.Table.from_pydict({ "image": pa.FixedShapeTensorArray.from_numpy_ndarray(image)}, fixed_arr_input_schema)
display(data)

img.shape: (1, 6144, 4096, 3)


pyarrow.Table
image: extension<arrow.fixed_shape_tensor[value_type=uint8, shape=[6144,4096,3]]>
----
image: [[[206,115,88,91,84,...,100,177,189,34,81]]]

In [4]:
# from flattened array

list_input_schema = pa.schema([
    pa.field("image", pa.list_(pa.uint8())),
])

list_output_schema = pa.schema([
    pa.field("res1", pa.list_(pa.uint8())),
    pa.field("res2", pa.list_(pa.uint8())),
    pa.field("res3", pa.list_(pa.uint8())),
    pa.field("res4", pa.list_(pa.uint8())),
])

img_flattened = image.flatten()

data = pa.Table.from_pylist([{"image": img_flattened}])
data

pyarrow.Table
image: list<item: uint8>
  child 0, item: uint8
----
image: [[[206,115,88,91,84,...,100,177,189,34,81]]]

In [5]:
# dataframe list

input_data_list = pd.DataFrame({"image": [image.tolist()]})
input_data_list

Unnamed: 0,image
0,"[[[[206, 115, 88], [91, 84, 214], [4, 122, 13]..."


In [6]:
list_input_schema = pa.schema([
    pa.field("image", pa.list_(pa.list_(pa.list_(pa.list_(pa.int64()))))),
])

input_table_list = pa.Table.from_pydict({"image": [image.tolist()]}, list_input_schema)
input_table_list

pyarrow.Table
image: list<item: list<item: list<item: list<item: int64>>>>
  child 0, item: list<item: list<item: list<item: int64>>>
      child 0, item: list<item: list<item: int64>>
          child 0, item: list<item: int64>
              child 0, item: int64
----
image: [[[[[[206,115,88],[91,84,214],...,[126,65,121],[226,206,88]],[[99,63,248],[242,129,169],...,[160,187,4],[93,56,111]],...,[[169,183,183],[51,196,173],...,[249,193,130],[121,160,127]],[[126,86,127],[42,123,252],...,[223,100,177],[189,34,81]]]]]]

In [7]:
# dataframe numpy

input_data_numpy = pd.DataFrame({"image": [np.array(image)]})
input_data_numpy

Unnamed: 0,image
0,"[[[[206 115 88], [ 91 84 214], [ 4 122 13]..."


In [8]:
input_data_table_numpy = pa.Table.from_pandas(input_data_numpy)
input_data_table_numpy


ArrowInvalid: ('Can only convert 1-dimensional array values', 'Conversion failed for column image with type object')