In [131]:
import base64
import io
import json
import numpy as np
import numpy.testing as npt
import pandas as pd
import tempfile
import time
import uuid
import pyarrow as pa
import sys
import os

In [132]:
# Image generation settings.
#
# Resolution used by the image helpers below; they build arrays of shape
# (X_RES, Y_RES, 3). Change these values to try different image sizes.
X_RES, Y_RES = 6144, 4096


def mk_image(x_res=None, y_res=None):
    """Create a random 3-channel image stored as a ``uint16`` array.

    Pixel values are drawn uniformly from ``[0, 256)``; only the 8-bit value
    range is used even though the array dtype is ``uint16``.

    Args:
        x_res: Size of the first axis. Defaults to the module-level ``X_RES``.
        y_res: Size of the second axis. Defaults to the module-level ``Y_RES``.

    Returns:
        numpy.ndarray of shape ``(x_res, y_res, 3)`` with dtype ``uint16``.
    """
    # Defaults are looked up at call time so that editing X_RES / Y_RES in the
    # notebook takes effect without redefining this function.
    if x_res is None:
        x_res = X_RES
    if y_res is None:
        y_res = Y_RES
    return np.random.randint(0, 256, (x_res, y_res, 3), dtype=np.uint16)

def mk_image_16(x_res=None, y_res=None):
    """Create a random 3-channel image that uses the full 16-bit value range.

    Pixel values are drawn uniformly from ``[0, 65536)``, so the result
    exercises the whole ``uint16`` range rather than only 8-bit values.

    Args:
        x_res: Size of the first axis. Defaults to the module-level ``X_RES``.
        y_res: Size of the second axis. Defaults to the module-level ``Y_RES``.

    Returns:
        numpy.ndarray of shape ``(x_res, y_res, 3)`` with dtype ``uint16``.
    """
    # Defaults are looked up at call time so that editing X_RES / Y_RES in the
    # notebook takes effect without redefining this function.
    if x_res is None:
        x_res = X_RES
    if y_res is None:
        y_res = Y_RES
    # randint's upper bound is exclusive, hence max + 1 (65536 for uint16).
    upper = np.iinfo(np.uint16).max + 1
    return np.random.randint(0, upper, (x_res, y_res, 3), dtype=np.uint16)

In [133]:
# DataFrame input: a single row whose "image" cell holds the whole image as
# nested Python lists.

image = mk_image()
unmodified_dataframe = pd.DataFrame([{"image": image.tolist()}])
display(unmodified_dataframe)

Unnamed: 0,image
0,"[[[77, 23, 187], [70, 111, 16], [187, 65, 47],..."


In [134]:
# Echo the generated array so the notebook shows its (truncated) repr.
image

array([[[ 77,  23, 187],
        [ 70, 111,  16],
        [187,  65,  47],
        ...,
        [191, 112, 119],
        [ 72, 225,  41],
        [127, 172, 243]],

       [[156, 188,  66],
        [ 64, 248, 147],
        [204, 116, 186],
        ...,
        [193, 252,  85],
        [ 45, 110,  93],
        [ 41,  31, 147]],

       [[ 41, 147, 233],
        [ 23, 178, 195],
        [172, 143,  13],
        ...,
        [ 47,  59, 210],
        [236,  23,  65],
        [  7, 105, 218]],

       ...,

       [[192,  32, 218],
        [ 29, 142, 123],
        [ 18, 245, 181],
        ...,
        [250, 197,  20],
        [219, 198,  80],
        [175, 237,   2]],

       [[119, 209,   8],
        [ 63,  67, 233],
        [129, 201, 235],
        ...,
        [209,  55,  24],
        [ 98, 215,  99],
        [240,  83, 171]],

       [[109,  57, 129],
        [248, 157, 181],
        [245,  16,  20],
        ...,
        [130, 203, 129],
        [ 63, 245, 150],
        [104, 230,  51]]

In [135]:
# Single-row text request: one prompt plus its token budget.
data = pd.DataFrame([{"prompt": "What is Wallaroo.AI?", "max_tokens": 200}])
display(data)

Unnamed: 0,prompt,max_tokens
0,What is Wallaroo.AI?,200


In [136]:
# Apache Arrow table: the image is stored as a triply nested list of uint16
# values, one image per row.

list_input_schema = pa.schema([
    ("image", pa.list_(pa.list_(pa.list_(pa.uint16())))),
])

unmodified_arrow_table = pa.Table.from_pydict(
    {"image": [image.tolist()]},
    schema=list_input_schema,
)
display(unmodified_arrow_table)

pyarrow.Table
image: list<item: list<item: list<item: uint16>>>
  child 0, item: list<item: list<item: uint16>>
      child 0, item: list<item: uint16>
          child 0, item: uint16
----
image: [[[[[77,23,187],[70,111,16],...,[72,225,41],[127,172,243]],[[156,188,66],[64,248,147],...,[45,110,93],[41,31,147]],...,[[119,209,8],[63,67,233],...,[98,215,99],[240,83,171]],[[109,57,129],[248,157,181],...,[63,245,150],[104,230,51]]]]]

In [153]:
# Collapse the image into a 1-D copy in row-major (C) order.
img_flattened = image.flatten(order="C")
img_flattened

array([ 77,  23, 187, ..., 104, 230,  51], dtype=uint16)

In [None]:
# Schemas for the flattened-image layout: pixel data is declared as a single
# 1-D list of uint16 values per row.
# NOTE(review): a variant with extra int64 fields "dim0" / "dim1" used to be
# assigned to input_schema just before this one and was immediately
# overwritten; it was dropped because only this definition ever took effect.
input_schema = pa.schema([
    pa.field('image', pa.list_(pa.uint16())),
])

# output schema
output_schema = pa.schema([
    pa.field('image', pa.list_(pa.uint16())),
    pa.field('virtual_stain', pa.list_(pa.uint8())),
])

# Show the flattened pixels together with the two leading dimensions of the
# source array.
display(img_flattened)
display(image.shape[0])
display(image.shape[1])

dim0 = image.shape[0]

# NOTE(review): no schema is passed here, so Arrow infers a plain uint16
# column with one row per pixel value rather than the list<uint16> layout
# declared in input_schema -- confirm which layout is intended.
table_with_flattened_array = pa.Table.from_pydict({"image": img_flattened})
display(table_with_flattened_array)

array([ 77,  23, 187, ..., 104, 230,  51], dtype=uint16)

6144

4096

pyarrow.Table
image: uint16
----
image: [[77,23,187,70,111,...,245,150,104,230,51]]

In [127]:
# fixed shape tensor array
#
# Every tensor shape below is written as [X_RES, Y_RES, 3] so the schemas
# follow the resolution constants instead of repeating literal sizes.

# NOTE(review): these two schemas declare uint8 tensors while mk_image()
# produces uint16 data -- confirm the intended element type before using them.
fixed_arr_input_schema = pa.schema([
    pa.field("image", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
])

fixed_arr_output_schema = pa.schema([
    pa.field("res1", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
    pa.field("res2", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
    pa.field("res3", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
    pa.field("res4", pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
])

# input schema: one uint16 tensor per row
input_schema = pa.schema([
    pa.field('image', pa.fixed_shape_tensor(pa.uint16(), [X_RES, Y_RES, 3])),
])

# output schema

output_schema = pa.schema([
    pa.field('image', pa.fixed_shape_tensor(pa.uint16(), [X_RES, Y_RES, 3])),
    pa.field('virtual_stain', pa.fixed_shape_tensor(pa.uint8(), [X_RES, Y_RES, 3])),
])


# from_numpy_ndarray treats the first axis as the batch axis, so prepend a
# length-1 axis. expand_dims is used instead of assigning to ``image.shape``,
# which NumPy discourages.
image = np.expand_dims(mk_image(), axis=0)
table_with_fixed_shape_tensor_array = pa.Table.from_pydict(
    {"image": pa.FixedShapeTensorArray.from_numpy_ndarray(image)},
    schema=input_schema,
)
display(table_with_fixed_shape_tensor_array)

pyarrow.Table
image: extension<arrow.fixed_shape_tensor[value_type=uint16, shape=[6144,4096,3]]>
----
image: [[[27,159,139,114,107,...,135,175,104,193,228]]]