In [11]:
import glob
import random
import json
import v3io.dataplane

def get_image_list(image_dir):
    """
    Return the paths of all ``.jpg`` files under ``image_dir`` in a
    reproducible shuffled order.

    The directory is searched recursively. The shuffle is deterministic for a
    given set of files and does not disturb the module-level ``random`` state.

    :param image_dir: Root directory to search.
    :return: List of matching paths (each prefixed with ``image_dir``),
             shuffled with a fixed seed. Empty when nothing matches.
    """
    # glob makes no ordering guarantee, so sort first: otherwise the seeded
    # shuffle would start from a filesystem-dependent order and the result
    # would not be reproducible across machines.
    image_list = sorted(glob.glob(f"{image_dir}/**/*.jpg", recursive=True))
    # Use a private generator instead of random.seed(42) so that calling this
    # helper does not reseed the RNG shared by the rest of the notebook.
    random.Random(42).shuffle(image_list)
    return image_list

def encode_batch(image_batch):
    """
    Wrap a batch in a single-record payload.

    The batch is serialized to a JSON string and stored under ``"data"``;
    the record's ``"shard_id"`` is always 0.

    :param image_batch: JSON-serializable batch (a list of image paths in
                        this notebook).
    :return: A one-element list containing the record dict.
    """
    record = dict(shard_id=0, data=json.dumps(image_batch))
    return [record]
    
def upload_to_stream(stream, image_dir, batch_size=32):
    """
    Send one batch of image paths from a local directory to a V3IO stream.

    The image files themselves are not uploaded: the first ``batch_size``
    entries returned by ``get_image_list`` are rewritten to ``/stream/...``
    paths, JSON-encoded by ``encode_batch`` and written as a single record
    to ``stream`` in the ``bigdata`` container.

    :param stream: Stream path inside the ``bigdata`` container.
    :param image_dir: Local directory searched for ``.jpg`` files.
    :param batch_size: Maximum number of image paths to include. When fewer
                       images are found, all of them are sent.
    :return: The response object returned by ``put_records``.
    """
    image_list = get_image_list(image_dir=image_dir)

    # Slice rather than index by range(batch_size): indexing raised
    # IndexError whenever the directory held fewer images than batch_size.
    # Everything after the last 'data/' in each path is kept and re-rooted
    # under '/stream/'.
    image_batch = [
        f"/stream/{image_path.split('data/')[-1]}"
        for image_path in image_list[:batch_size]
    ]

    payload = encode_batch(image_batch)

    v3io_client = v3io.dataplane.Client()
    return v3io_client.stream.put_records(container="bigdata",
                                          stream_path=stream,
                                          records=payload)

In [18]:
# Push a single batch of up to 1024 image paths to the stream and show the
# status code of the response.
resp = upload_to_stream(
    stream="dogs_vs_cats/stream",
    image_dir="/v3io/bigdata/dogs_vs_cats/data/",
    batch_size=1024,
)
print(resp.status_code)

200