# 1. Format Images

_created by Austin Poor_

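This notebook downloads the raw movie stills from the `apoor-raw-movie-stills` S3 bucket, strips any alpha channel, resizes each image to 300×300 pixels, and uploads the result as a JPEG to the `apoor-clean-movie-stills` bucket. The per-image work is run in parallel with Ray.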

In [1]:
import io
from pathlib import Path, PurePosixPath
from typing import List

import ray
import boto3
import numpy as np
from PIL import Image

In [2]:
# ignore_reinit_error lets this cell be re-run in a live kernel without
# raising because Ray has already been initialized.
ray.init(ignore_reinit_error=True);

2021-05-03 21:56:54,295	INFO resource_spec.py:231 -- Starting Ray with 3.56 GiB memory available for workers and up to 1.8 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2021-05-03 21:56:55,207	INFO services.py:1193 -- View the Ray dashboard at [1m[32mlocalhost:8266[39m[22m


In [3]:
# Notebook-level S3 client. The S3 helper functions defined in this notebook
# each create their own client instead of using this one.
s3 = boto3.client("s3")

In [4]:
# Bucket holding the original images to process.
SOURCE_BUCKET = "apoor-raw-movie-stills"
# Bucket that receives the converted JPEG images.
DEST_BUCKET = "apoor-clean-movie-stills"

# Target size handed to Image.resize for every image; Pillow interprets the
# tuple as (width, height).
DEST_SIZE = (300, 300)

In [5]:
def has_alpha(img: Image.Image) -> bool:
    """Report whether ``img`` carries an alpha band.

    Args:
        img: A Pillow image.

    Returns:
        ``True`` if one of the image's bands is named ``"A"`` (e.g. modes
        ``"RGBA"`` and ``"LA"``), otherwise ``False``.
    """
    # Inspecting the band names answers the question directly. It avoids
    # extracting a channel just to probe for it, and it avoids a bare
    # ``except`` that would also hide unrelated errors (KeyboardInterrupt
    # included).
    return "A" in img.getbands()
    
def strip_alpha(img: Image) -> Image:
    """Return ``img`` without an alpha channel.

    An image for which ``has_alpha`` is true is converted to mode ``"RGB"``.
    Any other image is handed back unchanged (the same object, not a copy).
    """
    if has_alpha(img):
        return img.convert(mode="RGB")
    return img
    
def resize_image(img: Image, size: tuple) -> Image:
    """Resize ``img`` to ``size`` and return the result.

    ``size`` is forwarded untouched to ``img.resize`` with that method's
    default settings. No cropping or padding is done here.
    """
    resized = img.resize(size)
    return resized

def convert_key(k: str) -> str:
    """Return the S3 key ``k`` with its file extension replaced by ``.jpg``.

    Only the last suffix is replaced (``"a/b.tar.gz"`` becomes
    ``"a/b.tar.jpg"``); a key without an extension gets ``.jpg`` appended.

    Args:
        k: Object key, using ``/`` as the separator.

    Returns:
        The key with a ``.jpg`` suffix.

    Raises:
        ValueError: If ``k`` has no final name component (e.g. an empty
            string), as raised by ``with_suffix``.
    """
    # S3 keys always use "/" regardless of the host OS, so parse them with the
    # POSIX path flavour; the OS-dependent ``Path`` would emit backslashes on
    # Windows.
    return str(PurePosixPath(k).with_suffix(".jpg"))

In [6]:
def get_image_keys(bucket: str) -> List[str]:
    """List the key of every object stored in an S3 bucket.

    Args:
        bucket: Name of the bucket to list.

    Returns:
        The keys of all objects in the bucket, in the order S3 returns them.
        An empty bucket yields an empty list.
    """
    s3 = boto3.client("s3")
    # Let boto3 drive the pagination instead of re-issuing the request by hand
    # with StartAfter; this also removes the IndexError the manual loop hit on
    # a truncated page with no "Contents".
    pages = s3.get_paginator("list_objects_v2").paginate(Bucket=bucket)
    return [
        obj["Key"]
        for page in pages
        # "Contents" is absent from a page that holds no objects.
        for obj in page.get("Contents", [])
    ]

def download_image(bucket: str, key: str) -> Image.Image:
    """Download an object from S3 and open it as a Pillow image.

    Args:
        bucket: Name of the bucket to read from.
        key: Key of the object to download.

    Returns:
        The image opened from the object's bytes, backed by an in-memory
        buffer.
    """
    s3 = boto3.client("s3")
    obj = s3.get_object(Bucket=bucket, Key=key)
    body = obj["Body"]
    # Image.open documents that it needs a seekable file object, which the S3
    # response stream is not. Read the payload into memory explicitly and
    # close the stream instead of relying on Pillow to cope with it.
    try:
        data = body.read()
    finally:
        body.close()
    return Image.open(io.BytesIO(data))

def upload_image(img: Image, bucket: str, key: str):
    """Encode ``img`` as JPEG in memory and upload it to ``bucket`` at ``key``.

    Nothing is written to local disk: the encoded bytes live in a temporary
    in-memory buffer that is closed once the upload call returns or fails.
    """
    client = boto3.client("s3")
    buffer = io.BytesIO()
    try:
        img.save(buffer, format="JPEG")
        # Rewind so the upload reads the encoded bytes from the beginning.
        buffer.seek(0)
        client.upload_fileobj(buffer, bucket, key)
    finally:
        buffer.close()

In [7]:
@ray.remote
def parse_image(key: str, b_src: str, b_dest: str, img_size: tuple):
    """Ray task that converts one image from the source to the destination bucket.

    The object ``key`` is downloaded from ``b_src``, has its alpha channel
    stripped, is resized to ``img_size``, and is uploaded to ``b_dest`` under
    the key produced by ``convert_key``. Returns ``None``.
    """
    original = download_image(b_src, key)
    opaque = strip_alpha(original)
    resized = resize_image(opaque, img_size)
    upload_image(resized, b_dest, convert_key(key))

In [8]:
# Launch one Ray task per object found in the source bucket.
source_keys = get_image_keys(SOURCE_BUCKET)
futures = [
    parse_image.remote(source_key, SOURCE_BUCKET, DEST_BUCKET, DEST_SIZE)
    for source_key in source_keys
]
# Block until every task has finished; the trailing ";" hides the cell output.
ray.get(futures);