# Data Ingestion

In [None]:

# Download the COCO 2014 validation images and the train/val annotations.
# -f makes curl fail on HTTP errors instead of saving an error page as a zip,
# -L follows redirects, and -o overwrites the target file. The annotations were
# previously written with `>>`, which appends and corrupts anns.zip on a re-run.
!curl -fL -o images.zip http://images.cocodataset.org/zips/val2014.zip
!curl -fL -o anns.zip http://images.cocodataset.org/annotations/annotations_trainval2014.zip

# -q keeps the cell output from being flooded with one line per extracted file.
!unzip -q images.zip
!unzip -q anns.zip

# The archives are no longer needed once extracted.
!rm images.zip
!rm anns.zip


# Data Loading

In [None]:
import json
import pandas as pd


# Parse the COCO caption annotations. JSON is UTF-8 by specification, so the
# encoding is pinned rather than left to the platform default.
with open("annotations/captions_val2014.json", 'r', encoding="utf-8") as f:
    coco_data = json.load(f)

# Index the image metadata by id so each annotation can be joined in O(1).
images = {image['id']: image for image in coco_data['images']}
annotations = coco_data['annotations']

# Build one row per caption: [file name, width, height, caption].
# Annotations whose image id has no metadata entry are skipped.
data = []
for ann in annotations:
    image_info = images.get(ann['image_id'])
    if image_info is None:
        continue

    data.append([
        image_info['file_name'],
        image_info['width'],
        image_info['height'],
        ann['caption'],
    ])

df = pd.DataFrame(data, columns=['image_name', 'width', 'height', 'caption'])

# Image Preprocessing Functions

In [None]:
from typing import Callable
from PIL import Image

def transform_image_inplace(image_path: str, transform_fn: Callable[[Image.Image], Image.Image]) -> None:
    """Apply ``transform_fn`` to the image at ``image_path`` and overwrite the file.

    Args:
        image_path: Path of the image file to read and then write back to.
        transform_fn: Callable that receives the opened image and returns the
            image to be saved.
    """
    with Image.open(image_path) as source_image:
        transformed = transform_fn(source_image)
        # Saved to the same path that was opened, so the original is replaced.
        transformed.save(image_path)

def rotate_image_right(image: Image.Image) -> Image.Image:
    """Return ``image`` rotated by -90 degrees, with the canvas expanded to fit.

    Pillow measures rotation angles counter-clockwise, so -90 turns the image
    to the right.
    """
    quarter_turn_right = -90
    return image.rotate(quarter_turn_right, expand=True)

def resize_image(image: Image.Image, target_width: int, target_height: int) -> Image.Image:
    """Return ``image`` resized to ``target_width`` x ``target_height``.

    The LANCZOS filter is used for resampling. The size is applied as given.
    """
    target_size = (target_width, target_height)
    return image.resize(target_size, Image.LANCZOS)

def get_filtered_images(df: pd.DataFrame, target_width: int, target_height: int) -> pd.Series:
    """Select the ``image_name`` values of rows with exactly the given size.

    Args:
        df: Frame with ``image_name``, ``width`` and ``height`` columns.
        target_width: Width a row must have to be selected.
        target_height: Height a row must have to be selected.

    Returns:
        The ``image_name`` column restricted to the matching rows, keeping
        their original index labels.
    """
    width_matches = df["width"].eq(target_width)
    height_matches = df["height"].eq(target_height)
    return df.loc[width_matches & height_matches, "image_name"]

def get_filtered_images_in(df: pd.DataFrame, target_widths: list[int], target_heights: list[int]) -> pd.Series:
    """Select the ``image_name`` values of rows whose size is in the allowed sets.

    A row is selected when its ``width`` is one of ``target_widths`` and its
    ``height`` is one of ``target_heights``.

    Args:
        df: Frame with ``image_name``, ``width`` and ``height`` columns.
        target_widths: Accepted widths.
        target_heights: Accepted heights.

    Returns:
        The ``image_name`` column restricted to the matching rows, keeping
        their original index labels.
    """
    # isin is the vectorised membership test; it replaces a per-row Python
    # lambda and always yields a boolean mask, including for an empty frame.
    width_ok = df["width"].isin(target_widths)
    height_ok = df["height"].isin(target_heights)
    return df.loc[width_ok & height_ok, "image_name"]

In [None]:
from functools import partial
import os

# Directory the image files referenced by df["image_name"] are read from.
IMAGES_DIR = "val2014"

# df holds one row per caption, so an image with several captions appears
# several times. Each selection is de-duplicated so that an in-place transform
# is applied to a given file only once (and never concurrently with itself).
perfect_images = get_filtered_images(df, 640, 480).drop_duplicates()
height_resize_images = get_filtered_images_in(df, [640], [426, 427, 428]).drop_duplicates()
flip_axis_images = get_filtered_images(df, 480, 640).drop_duplicates()
# NOTE(review): only width 427 is matched here, while the landscape selection
# above accepts heights 426-428 — confirm whether 426/428 should be included.
height_resize_and_flip_axis_images = get_filtered_images(df, 427, 640).drop_duplicates()

def get_image_path(image_name: str) -> str:
    """Return the path of ``image_name`` joined onto ``IMAGES_DIR``."""
    path_parts = (IMAGES_DIR, image_name)
    return os.path.join(*path_parts)


# Simple Speed Test

In [None]:
import time
from concurrent.futures import ThreadPoolExecutor
from functools import partial

def process_height_resize_image(image_name):
    """Resize the named image to 640x480 and overwrite it in place."""
    resize_to_target = partial(resize_image, target_width=640, target_height=480)
    image_path = get_image_path(image_name)
    transform_image_inplace(image_path, resize_to_target)

def process_flip_axis_image(image_name):
    """Rotate the named image to the right and overwrite it in place."""
    image_path = get_image_path(image_name)
    transform_image_inplace(image_path, rotate_image_right)

def process_combined_transform(image_name):
    """Rotate the named image to the right, resize it to 640x480, and overwrite it.

    The rotation runs first so that the final saved image is 640 wide and 480
    high. Resizing to 640x480 and rotating afterwards left the file at 480x640.
    """
    def rotate_and_resize(img):
        rotated_img = rotate_image_right(img)
        return resize_image(rotated_img, target_width=640, target_height=480)

    transform_image_inplace(get_image_path(image_name), rotate_and_resize)

# Time a 10-image sample of each transform, all submitted to one thread pool.
# perf_counter is monotonic, so the measurement is not affected by clock changes.
st = time.perf_counter()

with ThreadPoolExecutor() as executor:
    # Every map() call submits its tasks immediately, so the three batches
    # still run concurrently.
    batches = [
        executor.map(process_height_resize_image, height_resize_images[:10]),
        executor.map(process_flip_axis_image, flip_axis_images[:10]),
        # The combined transform works on its own selection; it previously
        # reused height_resize_images, rewriting the same files as the first
        # batch at the same time.
        executor.map(process_combined_transform, height_resize_and_flip_axis_images[:10]),
    ]
    # Draining the iterators re-raises any exception from a worker; without
    # this, failures are silently discarded.
    for batch in batches:
        list(batch)

end = time.perf_counter()

# Elapsed wall time in minutes.
print((end - st) / 60)
