In [172]:
from pathlib import Path
from typing import List, Dict, Tuple
from PIL import Image, ImageEnhance, ImageOps, ImageDraw, ImageFilter
from pydantic import BaseModel
from enum import Enum
import pandas as pd
import random

In [173]:
class AugmType(Enum):
    H_FLIP = "h_flip"
    V_FLIP = "v_flip"
    CROP = "crop"
    PAD = "padding"
    BRIGHT = "brightness"
    CONTRAST = "contrast"
    SAT = "saturation"
    ROT = "rotate"
    FLASH = "flash"
    BW = "bw"


class Label(Enum):
    DOG = "dog"
    BIKE = "bike"
    BALL = "ball"
    WATER = "water"    



class ImageDir(BaseModel):
    img_stem: str
    label: Label
    caption: str
    imgs_paths: Dict[str, List[AugmType]]
    

In [174]:
flicker_dir = Path("../data/flicker")
flicker_imgs_dir = flicker_dir / "images"
caption_csv_path = flicker_dir / "captions.csv"
augmented_dir = Path("../data/augmented")


In [175]:
df = pd.read_csv(caption_csv_path)
df.columns

Index(['image_path', 'label', 'caption'], dtype='object')

In [169]:
def load_image(img_path: Path) -> Image.Image:
    return Image.open(img_path).convert('RGB')

def random_horizontal_flip(img: Image.Image, flip: float) -> Tuple[float, Image.Image]:
    random_flip = random.random()
    if random_flip < flip:
        return random_flip, ImageOps.mirror(img)
    return random_flip, img

def random_vertical_flip(img: Image.Image, flip: float) -> Tuple[float, Image.Image]:
    random_flip = random.random()
    if random_flip < flip:
        return random_flip, ImageOps.flip(img)  
    return random_flip, img

def random_crop(img: Image.Image, scale: float) -> Tuple[Tuple[int], Image.Image]:
    w, h = img.size
    crop_w = int(w * scale)
    crop_h = int(h * scale)

    x = random.randint(0, w - crop_w)
    y = random.randint(0, h - crop_h)

    cropped = img.crop((x, y, x+crop_w, y+crop_h))
    return ((x, y), cropped.resize((w,h), Image.BILINEAR))

def random_padding(img: Image.Image, padding_range: int) -> Tuple[int, Image.Image]:
    pad = random.randint(0, padding_range)
    w, h = img.size
    padded = ImageOps.expand(img, border=pad, fill=(0,0,0))
    return (pad, padded.resize((w,h), Image.BILINEAR))

def random_brightness(img: Image.Image, min_range: int, max_range: int) -> Tuple[float, Image.Image]:
    amount = random.uniform(min_range, max_range)
    factor = 1 + (amount / 100.0)
    enhancer = ImageEnhance.Brightness(img)
    return (factor, enhancer.enhance(factor))


def random_contrast(img: Image.Image, min_range: int, max_range: int) -> Tuple[float, Image.Image]:
    amount = random.uniform(min_range, max_range)
    factor = 1 + (amount / 100.0)
    enhancer = ImageEnhance.Contrast(img)
    return (factor, enhancer.enhance(factor))


def random_saturation(img: Image.Image, min_range: int, max_range: int) -> Tuple[float, Image.Image]:
    amount = random.uniform(min_range, max_range)
    factor = 1 + (amount / 100.0)
    enhancer = ImageEnhance.Color(img)
    return (factor, enhancer.enhance(factor))


def random_rotate(img: Image.Image, min_angle: int, max_angle: int) -> Tuple[float, Image.Image]:
    angle = random.uniform(min_angle, max_angle)
    return (angle, img.rotate(angle, resample=Image.BILINEAR, expand=True).resize(img.size))


def random_flash(img: Image.Image, max_radius: float, max_intensity: float) -> Image.Image:
    w, h= img.size

    flash_mask = Image.new("L", (w, h), 0)
    draw = ImageDraw.Draw(flash_mask)

    cx = random.randint(0, w)
    cy = random.randint(0, h)

    radius = int(min(w,h) * random.uniform(0.1, max_radius))

    intensity = int(255 * random.uniform(0.3, max_intensity))

    draw.ellipse((cx - radius, cy - radius, cx + radius, cy + radius), fill=intensity)

    flash_mask = flash_mask.filter(ImageFilter.GaussianBlur(radius / 2))
    white_layer = Image.new("RGB", (w, h), (255, 255, 255))

    return Image.composite(white_layer, img, flash_mask)

def random_black_and_white(img: Image.Image) -> Tuple[float, Image.Image]:    
    bw = ImageOps.grayscale(img).convert("RGB")
    return bw
    

def sequence_aug_spatial(img: Image.Image, prob: float) -> Image.Image:
    list_aug : List[str] = []
    if random.random() < prob:
        i, img = random_horizontal_flip(img, flip=0.5)
        list_aug.append("h_flip")
    if random.random() < prob:
        i, img = random_vertical_flip(img, flip=0.5)
        list_aug.append("v_flip")
    if random.random() < prob:
        i, img = random_crop(img, scale=0.9)
        list_aug.append("crop")
    # if random.random() < prob:
    #     img = random_padding(img, padding_range=20)

    if random.random() < prob:
        i, img = random_rotate(img, -10, 10)
        list_aug.append("rotate")

    return list_aug, img

def sequence_aug_colors(img: Image.Image, prob: float) -> Image.Image:
    list_aug : List[str] = []
    if random.random() < 0.3:
        img = random_black_and_white(img)
        list_aug.append("bw")
        return list_aug, img
    if random.random() < prob:
        img = random_flash(img, 0.4, 1.7)
        list_aug.append(("flash"))
    if random.random() < prob:
        i, img = random_brightness(img, -20, 40)
        list_aug.append("brightness")
    if random.random() < prob:
        i, img = random_saturation(img, -20, 40)
        list_aug.append("saturation")
    if random.random() < prob:
        i, img = random_contrast(img, 0, 40)
        list_aug.append("contrast")

    return list_aug, img







In [170]:
def to_augm_enum_list(list_str: List[str]) -> List[AugmType]:
    return [AugmType(s) for s in list_str]


In [171]:
for index, row in df.iterrows(): 
    img_path = Path(row["image_path"])
    label = Label(row["label"])
    caption = row["caption"]
    path = flicker_dir / img_path
    
    type_dir = augmented_dir / label.value
    type_dir.mkdir(parents=True, exist_ok=True)

    img_dir = type_dir / img_path.stem
    img_dir.mkdir(parents=True, exist_ok=True)
    
    img = load_image(path)
    new_img_path = img_dir / f"{img_path.stem}.jpg"
    img.save(new_img_path)

    list_spatial_aug, spatial_aug_img = sequence_aug_spatial(img, 0.7)
    img_spatial_aug_path = img_dir / f"{img_path.stem}_spatial.jpg"
    spatial_aug_img.save(img_spatial_aug_path)

    list_color_aug, color_aug_img = sequence_aug_colors(img, 0.7)
    img_color_aug_path = img_dir / f"{img_path.stem}_color.jpg"
    color_aug_img.save(img_color_aug_path)

    spatial_enum = to_augm_enum_list(list_spatial_aug)
    color_enum = to_augm_enum_list(list_color_aug)

    imgs_paths = {
        str(new_img_path): [],
        str(img_spatial_aug_path): spatial_enum,
        str(img_color_aug_path): color_enum,
    }

    img_infos = ImageDir(
        img_stem=img_path.stem,
        label=label,
        caption=caption,
        imgs_paths=imgs_paths
    )

    infos_path = img_dir / "infos.json"
    infos_path.write_text(img_infos.model_dump_json(indent=4))


In [184]:
import json


rows = []
for type_dir in augmented_dir.iterdir():
    for img_dir in type_dir.iterdir():
        json_path = Path(f"{img_dir}/infos.json")
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
            imgs_infos = ImageDir(**data)
        
        for k, v in imgs_infos.imgs_paths.items():
            rows.append({
                "image_path": Path(k).name,
                "label": imgs_infos.label,
                "caption": imgs_infos.caption,

            })

df = pd.DataFrame(rows)
print(df)

pd_output_path = Path(f"{augmented_dir}/metadata.csv")

df.to_csv(pd_output_path, index=False)


                 image_path        label  \
0             water_070.jpg  Label.WATER   
1     water_070_spatial.jpg  Label.WATER   
2       water_070_color.jpg  Label.WATER   
3             water_084.jpg  Label.WATER   
4     water_084_spatial.jpg  Label.WATER   
...                     ...          ...   
1795   ball_008_spatial.jpg   Label.BALL   
1796     ball_008_color.jpg   Label.BALL   
1797           ball_030.jpg   Label.BALL   
1798   ball_030_spatial.jpg   Label.BALL   
1799     ball_030_color.jpg   Label.BALL   

                                                caption  
0     A kayaker wearing a blue wetsuit and black hel...  
1     A kayaker wearing a blue wetsuit and black hel...  
2     A kayaker wearing a blue wetsuit and black hel...  
3     A woman is walking past an artificial waterfall .  
4     A woman is walking past an artificial waterfall .  
...                                                 ...  
1795                    a boy in white plays baseball .  
1796   