In [1]:
import os
import random
from random import randint, uniform, choice

import cv2
import numpy as np
import pandas as pd
from PIL import Image, ImageEnhance
from tqdm import tqdm

In [2]:
# === Config ===
INPUT_DIR = "synthetic_motifs/images"       # source images, joined with each row's image_id
OUTPUT_DIR = "synthetic_motifs/augmented"   # augmented images and their metadata CSV go here
N_VARIANTS = 10  # augmented variants generated per source image
IMG_SIZE = 224  # Output crop size

# The augmentation draws from both the stdlib `random` module and `np.random`;
# seed both so a fresh Restart & Run All reproduces the same augmented set.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [3]:
# Ensure the output folder exists; an already-existing folder is not an error.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

In [4]:
# Load original metadata
df = pd.read_csv("synthetic_motifs/metadata.csv")

# Fail fast if the columns read by the augmentation loop are absent, instead
# of hitting a KeyError part-way through image generation.
assert {"image_id", "motif_style"} <= set(df.columns), (
    "metadata.csv must contain 'image_id' and 'motif_style' columns"
)

# One dict per augmented image is appended here by the augmentation loop.
augmented_records = []

In [5]:
def random_augment(img_np):
    """Return one randomly augmented IMG_SIZE x IMG_SIZE crop of ``img_np``.

    Steps, in order: rotate by a random whole-degree angle about the image
    centre (uncovered corners are filled with 0), take a random square crop of
    side IMG_SIZE, apply a random contrast gain and brightness offset, add
    zero-mean Gaussian noise (sigma 10) with probability ~0.3, and mirror
    horizontally with probability ~0.5.

    Args:
        img_np: Image array (e.g. the result of ``cv2.imread``). Must be at
            least IMG_SIZE pixels in both height and width.

    Returns:
        A ``uint8`` array whose first two dimensions are IMG_SIZE x IMG_SIZE.

    Raises:
        ValueError: If ``img_np`` is None or smaller than IMG_SIZE in either
            dimension.
    """
    if img_np is None:
        raise ValueError("random_augment received None (image failed to load?)")

    height, width = img_np.shape[:2]
    if height < IMG_SIZE or width < IMG_SIZE:
        raise ValueError(
            f"Image of size {width}x{height} is smaller than the "
            f"{IMG_SIZE}x{IMG_SIZE} crop"
        )

    # Random rotation (output keeps the input size, so corners may be cut off)
    angle = randint(0, 360)
    M = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1)
    img = cv2.warpAffine(img_np, M, (width, height), borderValue=0)

    # Random crop: x is bounded by the width and y by the height, so
    # non-square inputs are cropped correctly as well.
    x = randint(0, width - IMG_SIZE)
    y = randint(0, height - IMG_SIZE)
    img = img[y:y+IMG_SIZE, x:x+IMG_SIZE]

    # Random brightness/contrast: gain on the pixel values plus an offset of
    # up to +/- 0.2 * 128 grey levels, clipped back into the uint8 range.
    brightness = uniform(0.8, 1.2)
    contrast = uniform(0.8, 1.2)
    img = np.clip(img * contrast + (brightness - 1) * 128, 0, 255).astype(np.uint8)

    # Add Gaussian noise. The sum is done in floating point and clipped
    # afterwards: casting the signed noise to uint8 first would wrap negative
    # samples around to large positive values and only ever brighten pixels.
    if uniform(0, 1) > 0.7:
        noise = np.random.normal(0, 10, img.shape)
        img = np.clip(img.astype(np.float32) + noise, 0, 255).astype(np.uint8)

    # Flip (horizontal mirror)
    if uniform(0, 1) > 0.5:
        img = cv2.flip(img, 1)

    return img

In [6]:
# Motif style -> culture / phase / region written into each augmented record.
# Each row below is (motif, culture, phase, region).
motif_metadata_map = {
    motif: {"culture": culture, "phase": phase, "region": region}
    for motif, culture, phase, region in (
        ("zigzag", "Mierzanowice", "2300–1800 BCE", "Central Poland"),
        ("chevron", "Trzciniec", "1900–1200 BCE", "East-Central Europe"),
        ("spiral", "Pre-Lusatian", "1500–1300 BCE", "Lower Silesia"),
        ("crosshatch", "Funnelbeaker", "3400–2800 BCE", "North Poland"),
        ("dotted", "Corded Ware", "2900–2300 BCE", "Masovia"),
        ("horizontal_lines", "Unetice", "2200–1600 BCE", "Southwestern Poland"),
        ("vertical_lines", "Globular Amphora", "3100–2700 BCE", "Eastern Poland"),
        ("meander", "Lusatian", "1300–500 BCE", "Western Poland"),
        ("wave", "Złota Culture", "3000–2600 BCE", "Vistula Basin"),
        ("comb", "Narva", "4200–3200 BCE", "Baltic Coast"),
    )
}

# Motif style -> decoration technique recorded as "decoration_type".
decoration_map = dict(
    zigzag="incised",
    chevron="incised",
    spiral="incised",
    crosshatch="incised",
    dotted="impressed",
    horizontal_lines="incised",
    vertical_lines="incised",
    meander="incised",
    wave="painted",
    comb="corded",
)

# Pools sampled at random for every augmented record.
vessel_parts = [
    "rim",
    "neck",
    "shoulder",
    "body",
    "base",
]
vessel_types = [
    "bowl",
    "beaker",
    "amphora",
    "jar",
]
# NOTE(review): not referenced by any cell visible here — confirm whether it is still needed.
chronological_phases = [
    "3000–2500 BCE",
    "2500–2000 BCE",
    "2000–1800 BCE",
]

In [7]:
# === Augment ===
# Start from an empty list so that re-running this cell does not append a
# second copy of every record to the ones collected by an earlier run.
augmented_records = []

for _, row in tqdm(df.iterrows(), total=len(df)):
    fname = row['image_id']
    motif = row['motif_style']
    orig_img_path = os.path.join(INPUT_DIR, fname)
    base_img = cv2.imread(orig_img_path, cv2.IMREAD_GRAYSCALE)
    if base_img is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising, so surface it with the offending path.
        raise FileNotFoundError(f"Could not read source image: {orig_img_path}")

    # Strip the extension whatever its length (fname[:-4] only handled
    # three-character extensions such as ".png").
    stem = os.path.splitext(fname)[0]

    # Get motif-specific metadata (identical for every variant of this image,
    # so it is looked up once per source image)
    meta = motif_metadata_map.get(motif, {})
    culture = meta.get("culture", "UnknownCulture")
    phase = meta.get("phase", "UnknownPeriod")
    region = meta.get("region", "UnknownRegion")
    decoration = decoration_map.get(motif, "unknown")

    for i in range(N_VARIANTS):
        aug_img = random_augment(base_img)
        aug_fname = f"{stem}_aug{i}.png"
        aug_path = os.path.join(OUTPUT_DIR, aug_fname)
        # cv2.imwrite returns False on failure; do not record metadata for an
        # image that was never written.
        if not cv2.imwrite(aug_path, aug_img):
            raise OSError(f"Failed to write augmented image: {aug_path}")

        augmented_records.append({
            "image_id": aug_fname,
            "original_image": fname,
            "motif_style": motif,
            "decoration_type": decoration,
            "vessel_part": choice(vessel_parts),
            "vessel_type": choice(vessel_types),
            "culture": culture,
            "chronological_phase": phase,
            "region": region,
            "site": f"VirtualSite_{randint(1,5):03d}",
            "source": "synthetic_generator",
            "annotator": "auto",
            "comments": "synthetic sample"
        })

100%|██████████| 100/100 [00:02<00:00, 43.12it/s]


In [8]:
# Persist one metadata row per augmented image next to the images themselves.
aug_df = pd.DataFrame(data=augmented_records)
aug_df.to_csv(
    path_or_buf=os.path.join(OUTPUT_DIR, "augmented_metadata.csv"),
    index=False,
)

print("✅ Augmentation complete. Total images:", len(aug_df))


✅ Augmentation complete. Total images: 1000
