<a href="https://colab.research.google.com/github/i-anshumanbaghmare/ImgCap-LoRA/blob/main/LoRA_dataset_Processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Character Configuration

In [1]:
import os
import zipfile
import shutil
from PIL import Image
import gradio as gr

# -----------------------------
# Configuration
# -----------------------------

# Output and scratch locations, relative to the working directory.
DATA_IMG_DIR = "data/images"
DATA_CAP_DIR = "data/captions"
TEMP_DIR = "temp"

# Lower-case file suffixes accepted as images when scanning an extracted ZIP.
VALID_EXTS = (".jpg", ".jpeg", ".png", ".webp")

# Resize dropdown label -> size tuple handed to PIL's Image.resize.
RESIZE_PRESETS = dict([
    ("Face (512x512)", (512, 512)),
    ("Half Body (512x768)", (512, 768)),
    ("Full Body (768x1024)", (768, 1024)),
])

# Make sure every working folder exists before any callback runs.
for _required_dir in (DATA_IMG_DIR, DATA_CAP_DIR, TEMP_DIR):
    os.makedirs(_required_dir, exist_ok=True)

# -----------------------------
# Global State (Simple & Safe)
# -----------------------------

# Paths of the extracted images, the position currently shown, and the
# subject name used in saved file names and captions.
image_list, current_index, character_name = [], 0, "person"


# Function Directory

#### ZIP File Extractor

In [2]:
def extract_zip(zip_path):
    """Unpack an uploaded ZIP into TEMP_DIR and index the images inside it.

    Replaces the module-level ``image_list`` with the sorted paths of every
    extracted file whose name ends in one of VALID_EXTS, and resets
    ``current_index`` to 0.

    Args:
        zip_path: The uploaded archive. Either a filesystem path or an object
            that exposes the path through a ``.name`` attribute.

    Returns:
        The first image of the new dataset, as returned by
        ``load_current_image()``.

    Raises:
        ValueError: If nothing was uploaded, or the archive contains no file
            with a supported image extension.

    Errors raised by ``zipfile`` (for example on a corrupt archive) propagate
    unchanged.
    """
    global image_list, current_index

    if zip_path is None:
        raise ValueError("No ZIP file uploaded")

    # Accept both a plain path and an upload wrapper carrying it as `.name`.
    if isinstance(zip_path, (str, os.PathLike)):
        archive_path = zip_path
    else:
        archive_path = getattr(zip_path, "name", zip_path)

    # Forget the previous dataset *before* wiping TEMP_DIR so that a failed
    # extraction never leaves navigation pointing at deleted files.
    image_list = []
    current_index = 0

    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)
    os.makedirs(TEMP_DIR, exist_ok=True)

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(TEMP_DIR)

    found = []
    for root, _, names in os.walk(TEMP_DIR):
        for name in names:
            # macOS archives carry "._<name>" resource-fork stubs that share
            # the image extension but are not decodable images.
            if name.startswith("._"):
                continue
            if name.lower().endswith(VALID_EXTS):
                found.append(os.path.join(root, name))

    found.sort()
    image_list = found

    if not image_list:
        raise ValueError("No valid images found in ZIP")

    return load_current_image()


#### Image Loading and Navigation

In [3]:
def load_current_image():
    """Return the image at ``current_index`` as an RGB PIL image.

    Returns ``None`` when no dataset has been loaded (``image_list`` is empty).
    """
    if image_list:
        current_path = image_list[current_index]
        return Image.open(current_path).convert("RGB")
    return None

def next_image():
    """Step forward one image (stopping at the last) and return it."""
    global current_index
    last_position = len(image_list) - 1
    if current_index < last_position:
        current_index = current_index + 1
    return load_current_image()

def prev_image():
    """Step back one image (stopping at the first) and return it."""
    global current_index
    at_first_image = current_index <= 0
    if not at_first_image:
        current_index = current_index - 1
    return load_current_image()

#### Saving Logic

In [4]:

def save_processed(
    img,
    body_type,
    caption_extra,
    resize_preset,
    resize_method
):
    """Save the displayed image as a JPEG plus a matching caption file.

    The image is written to DATA_IMG_DIR as
    ``<index>_<character_name>_<body tag>.jpg`` and the caption to
    DATA_CAP_DIR under the same stem with a ``.txt`` suffix. ``<index>`` is
    the 1-based, zero-padded position of the current image.

    Args:
        img: Image from the UI. A PIL image, a file path, or an array
            accepted by ``Image.fromarray``.
        body_type: Body-type label; lower-cased with spaces removed to form
            the file-name tag.
        caption_extra: Optional text appended to the base caption.
        resize_preset: ``"No Resize"`` or a key of RESIZE_PRESETS.
        resize_method: ``"Lanczos"``, ``"Bicubic"`` or ``"Area"``.

    Returns:
        A status string for the UI.
    """
    if img is None:
        return "Error: No image currently displayed."

    # gr.Image hands callbacks a NumPy array unless configured otherwise, and
    # arrays have neither PIL's resize(size, resample=...) nor save().
    if isinstance(img, str):
        img = Image.open(img)
    elif not isinstance(img, Image.Image):
        img = Image.fromarray(img)

    # JPEG cannot store alpha or palette modes; normalise like the loader does.
    if img.mode != "RGB":
        img = img.convert("RGB")

    idx = str(current_index + 1).zfill(4)
    body_tag = body_type.lower().replace(" ", "")
    stem = f"{idx}_{character_name}_{body_tag}"
    filename = stem + ".jpg"

    # Resize
    if resize_preset != "No Resize":
        size = RESIZE_PRESETS[resize_preset]
        resample_map = {
            "Lanczos": Image.LANCZOS,
            "Bicubic": Image.BICUBIC,
            "Area": Image.BOX
        }
        img = img.resize(size, resample=resample_map[resize_method])

    img.save(os.path.join(DATA_IMG_DIR, filename), quality=95)

    caption = f"a photo of {character_name} person"
    extra = (caption_extra or "").strip()
    if extra:
        caption += ", " + extra

    # Explicit encoding so non-ASCII caption text is written identically on
    # every platform.
    caption_path = os.path.join(DATA_CAP_DIR, stem + ".txt")
    with open(caption_path, "w", encoding="utf-8") as f:
        f.write(caption)

    return f"Saved {filename}"

# Gradio Interface

In [6]:

# Build the dataset-builder UI: upload a ZIP, page through its images, and
# save each one together with a caption.
with gr.Blocks(title="Identity LoRA Dataset Builder") as app:
    gr.Markdown("## LoRA Dataset Builder")

    with gr.Row():
        zip_input = gr.File(label="Upload ZIP (images only)", file_types=[".zip"])
        name_input = gr.Textbox(label="Character Name")

    load_btn = gr.Button("Load Dataset")

    with gr.Row():
        image_display = gr.Image(label="Current Image", interactive=True)
        with gr.Column():
            body_type = gr.Radio(
                ["Face", "Half", "Full"],
                label="Body Type",
                value="Face"
            )
            caption_extra = gr.Textbox(
                label="Optional Caption Add-on",
                placeholder="standing, natural light"
            )
            resize_preset = gr.Dropdown(
                ["No Resize"] + list(RESIZE_PRESETS.keys()),
                label="Resize Preset",
                value="Face (512x512)"
            )
            resize_method = gr.Radio(
                ["Lanczos", "Bicubic", "Area"],
                label="Resize Method",
                value="Lanczos"
            )

    with gr.Row():
        prev_btn = gr.Button("Previous")
        next_btn = gr.Button("Next")
        save_btn = gr.Button("Save Image + Caption")

    status = gr.Textbox(label="Status", interactive=False)

    # -----------------------------
    # Bindings
    # -----------------------------

    def set_character_name(name):
        """Store the normalised character name used in file names and captions."""
        global character_name
        # Fall back to "person" when the box is cleared; an empty name would
        # otherwise yield files like "0001__face.jpg" and the caption
        # "a photo of  person".
        character_name = (name or "").strip().lower() or "person"

    name_input.change(set_character_name, name_input, None)

    load_btn.click(extract_zip, zip_input, image_display)
    next_btn.click(next_image, None, image_display)
    prev_btn.click(prev_image, None, image_display)
    save_btn.click(
        save_processed,
        [image_display, body_type, caption_extra, resize_preset, resize_method],
        status
    )

app.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://805116c2aaa2c9a6cd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# Debugging

In [8]:
import os
import zipfile
import shutil
from PIL import Image
import gradio as gr

# -----------------------------
# Configuration
# -----------------------------
# Output and scratch locations, relative to the working directory.
DATA_IMG_DIR = "data/images"
DATA_CAP_DIR = "data/captions"
TEMP_DIR = "temp"

# Lower-case file suffixes accepted as images when scanning an extracted ZIP.
VALID_EXTS = (".jpg", ".jpeg", ".png", ".webp")

# Resize dropdown label -> size tuple handed to PIL's Image.resize.
RESIZE_PRESETS = dict([
    ("Face (512x512)", (512, 512)),
    ("Half Body (512x768)", (512, 768)),
    ("Full Body (768x1024)", (768, 1024)),
])

# Make sure every working folder exists before any callback runs.
for _required_dir in (DATA_IMG_DIR, DATA_CAP_DIR, TEMP_DIR):
    os.makedirs(_required_dir, exist_ok=True)

# -----------------------------
# Global State
# -----------------------------
# Paths of the extracted images, the position currently shown, and the
# subject name used in saved file names and captions.
image_list, current_index, character_name = [], 0, "person"

# -----------------------------
# Utility Functions (with debugging & error handling)
# -----------------------------
def extract_zip(zip_file_obj):
    """Unpack an uploaded ZIP into TEMP_DIR and index the images inside it.

    Replaces the module-level ``image_list`` with the sorted paths of every
    extracted file whose name ends in one of VALID_EXTS, and resets
    ``current_index`` to 0. Failures are reported through the returned status
    string instead of being raised.

    Args:
        zip_file_obj: The uploaded archive. Either an object exposing its
            path as ``.name`` or a plain path string; ``None`` when nothing
            was uploaded.

    Returns:
        ``(image, status)``: the first image of the dataset (``None`` on
        failure) and a human-readable status message.
    """
    global image_list, current_index
    if zip_file_obj is None:
        return None, "Error: No ZIP file uploaded."

    # The upload may arrive as a wrapper carrying its path in `.name` or as a
    # bare path string; handle both instead of assuming the attribute exists.
    zip_path = getattr(zip_file_obj, "name", zip_file_obj)
    print(f"[DEBUG] Extracting ZIP: {zip_path}")

    try:
        # Forget the previous dataset before wiping TEMP_DIR so a failed
        # extraction never leaves navigation pointing at deleted files.
        image_list = []
        current_index = 0

        # Clear and recreate temp directory
        if os.path.exists(TEMP_DIR):
            shutil.rmtree(TEMP_DIR)
        os.makedirs(TEMP_DIR, exist_ok=True)

        with zipfile.ZipFile(zip_path, 'r') as z:
            z.extractall(TEMP_DIR)

        # Collect images
        found = []
        for root, _, files in os.walk(TEMP_DIR):
            for f in files:
                # macOS archives carry "._<name>" resource-fork stubs that
                # share the image extension but are not decodable images.
                if f.startswith("._"):
                    continue
                if f.lower().endswith(VALID_EXTS):
                    found.append(os.path.join(root, f))

        found.sort()
        image_list = found
        print(f"[DEBUG] Found {len(image_list)} valid images.")

        if not image_list:
            return None, "Error: No valid images found in ZIP."

        return load_current_image(), f"Loaded {len(image_list)} images."

    except zipfile.BadZipFile:
        return None, "Error: Invalid or corrupted ZIP file."
    except Exception as e:
        # UI boundary: log the details and surface a message rather than
        # letting the callback crash.
        print(f"[ERROR] extract_zip failed: {type(e).__name__}: {e}")
        return None, f"Error during extraction: {str(e)}"

def load_current_image():
    """Return the image at ``current_index`` as an RGB PIL image.

    Returns ``None`` when no dataset is loaded, when the index is past the
    end of ``image_list``, or when the file cannot be opened (the error is
    printed to the console).
    """
    if not image_list:
        return None
    total = len(image_list)
    if current_index >= total:
        return None
    try:
        img_path = image_list[current_index]
        print(f"[DEBUG] Loading image {current_index + 1}/{total}: {img_path}")
        loaded = Image.open(img_path)
        return loaded.convert("RGB")
    except Exception as load_err:
        print(f"[ERROR] Failed to load image: {load_err}")
        return None

def next_image():
    """Step forward one image (stopping at the last) and return it."""
    global current_index
    can_advance = bool(image_list) and current_index < len(image_list) - 1
    if can_advance:
        current_index = current_index + 1
        print(f"[DEBUG] Next -> index {current_index}")
    return load_current_image()

def prev_image():
    """Step back one image (stopping at the first) and return it."""
    global current_index
    can_go_back = bool(image_list) and current_index > 0
    if can_go_back:
        current_index = current_index - 1
        print(f"[DEBUG] Prev -> index {current_index}")
    return load_current_image()

import numpy as np  # Add this import at the top of your script if not already there

def save_processed(img, body_type, caption_extra, resize_preset, resize_method):
    """Save the displayed image as a JPEG plus a matching caption file.

    The image is written to DATA_IMG_DIR as
    ``<index>_<character_name>_<body tag>.jpg`` and the caption to
    DATA_CAP_DIR under the same stem with a ``.txt`` suffix. ``<index>`` is
    the 1-based, zero-padded position of the current image.

    Args:
        img: Image from the UI: a NumPy array, a file path, or a PIL image.
        body_type: Body-type label; lower-cased with spaces removed to form
            the file-name tag.
        caption_extra: Optional text appended to the base caption.
        resize_preset: ``"No Resize"`` or a key of RESIZE_PRESETS.
        resize_method: ``"Lanczos"`` or ``"Bicubic"``; any other value falls
            back to bicubic.

    Returns:
        A status string for the UI. On success it starts with
        ``"Successfully saved <filename>"`` followed by the debug trace; on
        failure it is an error message or the debug trace ending in a
        ``[FATAL]`` line.
    """
    # Validate before doing any work.
    if img is None:
        return "Error: No image currently displayed."
    if not image_list:
        return "Error: No dataset loaded."

    debug_lines = [
        f"[DEBUG] Pillow version: {Image.__version__}",
        f"[DEBUG] Input 'img' type: {type(img)}",
        f"[DEBUG] Input 'img' shape/dims: {getattr(img, 'shape', 'N/A')}",
        f"[DEBUG] Selected resize preset: {resize_preset}",
        f"[DEBUG] Selected resize method: {resize_method}",
    ]

    # -----------------------------
    # Convert to PIL Image
    # -----------------------------
    try:
        if isinstance(img, np.ndarray):
            debug_lines.append("[DEBUG] Converting NumPy array to PIL Image")
            final_img = Image.fromarray(img)
        elif isinstance(img, str):  # File path
            debug_lines.append("[DEBUG] Loading image from path")
            final_img = Image.open(img).convert("RGB")
        elif isinstance(img, Image.Image):
            debug_lines.append("[DEBUG] Input is already PIL Image")
            final_img = img.copy()
        else:
            raise ValueError(f"Unsupported image type: {type(img)}")

        # JPEG cannot store alpha or palette modes, so normalise every input
        # to RGB the same way load_current_image() does.
        if final_img.mode != "RGB":
            debug_lines.append(f"[DEBUG] Converting mode {final_img.mode} to RGB")
            final_img = final_img.convert("RGB")

        debug_lines.append(f"[DEBUG] Converted to PIL: {final_img.mode} {final_img.size}")

    except Exception as conv_err:
        debug_lines.append(f"[FATAL] Conversion failed: {type(conv_err).__name__}: {conv_err}")
        print("\n".join(debug_lines))  # Mirror the later failure path's console log
        return "\n".join(debug_lines)

    try:
        idx = str(current_index + 1).zfill(4)
        body_tag = body_type.lower().replace(" ", "")
        filename = f"{idx}_{character_name}_{body_tag}.jpg"

        # -----------------------------
        # Resize logic (operates on a PIL image)
        # -----------------------------
        if resize_preset != "No Resize":
            size = RESIZE_PRESETS[resize_preset]
            debug_lines.append(f"[DEBUG] Target size: {size}")

            resample_map = {
                "Lanczos": Image.Resampling.LANCZOS,
                "Bicubic": Image.Resampling.BICUBIC,
            }
            resample_filter = resample_map.get(resize_method, Image.Resampling.BICUBIC)

            debug_lines.append(f"[DEBUG] Using filter: {resize_method} -> {resample_filter}")
            debug_lines.append(f"[DEBUG] Calling PIL.resize(size={size}, resample={resample_filter})")

            final_img = final_img.resize(size, resample=resample_filter)
            debug_lines.append("[DEBUG] Resize succeeded!")

        # -----------------------------
        # Save image
        # -----------------------------
        img_path = os.path.join(DATA_IMG_DIR, filename)
        final_img.save(img_path, quality=95, optimize=True)
        debug_lines.append(f"[DEBUG] Image saved: {img_path}")

        # -----------------------------
        # Save caption
        # -----------------------------
        caption = f"a photo of {character_name} person"
        if caption_extra and caption_extra.strip():
            caption += ", " + caption_extra.strip()

        # Swap only the real extension; str.replace would also rewrite a
        # ".jpg" that happened to appear inside the character name.
        caption_name = os.path.splitext(filename)[0] + ".txt"
        caption_path = os.path.join(DATA_CAP_DIR, caption_name)
        with open(caption_path, "w", encoding="utf-8") as f:
            f.write(caption)
        debug_lines.append(f"[DEBUG] Caption saved: {caption_path}")

        return f"Successfully saved {filename}\n\n" + "\n".join(debug_lines)

    except Exception as e:
        # UI boundary: report the failure in the status box instead of crashing.
        error_msg = f"Unexpected error after conversion: {type(e).__name__}: {str(e)}"
        debug_lines.append(f"[FATAL] {error_msg}")
        print("\n".join(debug_lines))  # Console logs for Spaces
        return "\n".join(debug_lines)


def set_character_name(name):
    """Store the normalised character name used in file names and captions.

    The name is stripped and lower-cased; an empty result falls back to
    ``"person"``. The chosen value is echoed to the console.
    """
    global character_name
    cleaned = name.strip().lower()
    character_name = cleaned if cleaned else "person"
    print(f"[DEBUG] Character name set to: {character_name}")

# -----------------------------
# Gradio Interface
# -----------------------------
# The textbox's `change` event does not fire for its initial value, so apply
# the default name up front; otherwise saves would use "person" while the UI
# shows this name.
DEFAULT_CHARACTER_NAME = "anshu"
set_character_name(DEFAULT_CHARACTER_NAME)

# Build the dataset-builder UI: upload a ZIP, page through its images, and
# save each one together with a caption.
with gr.Blocks(title="Identity LoRA Dataset Builder") as app:
    gr.Markdown("## Identity / DreamBooth LoRA Dataset Builder")

    with gr.Row():
        zip_input = gr.File(label="Upload ZIP (images only)", file_types=[".zip"])
        name_input = gr.Textbox(label="Character Name", value=DEFAULT_CHARACTER_NAME)

    load_btn = gr.Button("Load Dataset")

    with gr.Row():
        image_display = gr.Image(label="Current Image", interactive=False)
        with gr.Column():
            body_type = gr.Radio(["Face", "Half", "Full"], label="Body Type", value="Face")
            caption_extra = gr.Textbox(label="Caption Add-on", placeholder="pose, background, light")
            resize_preset = gr.Dropdown(["No Resize"] + list(RESIZE_PRESETS.keys()), label="Resize Preset", value="Face (512x512)")
            resize_method = gr.Radio(["Lanczos", "Bicubic"], label="Resize Method", value="Lanczos")

    with gr.Row():
        prev_btn = gr.Button("Previous")
        next_btn = gr.Button("Next")
        save_btn = gr.Button("Save Image + Caption")

    status = gr.Textbox(label="Status", interactive=False)

    # Bindings
    name_input.change(set_character_name, name_input, None)
    # extract_zip returns (image, status), hence the two outputs.
    load_btn.click(extract_zip, zip_input, [image_display, status])
    next_btn.click(next_image, None, image_display)
    prev_btn.click(prev_image, None, image_display)
    save_btn.click(save_processed, [image_display, body_type, caption_extra, resize_preset, resize_method], status)

# -----------------------------
# Launch
# -----------------------------
app.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://33bd66b6918c19569f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


