# Luna Avatar Generator - Flux Kontext Image-to-Image Pipeline

Generates **4,870** Luna avatar variations using **Flux.1 Kontext Dev** on a free Colab T4 GPU.

**Prerequisites:**
1. A [Hugging Face account](https://huggingface.co/join) with access to [FLUX.1-Kontext-dev](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) (accept the license on the model page)
2. An HF access token stored in **Colab Secrets** as `HF_TOKEN` (Settings gear icon > Secrets > Add `HF_TOKEN`)
3. A `BASE_IMAGES/` folder uploaded to your Google Drive root with 23 reference images
4. `prompt_manifest.py` uploaded to your Drive root (or `Drive/colab/` folder)

**How it works:** Takes 23 base reference images of Luna (9 regular outfits + 14 costumes),
then applies pose/emotion prompts via image-to-image editing. Regular outfit prompts are
multiplied by 4 hairstyle variants (hair down, messy bun, ponytail, braid). Costume prompts
are thematic and not multiplied.

**Priority order:** `dress.jpg` images generate first (396 images), then other outfits, then costumes.

**Parallel mode:** Set `CHUNK_INDEX` and `TOTAL_CHUNKS` in cell 4 to split across multiple Colab instances.

**Resume-safe** - skips images that already exist on Drive. Just re-run after timeout.

Results are saved to Google Drive at `/MyDrive/luna_avatars/` with YAML config output.

In [None]:
#@title 1. Setup Environment & Install Dependencies
#@markdown Installs Flux Kontext pipeline and quantization libraries. ~3-4 min on first run.
#@markdown
#@markdown **IMPORTANT:** You must have a Hugging Face account and accept the
#@markdown [FLUX.1-Kontext-dev license](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev)
#@markdown before running this notebook. Store your HF token in Colab Secrets as `HF_TOKEN`.

import subprocess
import sys
import os

# Dependencies for the notebook. diffusers is installed from the main branch
# (Kontext support); optimum-quanto provides the quantization used later.
_pip_packages = [
    'git+https://github.com/huggingface/diffusers.git',
    'transformers', 'accelerate', 'sentencepiece', 'protobuf',
    'safetensors', 'Pillow', 'pyyaml', 'huggingface_hub',
    'optimum-quanto',
]
subprocess.check_call(
    [sys.executable, '-m', 'pip', 'install', '-q', *_pip_packages]
)

# FLUX.1-Kontext-dev is a gated model, so a Hugging Face login is required.
# Imported only now because the package may have been installed just above.
from huggingface_hub import login
try:
    # Preferred path: read the token from Colab Secrets.
    from google.colab import userdata
    hf_token = userdata.get('HF_TOKEN')
    login(token=hf_token)
    print('Authenticated with HF token from Colab Secrets.')
except Exception:
    # Any failure above (secret lookup, import, login) falls back to the
    # interactive prompt instead of aborting the cell.
    print('HF_TOKEN not found in Colab Secrets. Trying interactive login...')
    login()

# Refuse to continue without a CUDA device; otherwise report what we got.
import torch
if not torch.cuda.is_available():
    raise RuntimeError(
        'No GPU detected! Go to Runtime > Change runtime type > T4 GPU'
    )

gpu_name = torch.cuda.get_device_name(0)
# total_memory is in bytes; convert to GiB for display.
gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024**3)
print(f'GPU: {gpu_name} ({gpu_mem:.1f} GB)')
print(f'CUDA: {torch.version.cuda}')
print(f'PyTorch: {torch.__version__}')

print('\nEnvironment ready.')

In [None]:
#@title 2. Mount Google Drive & Copy Base Images
#@markdown Mounts Drive and copies base images to the working directory.
#@markdown **IMPORTANT:** Upload the `BASE_IMAGES/` folder to your Google Drive root first!
#@markdown Also upload `prompt_manifest.py` to your Drive root (or the `colab/` folder).

from google.colab import drive
import shutil

drive.mount('/content/drive')

# Drive-backed locations (persist across sessions) and the local working copy.
OUTPUT_ROOT = '/content/drive/MyDrive/luna_avatars'
BASE_IMAGE_DIR = '/content/drive/MyDrive/BASE_IMAGES'
LOCAL_BASE_DIR = '/content/base_images'

for _needed_dir in (OUTPUT_ROOT, LOCAL_BASE_DIR):
    os.makedirs(_needed_dir, exist_ok=True)

# Copy base images to local storage for faster access (supports jpg, png, webp)
_image_suffixes = ('.jpg', '.jpeg', '.png', '.webp')
if not os.path.isdir(BASE_IMAGE_DIR):
    # Nothing to copy: tell the user what is expected and carry on.
    print(f'WARNING: {BASE_IMAGE_DIR} not found on Drive!')
    print('Please upload your BASE_IMAGES folder to Google Drive root.')
    print('Expected: 9 regular outfit images + 14 costume images (23 total)')
else:
    for image_name in os.listdir(BASE_IMAGE_DIR):
        # The extension check is case-insensitive; other files are ignored.
        if not image_name.lower().endswith(_image_suffixes):
            continue
        shutil.copy2(
            os.path.join(BASE_IMAGE_DIR, image_name),
            os.path.join(LOCAL_BASE_DIR, image_name),
        )
        print(f'  Copied: {image_name}')
    # Counts everything in the local directory, including earlier copies.
    print(f'\n{len(os.listdir(LOCAL_BASE_DIR))} base images ready.')

# Candidate Drive locations for prompt_manifest.py, checked in this order.
MANIFEST_LOCATIONS = [
    '/content/drive/MyDrive/prompt_manifest.py',
    '/content/drive/MyDrive/colab/prompt_manifest.py',
    '/content/drive/MyDrive/Luna/prompt_manifest.py',
    '/content/drive/MyDrive/Luna/colab/prompt_manifest.py',
]

# Take the first candidate that exists; None means the file is nowhere on Drive.
_manifest_source = next(
    (candidate for candidate in MANIFEST_LOCATIONS if os.path.exists(candidate)),
    None,
)
manifest_found = _manifest_source is not None

if manifest_found:
    # Place the module in /content so a later cell can import it.
    shutil.copy2(_manifest_source, '/content/prompt_manifest.py')
    print(f'\nCopied prompt_manifest.py from {_manifest_source}')
else:
    print('\nWARNING: prompt_manifest.py not found on Drive!')
    print('Upload it to your Drive root or Drive/colab/ folder.')

# Progress tracking
PROGRESS_FILE = os.path.join(OUTPUT_ROOT, '_progress.txt')
STATUS_FILE = os.path.join(OUTPUT_ROOT, '_status.txt')

def update_status(msg):
    """Replace the contents of STATUS_FILE with ``msg`` and echo it to stdout.

    The file is opened in write mode, so it only ever holds the latest status.
    """
    with open(STATUS_FILE, 'w') as status_handle:
        status_handle.write(msg)
    print(msg)

def log_progress(outfit, filename, idx, total):
    """Append one ``idx/total | outfit/filename`` line to PROGRESS_FILE."""
    entry_line = f'{idx}/{total} | {outfit}/{filename}\n'
    with open(PROGRESS_FILE, 'a') as progress_handle:
        progress_handle.write(entry_line)
update_status('MOUNTED')

In [None]:
#@title 3. Load Flux.1 Kontext Dev (Quantized for T4)
#@markdown Loads Flux Kontext with FP8 quantization via optimum-quanto.
#@markdown This fits on the free T4 GPU (15 GB VRAM).

import torch
import gc
from diffusers import FluxKontextPipeline, FluxTransformer2DModel
from transformers import T5EncoderModel
from optimum.quanto import freeze, qfloat8, quantize

# Record the current phase in the status file before the slow model load starts.
update_status('LOADING_MODEL')

MODEL_ID = 'black-forest-labs/FLUX.1-Kontext-dev'
# Components are loaded in bfloat16 first and quantized to FP8 afterwards.
DTYPE = torch.bfloat16

# Load and quantize transformer (largest component)
print('Loading transformer...')
transformer = FluxTransformer2DModel.from_pretrained(
    MODEL_ID, subfolder='transformer', torch_dtype=DTYPE
)
print('Quantizing transformer to FP8...')
# quantize() followed by freeze() is the optimum-quanto two-step sequence.
# NOTE(review): presumably freeze() is what replaces the float weights with
# their quantized form -- confirm against the optimum-quanto documentation.
quantize(transformer, weights=qfloat8)
freeze(transformer)

# Load and quantize T5 text encoder
print('Loading T5 text encoder...')
text_encoder_2 = T5EncoderModel.from_pretrained(
    MODEL_ID, subfolder='text_encoder_2', torch_dtype=DTYPE
)
print('Quantizing T5 to FP8...')
# Same quantize-then-freeze sequence as for the transformer above.
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)

# Build the pipeline with pre-quantized components (avoids double-loading into VRAM)
print('Building pipeline...')
# The two quantized modules are passed in explicitly; the remaining pipeline
# components are loaded from MODEL_ID by from_pretrained.
pipe = FluxKontextPipeline.from_pretrained(
    MODEL_ID,
    transformer=transformer,
    text_encoder_2=text_encoder_2,
    torch_dtype=DTYPE,
)
# NOTE(review): enable_model_cpu_offload() is expected to keep sub-models on
# the CPU and move each to the GPU only while it runs -- confirm in diffusers docs.
pipe.enable_model_cpu_offload()

# Free loading overhead
gc.collect()
torch.cuda.empty_cache()

# memory_allocated() returns bytes; convert to GiB for display.
# NOTE(review): with CPU offload enabled this figure may be close to zero until
# the first generation actually runs -- treat it as a sanity check only.
vram_used = torch.cuda.memory_allocated() / (1024**3)
print(f'\nPipeline ready. VRAM: {vram_used:.2f} GB')
update_status('MODEL_READY')

In [None]:
#@title 4. Load Base Images & Import Manifest
#@markdown Loads all 23 base reference images and imports the prompt manifest.
#@markdown
#@markdown **Parallel mode:** To split across multiple Colab instances, set
#@markdown `TOTAL_CHUNKS` to the number of instances and `CHUNK_INDEX` to this
#@markdown instance's index (0-based). Each instance gets a different slice.
#@markdown Leave both at 0 for single-instance mode (generates everything).

from PIL import Image
import sys

# ── PARALLEL CONFIG ──────────────────────────────────────────────────────────
# Set these to split work across multiple Colab instances:
#   CHUNK_INDEX = 0, TOTAL_CHUNKS = 0  -> single instance, all 4870 images
#   CHUNK_INDEX = 0, TOTAL_CHUNKS = 3  -> instance 1 of 3 (~1600 images)
#   CHUNK_INDEX = 1, TOTAL_CHUNKS = 3  -> instance 2 of 3 (~1600 images)
#   CHUNK_INDEX = 2, TOTAL_CHUNKS = 3  -> instance 3 of 3 (~1600 images)
CHUNK_INDEX = 0   #@param {type:"integer"}
TOTAL_CHUNKS = 0  #@param {type:"integer"}

# Import prompt_manifest (copied to /content/ in cell 2)
sys.path.insert(0, '/content')
from prompt_manifest import BASE_IMAGES, MANIFEST as FULL_MANIFEST, MASTER_PROMPTS, HAIRSTYLE_VARIANTS, get_chunk

# Apply chunking if configured
if TOTAL_CHUNKS > 1:
    # CHUNK_INDEX is 0-based, so the only valid values are 0 .. TOTAL_CHUNKS - 1.
    # Fail fast on a typo instead of spending GPU hours on the wrong slice.
    if not 0 <= CHUNK_INDEX < TOTAL_CHUNKS:
        raise ValueError(
            f'CHUNK_INDEX must be between 0 and {TOTAL_CHUNKS - 1} '
            f'when TOTAL_CHUNKS = {TOTAL_CHUNKS} (got {CHUNK_INDEX})'
        )
    MANIFEST = get_chunk(CHUNK_INDEX, TOTAL_CHUNKS)
    print(f'PARALLEL MODE: Chunk {CHUNK_INDEX + 1} of {TOTAL_CHUNKS}')
    print(f'This instance: {len(MANIFEST)} images (of {len(FULL_MANIFEST)} total)')
else:
    # TOTAL_CHUNKS of 0 or 1 means a single instance generates everything.
    if CHUNK_INDEX != 0:
        # A non-zero index with chunking disabled usually means TOTAL_CHUNKS
        # was forgotten; say so rather than silently running the full manifest.
        print(
            f'NOTE: CHUNK_INDEX = {CHUNK_INDEX} is ignored because '
            f'TOTAL_CHUNKS = {TOTAL_CHUNKS} (single-instance mode).'
        )
    MANIFEST = FULL_MANIFEST
    print(f'SINGLE MODE: All {len(MANIFEST)} images')

# Summarise the order in which base images will be processed in this manifest.
from collections import Counter

# Number of manifest entries per base image.
base_counts = Counter(m['base_key'] for m in MANIFEST)
# Unique base keys in first-appearance order (dict keys keep insertion order).
base_order = list(dict.fromkeys(m['base_key'] for m in MANIFEST))

_scope_label = 'chunk' if TOTAL_CHUNKS > 1 else 'run'
print(f'\nGeneration order for this {_scope_label}:')

# Only the first five bases are listed individually, with a running total.
cumulative = 0
for key in base_order[:5]:
    cumulative += base_counts[key]
    print(f'  {key}: {base_counts[key]} images (cumulative: {cumulative})')

_unlisted = len(base_order) - 5
if _unlisted > 0:
    print(f'  ... and {_unlisted} more base images')

# Load only the base images needed for this chunk
needed_bases = {m['base_key'] for m in MANIFEST}
loaded_bases = {}
for key, info in BASE_IMAGES.items():
    if key not in needed_bases:
        continue
    path = os.path.join(LOCAL_BASE_DIR, info['file'])
    if not os.path.exists(path):
        print(f'  WARNING: Missing {info["file"]}!')
        continue
    # convert() returns an independent in-memory image, so the file handle can
    # be closed right away instead of lingering until garbage collection.
    with Image.open(path) as source_image:
        img = source_image.convert('RGB')
    loaded_bases[key] = img
    category = info.get('category', 'unknown')
    print(f'  [{category}] {key}: {img.size[0]}x{img.size[1]}')

# Keys referenced by the manifest but absent from BASE_IMAGES would otherwise
# only surface later as per-image generation errors.
for key in needed_bases.difference(BASE_IMAGES):
    print(f'  WARNING: base key {key!r} is not defined in BASE_IMAGES!')

print(f'\nLoaded {len(loaded_bases)}/{len(needed_bases)} needed base images.')

In [None]:
#@title 5. Manifest Statistics
#@markdown Shows breakdown of this run's generation entries.

from collections import Counter
import math

mode = f'Chunk {CHUNK_INDEX + 1}/{TOTAL_CHUNKS}' if TOTAL_CHUNKS > 1 else 'Single instance'
print(f'Mode: {mode}')
print(f'Images to generate: {len(MANIFEST)}')
print()

# Per outfit
outfit_counts = Counter(m['output_dir'] for m in MANIFEST)
print('By outfit:')
for outfit, count in outfit_counts.most_common():
    print(f'  {outfit}: {count} images')

# Per emotion
print()
emotion_counts = Counter(m['emotion'] for m in MANIFEST)
print('By emotion:')
for emotion, count in emotion_counts.most_common():
    print(f'  {emotion}: {count} images')

# Per hairstyle
print()
# The generation loop reads this key with .get(..., 'original'), so entries
# without a hairstyle are counted under the same default here instead of
# raising KeyError.
hair_counts = Counter(m.get('hairstyle', 'original') for m in MANIFEST)
print('By hairstyle:')
for hair, count in hair_counts.most_common():
    print(f'  {hair}: {count} images')

# Estimate time
EST_SECONDS_PER_IMAGE = 25  # Flux Kontext on T4 with FP8
total_hours = (len(MANIFEST) * EST_SECONDS_PER_IMAGE) / 3600
print(f'\nEstimated time: {total_hours:.1f} hours at ~{EST_SECONDS_PER_IMAGE}s/image')
if TOTAL_CHUNKS <= 1:
    # A partial session still costs a whole session, so round up
    # (e.g. 2.4 -> 3) rather than to the nearest integer.
    sessions_needed = math.ceil(total_hours / 12)
    print(f'({sessions_needed} sessions at 12h each, resume support included)')
    # Show dress priority
    dress_count = sum(1 for m in MANIFEST if m['base_key'] == 'dress')
    dress_hours = (dress_count * EST_SECONDS_PER_IMAGE) / 3600
    print(f'\ndress.jpg images: {dress_count} (first {dress_hours:.1f}h of generation)')
    print('These generate FIRST - if the session times out, at least dress is done!')

In [None]:
#@title 6. Batch Generation Engine
#@markdown Generates all images using Flux Kontext image-to-image editing.
#@markdown **Resume-safe:** skips images that already exist on Drive.

import time
from pathlib import Path

# Generation settings
GUIDANCE_SCALE = 2.5          # Kontext recommended
NUM_INFERENCE_STEPS = 28      # Quality/speed balance
BASE_SEED = 42


def get_completed():
    """Return the set of ``'<dir>/<file>.png'`` paths already under OUTPUT_ROOT.

    Only immediate sub-directories whose name does not start with ``_`` are
    scanned, and only ``*.png`` files directly inside them are reported.
    """
    return {
        f'{outfit_dir.name}/{png_file.name}'
        for outfit_dir in Path(OUTPUT_ROOT).iterdir()
        if outfit_dir.is_dir() and not outfit_dir.name.startswith('_')
        for png_file in outfit_dir.glob('*.png')
    }


def generate_one(base_image, prompt, seed):
    """Run the pipeline once on ``base_image`` with ``prompt``; return the image.

    A fresh ``torch.Generator`` seeded with ``seed`` is created for every call.
    The first entry of the pipeline result's ``images`` is returned.
    """
    seeded_rng = torch.Generator().manual_seed(seed)
    output = pipe(
        image=base_image,
        prompt=prompt,
        guidance_scale=GUIDANCE_SCALE,
        num_inference_steps=NUM_INFERENCE_STEPS,
        generator=seeded_rng,
    )
    return output.images[0]


def _write_image_config(cfg_path, all_meta):
    """Write the ``images:`` YAML list (path + tags per image) to ``cfg_path``."""
    # NOTE(review): tags are written unquoted; a tag containing YAML
    # metacharacters (':' or '#') would need quoting -- confirm the tag vocabulary.
    with open(cfg_path, 'w') as f:
        f.write('# Generated avatar config - paste into pyagentvox.yaml\n')
        f.write('images:\n')
        for m in all_meta:
            f.write(f'- path: {m["path"]}\n')
            f.write('  tags:\n')
            for t in m['tags']:
                f.write(f'  - {t}\n')


def run_batch():
    """Generate every MANIFEST entry that is not already on Drive.

    Entries whose output file already exists are skipped (resume support).
    Failures are collected instead of aborting the run. When the loop ends,
    ``_image_config.yaml`` is written, plus ``_errors.txt`` if anything failed.

    Returns:
        A ``(generated, skipped, errors)`` tuple: two counts and the list of
        error message strings.
    """
    completed = get_completed()
    total = len(MANIFEST)
    generated = 0
    skipped = 0
    errors = []
    all_meta = []

    update_status(f'GENERATING 0/{total}')

    for idx, entry in enumerate(MANIFEST, 1):
        rel_path = f'{entry["output_dir"]}/{entry["output_filename"]}'

        # Resume: skip existing
        if rel_path in completed:
            skipped += 1
            all_meta.append({'path': rel_path, 'tags': entry['tags']})
            if idx % 50 == 0:
                print(f'[{idx}/{total}] Skipping existing...')
            continue

        # Get base image
        base_key = entry['base_key']
        if base_key not in loaded_bases:
            errors.append(f'{rel_path}: missing base image {base_key}')
            continue

        base_img = loaded_bases[base_key]
        # The seed depends on the entry's position in this manifest.
        seed = BASE_SEED + idx

        # Ensure output dir
        outfit_dir = os.path.join(OUTPUT_ROOT, entry['output_dir'])
        os.makedirs(outfit_dir, exist_ok=True)

        hairstyle = entry.get('hairstyle', 'original')
        print(f'[{idx}/{total}] {rel_path} (base: {base_key}, hair: {hairstyle})')
        print(f'  Prompt: {entry["prompt_text"][:100]}...')

        try:
            t0 = time.time()
            image = generate_one(base_img, entry['prompt_text'], seed)
            elapsed = time.time() - t0

            save_path = os.path.join(outfit_dir, entry['output_filename'])
            # Write to a temporary name and rename into place. If the session
            # dies mid-write, no truncated file is left under the final name
            # for the resume scan (which matches '*.png') to treat as complete.
            tmp_path = save_path + '.tmp'
            try:
                image.save(tmp_path, 'PNG')
                os.replace(tmp_path, save_path)
            finally:
                # After a successful rename the temp file is gone; this only
                # cleans up a partial file left by a failed save.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

            generated += 1
            log_progress(entry['output_dir'], entry['output_filename'], idx, total)
            all_meta.append({'path': rel_path, 'tags': entry['tags']})
            print(f'  Done in {elapsed:.1f}s')

            torch.cuda.empty_cache()

        except Exception as e:
            # One failed image must not stop the batch; record and carry on.
            errors.append(f'{rel_path}: {e}')
            print(f'  ERROR: {e}')

        if idx % 10 == 0:
            update_status(f'GENERATING {idx}/{total}')

    # Write YAML config
    _write_image_config(os.path.join(OUTPUT_ROOT, '_image_config.yaml'), all_meta)

    if errors:
        with open(os.path.join(OUTPUT_ROOT, '_errors.txt'), 'w') as f:
            f.write('\n'.join(errors))

    print(f'\n{"=" * 60}')
    print(f'BATCH COMPLETE')
    print(f'  Generated: {generated}')
    print(f'  Skipped: {skipped}')
    print(f'  Errors: {len(errors)}')
    print(f'  Total: {total}')
    print(f'{"=" * 60}')
    update_status(f'COMPLETE {generated}/{total}')
    return generated, skipped, errors


generated, skipped, errors = run_batch()

In [None]:
#@title 7. Preview Grid
#@markdown Visual overview of all generated images.

import math
from PIL import Image as PILImage

def make_grid(root, thumb=128, cols=10):
    """Build, save and display a contact sheet of every PNG under ``root``.

    Args:
        root: Directory whose immediate sub-directories hold the images.
            Sub-directories starting with ``_`` are ignored.
        thumb: Edge length in pixels of each square grid cell; images are
            shrunk to fit and centred inside their cell.
        cols: Number of cells per row.

    The grid is written to ``<root>/_preview.png``. Files that cannot be read
    are left out of the grid and listed on stdout. Returns ``None``.
    """
    imgs = []
    unreadable = []
    for d in sorted(Path(root).iterdir()):
        if not d.is_dir() or d.name.startswith('_'):
            continue
        for f in sorted(d.glob('*.png')):
            try:
                # The context manager closes the file promptly; convert()
                # yields a detached in-memory copy that stays valid afterwards.
                with PILImage.open(f) as im:
                    im.thumbnail((thumb, thumb))
                    imgs.append(im.convert('RGB'))
            except Exception as exc:
                # Best effort: one bad file must not block the preview, but it
                # should be visible to the user rather than silently dropped.
                unreadable.append(f'{d.name}/{f.name}: {exc}')
    if unreadable:
        print(f'Skipped {len(unreadable)} unreadable image(s):')
        for problem in unreadable:
            print(f'  {problem}')
    if not imgs:
        print('No images to preview.')
        return
    rows = math.ceil(len(imgs) / cols)
    grid = PILImage.new('RGB', (cols * thumb, rows * thumb), (30, 30, 30))
    for i, im in enumerate(imgs):
        r, c = divmod(i, cols)
        # Centre the (possibly non-square) thumbnail inside its cell.
        x = c * thumb + (thumb - im.width) // 2
        y = r * thumb + (thumb - im.height) // 2
        grid.paste(im, (x, y))
    grid.save(os.path.join(root, '_preview.png'))
    print(f'{len(imgs)} images in {rows}x{cols} grid')
    from IPython.display import display
    display(grid)

make_grid(OUTPUT_ROOT)
update_status('DONE')

In [None]:
#@title 8. Download ZIP
#@markdown Creates a ZIP archive on Drive and offers browser download.

import shutil
import zipfile

zip_path = '/content/luna_avatars'
zip_file = f'{zip_path}.zip'
# Location of the Drive copy. It lives inside OUTPUT_ROOT, so it must be kept
# out of the archive; otherwise each re-run embeds the previous ZIP.
drive_zip = os.path.join(OUTPUT_ROOT, 'luna_avatars.zip')

# Build the archive on local disk first, with paths relative to OUTPUT_ROOT.
with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED) as archive:
    for dirpath, _dirnames, filenames in os.walk(OUTPUT_ROOT):
        for name in filenames:
            full_path = os.path.join(dirpath, name)
            if full_path == drive_zip:
                continue
            archive.write(full_path, os.path.relpath(full_path, OUTPUT_ROOT))

size_mb = os.path.getsize(zip_file) / (1024 * 1024)
print(f'ZIP: {zip_file} ({size_mb:.1f} MB)')

shutil.copy2(zip_file, drive_zip)
print('Copied to Drive.')

try:
    from google.colab import files
    files.download(zip_file)
except Exception:
    # Browser download is best effort; the Drive copy above is the fallback.
    print('Download from Google Drive instead.')