In [1]:
# prompt: mount drive

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [24]:
# Import necessary libraries
import os
import torch
import logging
from PIL import Image
from pathlib import Path
from torchvision import transforms
from torch.utils.data import Dataset
from diffusers import (
    StableDiffusionXLPipeline,
    DPMSolverMultistepScheduler,
    DDPMScheduler,
    AutoencoderKL, # Importing AutoencoderKL
    UNet2DConditionModel, # Importing UNet2DConditionModel
)
from diffusers.optimization import get_scheduler
from diffusers.training_utils import EMAModel
from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextModelWithProjection # Importing CLIPTextModel, CLIPTextModelWithProjection
from huggingface_hub import notebook_login
from tqdm.auto import tqdm

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [4]:
class Config:
    # Model settings
    model_id = "stabilityai/stable-diffusion-3.5-medium"
    instance_prompt = "photo of Eesha"  # Replace 'xyz' with your identifier
    class_prompt = "photo of a person"

    # Paths - Get from Google Drive
    output_dir = "/content/drive/MyDrive/dreambooth-model"  # Example path, adjust as needed
    instance_data_dir = "/content/drive/MyDrive/training-images"  # Example path, adjust as needed

    # Training settings
    num_training_steps = 1000
    learning_rate = 1e-6
    train_batch_size = 1
    gradient_accumulation_steps = 1
    image_size = 512
    mixed_precision = "fp16"  # or "no" for full precision

    # Device configuration
    device = "cuda" if torch.cuda.is_available() else "cpu"

config = Config()

# Create output directory
os.makedirs(config.output_dir, exist_ok=True)

In [5]:
class PersonalDataset(Dataset):
    def __init__(self, instance_data_dir, instance_prompt, tokenizer, size=512):
        self.instance_data_dir = Path(instance_data_dir)
        self.instance_prompt = instance_prompt
        self.tokenizer = tokenizer
        self.size = size

        self.image_paths = [f for f in self.instance_data_dir.iterdir()
                           if f.suffix.lower() in ['.jpg', '.jpeg', '.png', '.webp']]

        if len(self.image_paths) == 0:
            raise ValueError(f"No images found in {instance_data_dir}")

        logger.info(f"Found {len(self.image_paths)} images in {instance_data_dir}")

        self.transform = transforms.Compose([
            transforms.Resize(size, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop(size),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        try:
            image = Image.open(image_path).convert('RGB')
            image = self.transform(image)

            example = {
                "input_ids": self.tokenizer(
                    self.instance_prompt,
                    padding="max_length",
                    truncation=True,
                    max_length=self.tokenizer.model_max_length,
                    return_tensors="pt",
                ).input_ids[0],
                "images": image,
            }
            return example
        except Exception as e:
            logger.error(f"Error loading image {image_path}: {e}")
            raise

In [48]:
def setup_model():

    model_id = "stabilityai/stable-diffusion-3.5-medium"

    # Initialize the pipeline
    pipeline = StableDiffusionXLPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        use_safetensors=True
    )

    # Move to GPU if available
    if torch.cuda.is_available():
        pipeline = pipeline.to("cuda")

    return pipeline

In [44]:
def prepare_dataset(pipeline):
    dataset = PersonalDataset(
        instance_data_dir=config.instance_data_dir,
        instance_prompt=config.instance_prompt,
        tokenizer=pipeline.tokenizer,
        size=config.image_size
    )
    return dataset

In [45]:
def training_function(pipeline, dataset):
    # Prepare optimizer
    optimizer = torch.optim.AdamW(
        pipeline.unet.parameters(),
        lr=config.learning_rate,
    )

    # Prepare scheduler
    lr_scheduler = get_scheduler(
        "constant",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=config.num_training_steps,
    )

    # Progress bar
    progress_bar = tqdm(range(config.num_training_steps))
    progress_bar.set_description("Steps")
    global_step = 0

    # Training loop
    for step in range(config.num_training_steps):
        pipeline.train()

        # Get training sample
        batch = dataset[step % len(dataset)]

        # Forward pass
        loss = pipeline(
            batch["input_ids"].unsqueeze(0).to(config.device),
            batch["images"].unsqueeze(0).to(config.device),
            return_dict=True
        ).loss

        # Backward pass
        loss.backward()

        if (step + 1) % config.gradient_accumulation_steps == 0:
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

            # Update progress bar
            progress_bar.update(1)
            global_step += 1

            # Log progress
            if global_step % 10 == 0:
                logger.info(f"Step {global_step}: loss = {loss.detach().item():.4f}")

            # Save checkpoint
            if global_step % 100 == 0:
                pipeline.save_pretrained(os.path.join(config.output_dir, f"checkpoint-{global_step}"))

    # Save final model
    pipeline.save_pretrained(config.output_dir)
    return pipeline

In [46]:
def generate_images(pipeline, prompt, num_images=1):
    """Generate images using the fine-tuned model"""
    images = pipeline(
        prompt,
        num_inference_steps=50,
        guidance_scale=7.5,
        num_images_per_prompt=num_images
    ).images

    # Save images
    os.makedirs("generated_images", exist_ok=True)
    for i, image in enumerate(images):
        image.save(f"generated_images/generated_{i}.png")

    return images

In [47]:
pipeline = setup_model()


A mixture of fp16 and non-fp16 filenames will be loaded.
Loaded fp16 filenames:
[text_encoder_3/model.fp16-00002-of-00002.safetensors, text_encoder/model.fp16.safetensors, text_encoder_3/model.fp16-00001-of-00002.safetensors, text_encoder_2/model.fp16.safetensors]
Loaded non-fp16 filenames:
[transformer/diffusion_pytorch_model.safetensors, vae/diffusion_pytorch_model.safetensors
If this behavior is not expected, please check your folder structure.


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

ValueError: Pipeline <class 'diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline'> expected {'image_encoder', 'feature_extractor', 'vae', 'tokenizer_2', 'scheduler', 'text_encoder_2', 'unet', 'tokenizer', 'text_encoder'}, but only {'vae', 'tokenizer_2', 'scheduler', 'text_encoder_2', 'tokenizer', 'text_encoder'} were passed.

In [11]:
# Prepare dataset
dataset = prepare_dataset(pipeline)

# Train model
logger.info("Starting training...")
pipeline = training_function(pipeline, dataset)

# Generate test image
logger.info("Generating test image...")
test_prompt = f"professional photo of {config.instance_prompt}, high quality, detailed face"
generate_images(pipeline, test_prompt)

logger.info("Training complete!")


NameError: name 'pipeline' is not defined

In [49]:
setup_model()

Fetching 26 files:   0%|          | 0/26 [00:00<?, ?it/s]

(…)t_encoder_3/model.safetensors.index.json:   0%|          | 0.00/19.9k [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

ValueError: Pipeline <class 'diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline'> expected {'image_encoder', 'feature_extractor', 'vae', 'tokenizer_2', 'scheduler', 'text_encoder_2', 'unet', 'tokenizer', 'text_encoder'}, but only {'vae', 'tokenizer_2', 'scheduler', 'text_encoder_2', 'tokenizer', 'text_encoder'} were passed.