In [34]:
import torch
from pathlib import Path
from PIL import Image
import numpy as np
from diffusers import DiffusionPipeline

# ========== CONFIG ==========
# Per-model settings. The top-level key ("strong" / "weak") is used by main()
# both as the output sub-directory name and to pick the sampling routine.
MODEL_CONFIGS = {
    "strong": {
        "model_id": "google/ncsnpp-ffhq-256",
        # NOTE(review): main() does not forward this list to
        # run_strong_model_correct, which works from its own copy of the
        # same values.
        "steps_to_save": [0, 250, 500, 750, 1000],
        # Edge length (pixels) that saved PNGs are resized to.
        "image_size": 256
    },
    "weak": {
        "model_id": "google/ddpm-ema-celebahq-256",
        # Number of timesteps handed to scheduler.set_timesteps().
        "num_steps": 300,
        # An intermediate snapshot is written every `save_every` steps.
        "save_every": 50,
        # Edge length (pixels) for saved PNGs; also the fallback noise size
        # when the UNet config does not provide one.
        "image_size": 256
    }
}

# Seed for every torch.Generator created in this notebook.
SEED = 42
# Root folder for all outputs (relative to the current working directory).
OUTPUT_DIR = Path("correct_saves")
# ============================

def setup_pipeline(model_id, device):
    """Load the pretrained pipeline ``model_id`` and move it onto ``device``.

    Returns whatever the pipeline's ``.to(device)`` call returns.
    """
    pipeline = DiffusionPipeline.from_pretrained(model_id)
    pipeline_on_device = pipeline.to(device)
    return pipeline_on_device

def create_noise(pipe, device, seed=42, target_size=None):
    """Create the initial Gaussian noise tensor for sampling.

    The noise is drawn on the CPU from a generator seeded with ``seed`` and
    only then moved to ``device``, so the seed fully determines the values.

    Args:
        pipe: Object that may expose ``pipe.unet.config`` with ``in_channels``
            and (optionally) ``sample_size``.
        device: Target device for the returned tensor.
        seed: Seed for the CPU random generator.
        target_size: Fallback spatial size used when the UNet config has no
            usable ``sample_size``; 256 is used if this is falsy as well.

    Returns:
        A tensor of shape ``(1, in_channels, height, width)`` on ``device``.
    """
    generator = torch.Generator(device="cpu").manual_seed(seed)
    fallback_size = target_size or 256

    # Get dimensions from the UNet config, or fall back to 3-channel defaults
    # when the pipeline does not expose one. Only a missing attribute triggers
    # the fallback; unrelated errors are no longer hidden.
    try:
        unet_config = pipe.unet.config
        in_channels = unet_config.in_channels
        # A config may carry ``sample_size=None``; treat that as "not set".
        sample_size = getattr(unet_config, "sample_size", None) or fallback_size
    except AttributeError:
        in_channels = 3
        sample_size = fallback_size

    # Accept either a single edge length or a (height, width) pair.
    if isinstance(sample_size, int):
        height = width = sample_size
    else:
        height, width = sample_size

    noise = torch.randn((1, in_channels, height, width), generator=generator)
    return noise.to(device)

def latents_to_image(latents):
    """Convert a tensor to an RGB PIL Image for visual inspection.

    Values are min-max rescaled to the full 0-255 range, so the result shows
    relative structure rather than absolute pixel values.

    Args:
        latents: Tensor shaped ``(batch, channels, H, W)``, ``(channels, H, W)``
            or ``(H, W)``. For batched input only the first sample is rendered.

    Returns:
        A PIL Image built from the first one (replicated to three) or first
        three channels.

    Raises:
        ValueError: If the tensor is not 2-, 3- or 4-dimensional.
    """
    tensor = latents.detach().cpu()
    if tensor.ndim == 4:
        # Index instead of squeeze(0): squeeze leaves a batch > 1 untouched,
        # which would make the permute below fail.
        tensor = tensor[0]
    elif tensor.ndim == 2:
        # Treat a bare (H, W) map as a single-channel image.
        tensor = tensor.unsqueeze(0)
    if tensor.ndim != 3:
        raise ValueError(
            f"Expected a 2-D, 3-D or 4-D tensor, got shape {tuple(latents.shape)}"
        )

    # Normalize to [0,1]; the epsilon guards against a constant tensor.
    low, high = tensor.min(), tensor.max()
    tensor = (tensor - low) / (high - low + 1e-8)
    tensor = (tensor * 255).clamp(0, 255).byte()

    if tensor.shape[0] == 1:  # Grayscale to RGB
        tensor = tensor.repeat(3, 1, 1)
    elif tensor.shape[0] > 3:  # Take first 3 channels
        tensor = tensor[:3]

    # PIL expects channels-last (H, W, C).
    return Image.fromarray(tensor.permute(1, 2, 0).numpy())

def save_step(latents, step, output_dir, image_size=256):
    """Save the raw tensor (.npy) and a preview image (.png) for one step.

    Both files are named ``step_<step as 4 digits>`` inside ``output_dir``,
    which is created if it does not exist.

    Args:
        latents: Tensor to persist and render.
        step: Integer step index used in the file names.
        output_dir: ``pathlib.Path`` of the destination directory.
        image_size: Edge length the preview PNG is resized to.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    stem = f"step_{step:04d}"

    # Save tensor. detach() matches latents_to_image and avoids the error
    # raised when converting a tensor that requires grad to NumPy.
    np.save(output_dir / f"{stem}.npy", latents.detach().cpu().numpy())

    # Save image
    img = latents_to_image(latents)
    img = img.resize((image_size, image_size), Image.LANCZOS)
    img.save(output_dir / f"{stem}.png")
    print(f" Saved step {step}")

def save_image_directly(image, step, output_dir, image_size=256):
    """Resize an already-decoded PIL image and write it as ``step_XXXX.png``.

    ``output_dir`` is created on demand; ``step`` is zero-padded to four
    digits in the file name.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    destination = output_dir / f"step_{step:04d}.png"
    square = (image_size, image_size)
    image.resize(square, Image.LANCZOS).save(destination)
    print(f" Saved step {step} (direct image)")

def run_strong_model_correct(pipe, output_dir, image_size, steps_to_save=None):
    """
    Sample from a ScoreSDE pipeline once per requested inference-step budget.

    Every "step N" image (N > 0) is the final output of a separate, complete
    pipeline run with ``num_inference_steps=N`` and a generator seeded with
    SEED. It is not an intermediate state of a single sampling trajectory.

    The "step 0" frame is noise drawn by create_noise() from SEED.
    NOTE(review): whether it equals the tensor the pipeline draws internally
    is not established here.

    Args:
        pipe: Loaded pipeline; called with ``num_inference_steps``,
            ``generator`` and ``output_type``.
        output_dir: ``pathlib.Path`` where frames are written.
        image_size: Edge length for saved PNGs (also the fallback noise size).
        steps_to_save: Step budgets to render, in order. 0 means "save the
            initial noise". Defaults to ``[0, 250, 500, 750, 1000]``.

    Returns:
        Always True. Individual failures are reported on stdout and replaced
        by a placeholder frame showing the initial noise.
    """
    print(" Running ScoreSDE model with correct implementation")

    output_dir.mkdir(parents=True, exist_ok=True)

    # One list drives both the log line and the loop below.
    if steps_to_save is None:
        steps_to_save = [0, 250, 500, 750, 1000]
    else:
        steps_to_save = list(steps_to_save)
    print(f" Will save steps: {steps_to_save}")

    # Create initial noise; it is also the placeholder for failed runs.
    initial_noise = create_noise(pipe, pipe.device, SEED, image_size)
    if 0 in steps_to_save:
        save_step(initial_noise, 0, output_dir, image_size)
        print(" Saved initial noise")

    # Run one full generation per positive step budget.
    for target_steps in (count for count in steps_to_save if count > 0):
        print(f"\n Generating with {target_steps} steps...")

        try:
            # Re-seed for every run so the runs differ only in step count.
            generator = torch.Generator(device="cpu").manual_seed(SEED)

            # Run the pipeline
            result = pipe(
                num_inference_steps=target_steps,
                generator=generator,
                output_type="pil"
            )

            # Extract the image
            if hasattr(result, 'images') and result.images:
                image = result.images[0]
                save_image_directly(image, target_steps, output_dir, image_size)
                print(f" Step {target_steps} completed successfully")
            else:
                print(f" No images in result for step {target_steps}")

        except Exception as e:
            # Best effort: keep going so the remaining budgets still render.
            print(f" Error generating step {target_steps}: {e}")
            # Save the initial noise as placeholder
            save_step(initial_noise, target_steps, output_dir, image_size)
            print(f"  Saved initial noise as placeholder for step {target_steps}")

    return True

def run_weak_model_normal(pipe, latents, num_steps, save_every, output_dir, image_size):
    """Run the DDPM denoising loop manually, saving snapshots at intervals.

    Args:
        pipe: Pipeline exposing ``scheduler`` and ``unet``.
        latents: Starting noise tensor; it is cloned, not modified.
        num_steps: Number of timesteps passed to ``scheduler.set_timesteps``;
            also the label of the final snapshot.
        save_every: Snapshot interval in steps; values <= 0 disable
            intermediate snapshots.
        output_dir: ``pathlib.Path`` where snapshots are written.
        image_size: Edge length for saved PNGs.

    Returns:
        The tensor after the last scheduler step.
    """
    print(" Running DDPM model with normal intervals")

    output_dir.mkdir(parents=True, exist_ok=True)

    # Save initial state
    save_step(latents, 0, output_dir, image_size)
    last_saved_step = 0

    scheduler = pipe.scheduler
    unet = pipe.unet

    scheduler.set_timesteps(num_steps)
    current_latents = latents.clone()

    for i, t in enumerate(scheduler.timesteps):
        with torch.no_grad():
            # Predict noise
            noise_pred = unet(current_latents, t, return_dict=False)[0]

            # Update latents
            current_latents = scheduler.step(noise_pred, t, current_latents, return_dict=False)[0]

        # Save intermediate
        current_step = i + 1
        if save_every > 0 and current_step % save_every == 0:
            save_step(current_latents, current_step, output_dir, image_size)
            last_saved_step = current_step
            print(f" Progress: {current_step}/{num_steps} steps")

    # Save final, unless the interval logic already wrote this exact step
    # (happens whenever num_steps is a multiple of save_every).
    if last_saved_step != num_steps:
        save_step(current_latents, num_steps, output_dir, image_size)

    return current_latents

def create_timeline_image(output_dir, model_name):
    """Create a horizontal timeline of all intermediate steps.

    Every ``step_*.png`` in ``output_dir`` (sorted by file name) is pasted
    left to right and labelled with the step number parsed from its name.
    The result is written to ``<model_name>_timeline.png`` in the same folder.

    Args:
        output_dir: ``pathlib.Path`` containing the step images.
        model_name: Prefix for the output file name.

    Returns:
        The composed PIL Image, or None when no step images exist.
    """
    from PIL import ImageDraw, ImageFont

    step_files = sorted(output_dir.glob("step_*.png"))
    if not step_files:
        print(f"No step images found in {output_dir}")
        return

    # Decode each frame inside a context manager so the file is closed
    # immediately; convert() returns an in-memory copy.
    images = []
    for path in step_files:
        with Image.open(path) as handle:
            images.append(handle.convert("RGB"))

    width = sum(img.width for img in images)
    height = max(img.height for img in images)

    timeline = Image.new('RGB', (width, height))
    draw = ImageDraw.Draw(timeline)

    # Labels are best effort: without a font the frames are still composed.
    try:
        font = ImageFont.load_default()
    except OSError as exc:
        print(f"Default font unavailable, step labels skipped: {exc}")
        font = None

    x_offset = 0
    for path, img in zip(step_files, images):
        timeline.paste(img, (x_offset, 0))

        if font is not None:
            try:
                step_num = int(path.stem.split('_')[1])
                draw.text((x_offset + 10, 10), f"Step {step_num}", fill='white', font=font)
            except (IndexError, ValueError) as exc:
                # File name does not follow step_<number>; leave it unlabelled.
                print(f"Skipping label for {path.name}: {exc}")

        x_offset += img.width

    timeline_path = output_dir / f"{model_name}_timeline.png"
    timeline.save(timeline_path)
    print(f"Timeline saved: {timeline_path}")

    return timeline

def create_grid_image(output_dir, model_name, cols=5):
    """Create a grid of intermediate steps.

    Every ``step_*.png`` in ``output_dir`` (sorted by file name) is placed
    row by row into a grid whose cell size is taken from the first image, and
    labelled with the step number parsed from its file name. The result is
    written to ``<model_name>_grid.png`` in the same folder.

    Args:
        output_dir: ``pathlib.Path`` containing the step images.
        model_name: Prefix for the output file name.
        cols: Maximum number of columns; clamped to ``1..number of images``.

    Returns:
        The composed PIL Image, or None when no step images exist.
    """
    from PIL import ImageDraw, ImageFont

    step_files = sorted(output_dir.glob("step_*.png"))
    if not step_files:
        print(f"No step images found in {output_dir}")
        return

    # Decode each frame inside a context manager so the file is closed
    # immediately; convert() returns an in-memory copy.
    images = []
    for path in step_files:
        with Image.open(path) as handle:
            images.append(handle.convert("RGB"))

    num_images = len(images)
    # Clamp to at least one column so the row computation cannot divide by zero.
    cols = max(1, min(cols, num_images))
    rows = (num_images + cols - 1) // cols

    img_width, img_height = images[0].size

    grid_width = cols * img_width
    grid_height = rows * img_height
    grid = Image.new('RGB', (grid_width, grid_height))
    draw = ImageDraw.Draw(grid)

    # Labels are best effort: without a font the frames are still composed.
    try:
        font = ImageFont.load_default()
    except OSError as exc:
        print(f"Default font unavailable, step labels skipped: {exc}")
        font = None

    for i, (path, img) in enumerate(zip(step_files, images)):
        row, col = divmod(i, cols)
        left, top = col * img_width, row * img_height
        grid.paste(img, (left, top))

        if font is not None:
            try:
                step_num = int(path.stem.split('_')[1])
                draw.text((left + 10, top + 10),
                          f"Step {step_num}", fill='white', font=font)
            except (IndexError, ValueError) as exc:
                # File name does not follow step_<number>; leave it unlabelled.
                print(f"Skipping label for {path.name}: {exc}")

    grid_path = output_dir / f"{model_name}_grid.png"
    grid.save(grid_path)
    print(f"Grid saved: {grid_path}")

    return grid

def main():
    """Run every model in MODEL_CONFIGS and build its visualizations.

    For each entry the pipeline is loaded, sampled ("strong" via repeated
    full pipeline runs, everything else via the manual DDPM loop), and the
    saved frames are combined into a timeline and a grid image. A failure in
    one model is reported with a traceback and does not stop the others.
    """
    import traceback

    device = "cuda" if torch.cuda.is_available() else "cpu"
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    print(f"Using device: {device}")

    for model_name, config in MODEL_CONFIGS.items():
        print(f"\n{'='*60}")
        print(f" Processing {model_name} model: {config['model_id']}")
        print(f"{'='*60}")

        pipe = None
        try:
            # Setup
            pipe = setup_pipeline(config['model_id'], device)
            model_dir = OUTPUT_DIR / model_name

            if model_name == "strong":
                # Use direct pipeline approach for ScoreSDE model
                success = run_strong_model_correct(
                    pipe,
                    model_dir,
                    config['image_size']
                )

                if not success:
                    print(" Strong model failed completely")
                    continue

            else:
                # For weak model, use normal approach
                latents = create_noise(pipe, device, SEED, config['image_size'])
                print(f" Initial latents: shape={latents.shape}, "
                      f"min={latents.min().item():.3f}, max={latents.max().item():.3f}")

                run_weak_model_normal(
                    pipe, latents,
                    config['num_steps'],
                    config['save_every'],
                    model_dir,
                    config['image_size']
                )

            # Create visualizations
            print(" Creating visualizations...")
            create_timeline_image(model_dir, model_name)
            create_grid_image(model_dir, model_name, cols=5)

            print(f" Completed {model_name} model -> {model_dir}")

        except Exception as e:
            print(f" Error with {model_name}: {e}")
            traceback.print_exc()

        finally:
            # Cleanup runs on success, on error and on the early `continue`,
            # so a failed model does not keep its weights on the GPU while
            # the next one loads.
            pipe = None
            if device == "cuda":
                torch.cuda.empty_cache()

# Run the full generation + visualization pass for every configured model.
main()

Using device: cuda

 Processing strong model: google/ncsnpp-ffhq-256


Loading pipeline components...:   0%|          | 0/2 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--google--ncsnpp-ffhq-256/snapshots/3a2a14b6226883d6ce5458738898d989dcc343eb: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--google--ncsnpp-ffhq-256/snapshots/3a2a14b6226883d6ce5458738898d989dcc343eb.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
Some weights of the model checkpoint at /root/.cache/huggingface/hub/models--google--ncsnpp-ffhq-256/snapshots/3a2a14b6226883d6ce5458738898d989dcc343eb were not used when initializing UNet2DModel: 
 ['time_proj.W']


 Running ScoreSDE model with correct implementation
 Will save steps: [0, 250, 500, 750, 1000]
 Saved step 0
 Saved initial noise

 Generating with 250 steps...


  0%|          | 0/250 [00:00<?, ?it/s]

 Saved step 250 (direct image)
 Step 250 completed successfully

 Generating with 500 steps...


  0%|          | 0/500 [00:00<?, ?it/s]

 Saved step 500 (direct image)
 Step 500 completed successfully

 Generating with 750 steps...


  0%|          | 0/750 [00:00<?, ?it/s]

 Saved step 750 (direct image)
 Step 750 completed successfully

 Generating with 1000 steps...


  0%|          | 0/1000 [00:00<?, ?it/s]

 Saved step 1000 (direct image)
 Step 1000 completed successfully
 Creating visualizations...
Timeline saved: correct_saves/strong/strong_timeline.png
Grid saved: correct_saves/strong/strong_grid.png
 Completed strong model -> correct_saves/strong

 Processing weak model: google/ddpm-ema-celebahq-256


Loading pipeline components...:   0%|          | 0/2 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--google--ddpm-ema-celebahq-256/snapshots/4cb6117472e6e4f45c5afe606b101858c27c3802: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--google--ddpm-ema-celebahq-256/snapshots/4cb6117472e6e4f45c5afe606b101858c27c3802.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


 Initial latents: shape=torch.Size([1, 3, 256, 256]), min=-4.590, max=4.629
 Running DDPM model with normal intervals
 Saved step 0
 Saved step 50
 Progress: 50/300 steps
 Saved step 100
 Progress: 100/300 steps
 Saved step 150
 Progress: 150/300 steps
 Saved step 200
 Progress: 200/300 steps
 Saved step 250
 Progress: 250/300 steps
 Saved step 300
 Progress: 300/300 steps
 Saved step 300
 Creating visualizations...
Timeline saved: correct_saves/weak/weak_timeline.png
Grid saved: correct_saves/weak/weak_grid.png
 Completed weak model -> correct_saves/weak


In [33]:
# Delete all previously generated outputs.
# NOTE(review): the absolute /content path presumably assumes a Colab-style
# working directory, while OUTPUT_DIR is relative — confirm they refer to the
# same folder. This cell's execution count (33) is lower than the generation
# cell's (34), so it was last run before generation, not after.
!rm -rf "/content/correct_saves"