# Stable diffusion image to image

## Overview

Hugging FaceのStable Diffusionを利用して、画像から画像を生成するノートブックです。公開されている重みをそのまま利用して実行します。実行するには、Hugging Faceのアカウントを取得し、APIキーを用意する必要があります。

## Install packages

In [None]:
def _install_packages() -> None:
    # Install packages
    !pip install --quiet --no-cache \
        diffusers==0.3.0 \
        ftfy \
        scipy \
        transformers
    !pip install -quiet --no-cache huggingface_hub


_install_packages()

## Import packages

In [None]:
from __future__ import annotations

from getpass import getpass
from pathlib import Path

import numpy as np
import torch
from diffusers import StableDiffusionImg2ImgPipeline
from huggingface_hub import notebook_login
from IPython.display import display
from IPython.display import Image as displayImage
from PIL import Image

## Device

In [None]:
# Torch device string handed to the pipeline and the inference call:
# "cuda" to run on a GPU, "cpu" otherwise.
DEVICE: str = "cuda"

## Log in to Hugging Face

In [None]:
# Log in to Hugging Face from inside the notebook. The pipeline cell loads
# the model with use_auth_token=True, so presumably this must run first.
notebook_login()

## Pipeline

In [None]:
def _create_pipeline(device: str) -> StableDiffusionImg2ImgPipeline:
    """Load the Stable Diffusion v1-4 img2img pipeline and move it to ``device``.

    On CUDA devices the ``fp16`` revision is requested with
    ``torch.float16`` weights; on any other device the default revision and
    dtype are used.

    Args:
        device: Torch device string, e.g. ``"cuda"``, ``"cuda:0"`` or ``"cpu"``.

    Returns:
        The pipeline loaded from ``CompVis/stable-diffusion-v1-4`` after
        ``.to(device)`` has been called on it.
    """
    params: dict[str, object] = {}
    # Compare the device *type* so that indexed devices such as "cuda:0"
    # also get the half-precision weights.
    if torch.device(device).type == "cuda":
        params["revision"] = "fp16"
        params["torch_dtype"] = torch.float16

    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4",
        use_auth_token=True,
        **params,
    )
    pipe.to(device)

    return pipe


PIPE = _create_pipeline(DEVICE)

## Create and show image

In [None]:
def preprocess_image(image: Image.Image, size: tuple[int, int]) -> torch.Tensor:
    """Turn a PIL image into a ``(1, 3, H, W)`` float32 tensor scaled to [-1, 1].

    Images carrying transparency are flattened onto a white background and
    every other non-RGB mode is converted to RGB, so the result always has
    three channels.

    Args:
        image: Source image in any PIL mode.
        size: Requested ``(width, height)``. Each side is rounded down to a
            multiple of 32 before resizing.

    Returns:
        Tensor of shape ``(1, 3, height, width)`` with values in ``[-1, 1]``.

    Raises:
        ValueError: If either side of ``size`` is not positive after rounding
            down to a multiple of 32.
    """
    has_alpha = image.mode in ("RGBA", "LA") or (
        image.mode == "P" and "transparency" in image.info
    )
    if has_alpha:
        # Composite onto white using the alpha band as the paste mask, then
        # keep the *composited* result (not the untouched source image).
        rgba = image.convert("RGBA")
        background = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
        background.paste(rgba, mask=rgba.split()[-1])
        image = background.convert("RGB")
    elif image.mode != "RGB":
        # Grayscale, palette, CMYK, ... would otherwise not yield an
        # (H, W, 3) array and the transpose below would fail.
        image = image.convert("RGB")

    # Round each side down to an integer multiple of 32.
    w, h = (side - side % 32 for side in size)
    if w <= 0 or h <= 0:
        raise ValueError(
            f"size must be at least 32 in both dimensions, got {tuple(size)}"
        )
    image_pil = image.resize((w, h), resample=Image.LANCZOS)

    # Map 0..255 to -1..1, then reorder (H, W, C) into (1, C, H, W).
    image_np = np.array(image_pil).astype(np.float32) / 255.0
    image_np = 2.0 * image_np - 1.0
    image_np = image_np[np.newaxis].transpose(0, 3, 1, 2)

    return torch.from_numpy(image_np)

In [None]:
def _infer(
    pipe: StableDiffusionImg2ImgPipeline,
    prompt: str,  # The prompt to guide the image generation.
    init_image: torch.Tensor,  # Image batch tensor used as the starting point for the process.
    strength: float = 0.8,  # How much to transform the reference `init_image`. Must be between 0 and 1.
    guidance_scale: float = 7.5,
    num_inference_steps: int = 50,  # The number of denoising steps.
    seed: int = 42,  # Random seed.
    device: str = "cuda",
) -> Image.Image:
    """Run one img2img generation and return the first resulting image.

    Args:
        pipe: Pipeline that is called with the prompt and the sampling options.
        prompt: Text prompt; it is passed to the pipeline as a one-element list.
        init_image: Starting image batch, forwarded as ``init_image``.
        strength: Forwarded to the pipeline as ``strength``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        num_inference_steps: Forwarded to the pipeline as ``num_inference_steps``.
        seed: Seed for the ``torch.Generator`` handed to the pipeline.
        device: Torch device string (e.g. ``"cuda"``, ``"cuda:0"``, ``"cpu"``)
            used for the generator and for autocast.

    Returns:
        The first element of the pipeline output's ``images``.
    """
    generator = torch.Generator(device=device).manual_seed(seed)
    # Shape of the input batch, printed for inspection.
    print(init_image.shape)
    # autocast expects a device *type* ("cuda" / "cpu"); strip any index so
    # that strings like "cuda:0" are accepted as well.
    device_type = torch.device(device).type
    with torch.autocast(device_type):
        images = pipe(
            [prompt],
            init_image=init_image,
            strength=strength,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            generator=generator,
        ).images

    return images[0]

In [None]:
def _save_and_show_image(image: Image.Image, filepath: Path) -> None:
    """Write ``image`` to ``filepath`` and render the saved file in the notebook.

    The type of ``image`` is printed before it is saved.

    Args:
        image: Image object to save.
        filepath: Destination path; the same path is then shown via IPython.
    """
    print(type(image))
    image.save(filepath)
    rendered = displayImage(filepath)
    display(rendered)

In [None]:
# Text prompt that guides the generation.
PROMPT = """character concept, portrait, Unreal Engine"""

# Open the source picture in a context manager so its file handle is closed
# as soon as the input tensor has been built.
with Image.open("FbQVaVtUIAA41yk.jpg") as _source_image:
    _INIT_IMAGE = preprocess_image(_source_image, size=(512, 512))

IMAGE = _infer(
    pipe=PIPE,
    prompt=PROMPT,
    init_image=_INIT_IMAGE,
    strength=0.8,
    guidance_scale=7.5,
    num_inference_steps=50,
    seed=42,
    device=DEVICE,
)
# Save the result as test.png and display it.
_save_and_show_image(IMAGE, filepath=Path("test.png"))