In [1]:
import os

import torch
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, DDIMScheduler, AutoencoderKL
from PIL import Image

from ip_adapter import IPAdapter

In [2]:
# Device string handed to the IP-Adapter wrapper in a later cell.
device = "cuda"

# Model identifiers passed to the `from_pretrained` loaders
# (presumably Hugging Face Hub repo ids -- confirm if running offline).
vae_model_path = "stabilityai/sd-vae-ft-mse"
base_model_path = "dreamlike-art/dreamlike-anime-1.0"

# Relative paths to the IP-Adapter checkpoint and its image encoder;
# both are forwarded to `IPAdapter(...)` in a later cell.
ip_ckpt = "models/ip-adapter_sd15.bin"
image_encoder_path = "models/image_encoder/"

In [3]:
def image_grid(imgs, rows, cols):
    """Paste a list of images into one ``rows`` x ``cols`` grid image.

    The cell size is taken from the first image; every image is pasted at
    the top-left corner of its cell without being resized. Images are laid
    out row by row (index ``i`` goes to row ``i // cols``, column ``i % cols``).

    Args:
        imgs: Sequence of PIL images; its length must equal ``rows * cols``.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)`` where ``(w, h)`` is
        the size of ``imgs[0]``.

    Raises:
        AssertionError: If ``len(imgs) != rows * cols``.
    """
    assert len(imgs) == rows*cols, (
        f"expected {rows*cols} images for a {rows}x{cols} grid, got {len(imgs)}"
    )

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols*w, rows*h))

    for i, img in enumerate(imgs):
        # divmod gives (row, column) of the i-th cell in row-major order.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col*w, row*h))
    return grid

# Standalone VAE loaded from `vae_model_path` and cast to half precision so
# its dtype matches the float16 pipeline it is plugged into later.
vae = (
    AutoencoderKL
    .from_pretrained(vae_model_path)
    .to(dtype=torch.float16)
)

# DDIM sampler configuration: a "scaled_linear" beta schedule running from
# 0.00085 to 0.012 over 1000 training timesteps, with sample clipping off.
noise_scheduler = DDIMScheduler(
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
    num_train_timesteps=1000,
    steps_offset=1,
    set_alpha_to_one=False,
    clip_sample=False,
)

In [None]:
# Assemble the half-precision text-to-image pipeline from the base model,
# swapping in the custom scheduler and VAE built above. No safety checker or
# feature extractor is attached.
# The pipeline is moved to the configured `device` (rather than a hard-coded
# "cuda") so it always lives on the same device the adapter is told to use.
pipe = StableDiffusionPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    scheduler=noise_scheduler,
    vae=vae,
    feature_extractor=None,
    safety_checker=None,
).to(device)

# Wrap the pipeline with the IP-Adapter, giving it the image-encoder
# directory and the adapter checkpoint configured earlier.
ip_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, device)


In [None]:
# Reference image that is passed as `pil_image` to the generation loop below.
image_path = "anime/rose.webp"
image = Image.open(image_path)
# `resize` returns a new image and does not modify `image`; as the cell's last
# expression it only renders a 256x256 preview. Later cells still receive the
# original, full-size `image`.
image.resize((256, 256))

In [None]:
# Scene descriptions; each one is appended to a shared style prefix and
# rendered as a 1x4 grid of samples conditioned on the reference image.
prompt_list = [
    "wearing a hat on the beach",
    "walking in the forest, holding a sword",
    "sleeping under a tree",
    "sitting on top of a dragon",
    "coding on a computer in a dark room",
]

# Output files mirror the input path with "anime" replaced by "results".
# Create the target directory up front so a missing folder cannot fail the
# save after the expensive generation step has already run.
output_prefix = str(image_path).replace("anime", "results")
os.makedirs(os.path.dirname(output_prefix) or ".", exist_ok=True)

for main_prompt in prompt_list:

    # The trailing ", " keeps the style prefix and the scene description from
    # fusing into a single word ("...high resolutionwearing a hat...").
    prompt = "anime, high quality, high resolution, " + main_prompt

    images = ip_model.generate(pil_image=image, num_samples=4, num_inference_steps=50,
            prompt=prompt, scale=1.0)
    grid = image_grid(images, 1, 4)
    # The last 10 characters of the scene description distinguish the files.
    grid.save(output_prefix + f"_{main_prompt[-10:]}.png")