In [None]:
import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler

# Checkpoint identifier passed to both `from_pretrained` calls below.
model_id = "stabilityai/stable-diffusion-2"

# Build the Euler scheduler from the checkpoint's "scheduler" subfolder, then
# assemble the pipeline around it with float16 weights and move it to the GPU.
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to("cuda")

In [None]:
# First keyframe of the morph: the night scene.
prompt = "oil painting of the Leaning Tower of Pisa at night with the moon"

# Seed the sampler explicitly so a fresh "Restart & Run All" regenerates the
# same picture; without a generator the pipeline draws new random latents on
# every execution and night.png changes from run to run.
generator = torch.Generator(device="cuda").manual_seed(0)

image = pipe(prompt, generator=generator).images[0]
image.save("night.png")

In [None]:
# Second keyframe of the morph: the sunset scene.
prompt = "oil painting of the Leaning Tower of Pisa during the sunset"

# Seed the sampler explicitly so a fresh "Restart & Run All" regenerates the
# same picture; without a generator the pipeline draws new random latents on
# every execution and sunset.png changes from run to run.
generator = torch.Generator(device="cuda").manual_seed(0)

image = pipe(prompt, generator=generator).images[0]
image.save("sunset.png")

In [None]:
from PIL import Image
import torchvision.transforms.functional as TF
import matplotlib.pyplot as plt
from tqdm import trange

def _load_image_tensor(path, size=(256, 256)):
    """Load an image file as a VAE-ready tensor.

    The file is opened, forced to 3-channel RGB, resized to `size`, converted
    to a float tensor and rescaled from [0, 1] to [-1, 1], which is the value
    range the Stable Diffusion autoencoder operates on.

    Args:
        path: Path of the image file to read.
        size: Target (height, width) passed to `TF.resize`.

    Returns:
        A half-precision CUDA tensor of shape (1, 3, H, W) with values in [-1, 1].
    """
    # The context manager closes the file handle; convert() yields an
    # independent in-memory image, so it stays valid after the file is closed.
    with Image.open(path) as img:
        img = TF.resize(img.convert("RGB"), size)
    tensor = TF.to_tensor(img)      # float32, (3, H, W), values in [0, 1]
    tensor = tensor * 2.0 - 1.0     # rescale to the VAE's [-1, 1] input range
    return tensor.unsqueeze(0).half().cuda()


def _decode_to_uint8(latent):
    """Decode one latent with the pipeline's VAE into an (H, W, 3) uint8 array.

    The decoder output is mapped from [-1, 1] back to [0, 1] and clamped before
    the 8-bit conversion; casting unclamped values to uint8 would wrap around
    and produce speckled, colour-inverted pixels.
    """
    decoded = pipe.vae.decode(latent).sample[0].float()
    decoded = (decoded / 2 + 0.5).clamp(0, 1)
    pixels = decoded.cpu().numpy().transpose(1, 2, 0)  # CHW -> HWC
    return (pixels * 255).round().astype("uint8")


# Inference only: no autograd graph is needed for encoding/decoding.
with torch.no_grad():
    start_image = _load_image_tensor("night.png")
    end_image = _load_image_tensor("sunset.png")

    # Use the mean of each posterior (rather than a random draw) so the
    # endpoints of the morph are deterministic.
    start_latent_space = pipe.vae.encode(start_image)
    start_sample = start_latent_space.latent_dist.mean

    end_latent_space = pipe.vae.encode(end_image)
    end_sample = end_latent_space.latent_dist.mean

    # Straight-line path through latent space from the start to the end image.
    direction = end_sample - start_sample

    frames = []
    steps = 150
    # steps + 1 iterations so that both endpoints (i = 0 and i = steps) are rendered.
    for i in trange(0, steps + 1):
        sample = start_sample + direction * (i / steps)
        out = _decode_to_uint8(sample)
        frames.append(out)


In [None]:
import imageio
# Write the collected frames to an animated GIF at 30 frames per second.
# NOTE(review): newer imageio releases may reject `fps` for GIF output in
# favour of `duration` — confirm against the installed imageio version.
imageio.mimsave(
    'morph.gif',
    frames,
    fps=30,
)