In [None]:
!pip install diffusers transformers accelerate scipy safetensors
!pip install git+https://github.com/openai/CLIP.git

In [1]:
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler, DDPMScheduler, DDIMScheduler, AutoPipelineForText2Image, DiffusionPipeline
import matplotlib.pyplot as plt
from PIL import Image
import pickle
import clip
import numpy as np

2024-04-10 13:08:45.528800: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-10 13:08:45.555141: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
file_path = 'sample_data.pkl'
f = open(file_path, 'rb')
data = pickle.load(f)
f.close()

In [3]:
clip_model, transform = clip.load("RN50")
clip_model = clip_model.eval().to("cuda")

# convert numpy array to PIL image
def to_pil(array):
    return Image.fromarray((array * 255).astype(np.uint8))

@torch.no_grad()
def compute_similarity(clip_model, transform, image, caption):
    
    vis_emb = clip_model.encode_image(transform(to_pil(image)).unsqueeze(0).to("cuda"))
    text_emb = clip_model.encode_text(clip.tokenize(caption).to("cuda"))
    vis_emb = vis_emb / vis_emb.norm(dim=1, keepdim=True)
    text_emb = text_emb / text_emb.norm(dim=1, keepdim=True)
    return round(torch.matmul(text_emb, vis_emb.T).item(), 3)

In [4]:
def return_sd_similarities(data, clip_model, transform, pipe):
    gt_similarity = 0
    pipe_sd_similarity = 0


    out = {} # keys: prompt, values: [(sim(gt), sim(pipe_21), image_gt, image_sd]
    for image_gt, prompt in data:
        generator = torch.Generator("cuda").manual_seed(1024)
        image_sd = pipe(prompt, num_inference_steps=50, generator=generator, guidance_scale=7.5).images[0]
        
        sim_gt = compute_similarity(clip_model, transform, image_gt.numpy(), prompt)
        sim_pipe = compute_similarity(clip_model, transform, np.array(image_sd), prompt)

        gt_similarity += sim_gt
        pipe_sd_similarity += sim_pipe

        out[prompt] = [(sim_gt, sim_pipe), image_gt, image_sd]

    gt_similarity /= len(out)
    pipe_sd_similarity /= len(out)
    return gt_similarity, pipe_sd_similarity, out

In [5]:
pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1", variant="fp16", torch_dtype=torch.float16)
pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)
#pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

gt_similarity, pipe_sd_similarity, out = return_sd_similarities(data, clip_model, transform, pipe)

print("SD 2.1")
print(f"GT similarity: {gt_similarity}")
print(f"Pipe similarity: {pipe_sd_similarity}")

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

SD 2.1
GT similarity: 0.2572666666666667
Pipe similarity: 0.2329


In [6]:
pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sd-turbo", variant="fp16", torch_dtype=torch.float16)
pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)
#pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

gt_similarity, pipe_sd_similarity, out = return_sd_similarities(data, clip_model, transform, pipe)

print("SD 2.1 - Turbo")
print(f"GT similarity: {gt_similarity}")
print(f"Pipe similarity: {pipe_sd_similarity}")

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

SD 2.1 - Turbo
GT similarity: 0.2572666666666667
Pipe similarity: 0.20240000000000002


In [5]:
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", variant="fp16", torch_dtype=torch.float16, use_safetensors=True)
pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)
#pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

gt_similarity, pipe_sd_similarity, out = return_sd_similarities(data, clip_model, transform, pipe)

print("SD XL")
print(f"GT similarity: {gt_similarity}")
print(f"Pipe similarity: {pipe_sd_similarity}")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

SD XL
GT similarity: 0.2572666666666667
Pipe similarity: 0.22366666666666665


In [5]:
pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", variant="fp16", torch_dtype=torch.float16)
pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)
#pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

gt_similarity, pipe_sd_similarity, out = return_sd_similarities(data, clip_model, transform, pipe)

print("SD XL - Turbo")
print(f"GT similarity: {gt_similarity}")
print(f"Pipe similarity: {pipe_sd_similarity}")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

SD XL - Turbo
GT similarity: 0.2572666666666667
Pipe similarity: 0.20913333333333334
