In [1]:
import transformers
import matplotlib.pyplot as plt
import torch
import clip
from transformers.models.clip.modeling_clip import CLIPModel
from transformers.models.clip.processing_clip import CLIPProcessor
from PIL import Image

device = "cpu"

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
preprocess = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

  from .autonotebook import tqdm as notebook_tqdm


In [38]:
import os
def get_number_prefix(filename):
    # Split the filename by non-digit characters
    parts = ''.join(filter(str.isdigit, filename))
    # Convert the extracted digits to an integer
    return int(parts) if parts else 0

def get_sorted_files(directory):
    # Get a list of all files in the directory
    files = os.listdir(directory)
    # Sort the files based on the number prefix using the custom key function
    sorted_files = sorted(files, key=get_number_prefix)
    return sorted_files

def clip_encode_image(image_path):
    # Load the image
    img = Image.open(image_path)
    image = preprocess(images=[img], return_tensors="pt", padding=True)
    
    with torch.no_grad():
        image_features = model.get_image_features(**image)
    
    return image_features

def calculate_clip_distance(image_path1, image_path2):
    # Load the images
    image_features1 = clip_encode_image(image_path1)
    image_features2 = clip_encode_image(image_path2)
    
    # Normalize the features
    image_features1 = image_features1 / image_features1.norm(dim=-1, keepdim=True)
    image_features2 = image_features2 / image_features2.norm(dim=-1, keepdim=True)
    
    # Calculate the distance (cosine similarity)
    distance = 1 - torch.nn.functional.cosine_similarity(image_features1, image_features2)
    
    return distance.item()

sample_dirs = ['samples/samples_15', 'samples/samples_15_lq', 'samples/samples_mq', 'samples/samples_xl_lq', 'samples/samples_xl_lq_high_cfg']
sample_dir_files = [get_sorted_files(d) for d in sample_dirs]

common_files = list(set(sample_dir_files[0]).intersection(*sample_dir_files))
for i, file in enumerate(common_files):
    baseline_file = f'{sample_dirs[0]}/{file}'
    for j, sample_dir in enumerate(sample_dirs[1:]):
        sample_file = f'{sample_dir}/{file}'
        distance = calculate_clip_distance(baseline_file, sample_file)
        print(f'{i+1} {j+1} {file} {distance}')

1 1 0_1_0_prompt_a_black_colored_car_.png 0.34232956171035767
1 2 0_1_0_prompt_a_black_colored_car_.png 0.29553014039993286
1 3 0_1_0_prompt_a_black_colored_car_.png 0.402837336063385
1 4 0_1_0_prompt_a_black_colored_car_.png 0.29748958349227905
2 1 16_4_0_prompt_a_white_car_and_a_re_.png 0.42547935247421265
2 2 16_4_0_prompt_a_white_car_and_a_re_.png 0.40552765130996704
2 3 16_4_0_prompt_a_white_car_and_a_re_.png 0.4499005079269409
2 4 16_4_0_prompt_a_white_car_and_a_re_.png 0.33845990896224976
3 1 0_0_1_upsampled_the_sleek_red_body_o_.png 0.1482393741607666
3 2 0_0_1_upsampled_the_sleek_red_body_o_.png 0.16503793001174927
3 3 0_0_1_upsampled_the_sleek_red_body_o_.png 0.2807549238204956
3 4 0_0_1_upsampled_the_sleek_red_body_o_.png 0.1535899043083191
4 1 16_1_0_prompt_a_green_apple_and_a__.png 0.3228135108947754
4 2 16_1_0_prompt_a_green_apple_and_a__.png 0.23909389972686768
4 3 16_1_0_prompt_a_green_apple_and_a__.png 0.4189155697822571
4 4 16_1_0_prompt_a_green_apple_and_a__.png 0.38

KeyboardInterrupt: 