In [1]:
from PIL import Image
import requests
import torch
import numpy as np
from transformers import CLIPProcessor, CLIPModel

# model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
# processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Load model directly


# The model weights and the matching processor (used below for both the text
# and the image inputs) are read from the same local checkpoint directory.
clip_checkpoint = "./clip-vit-large-patch14"
processor = CLIPProcessor.from_pretrained(clip_checkpoint)
model = CLIPModel.from_pretrained(clip_checkpoint)
# model = CLIPModel.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s34B-b79K")
# processor = CLIPProcessor.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s34B-b79K")

@torch.no_grad()
def image_text_cosine_dist(pil_image, text, scaled=False, max_length=77):
    """Return the CLIP image/text score for one image and one prompt.

    The score is the product ``image_embeds @ text_embeds.T`` of the embeddings
    returned by ``model``. Despite the function name, a higher value means a
    better match.
    NOTE(review): this is a cosine similarity only if the model returns
    L2-normalised embeddings (the Hugging Face CLIPModel is expected to) — confirm.

    Args:
        pil_image: Image passed to ``processor`` (a PIL image in this notebook).
        text: Prompt to compare the image against.
        scaled: If True, multiply the score by ``model.logit_scale.exp()``,
            which gives the same value as ``outputs.logits_per_image``.
        max_length: Maximum number of text tokens given to the model. The
            default of 77 is the limit the code previously sliced to.

    Returns:
        The score as a Python float. Only a single image/text pair is
        supported, because the 1x1 result is converted with ``.item()``.
    """
    # Let the tokenizer truncate over-long prompts itself. Slicing input_ids
    # after tokenization (the previous approach) cuts off the trailing
    # end-of-text token, whereas tokenizer truncation keeps the special tokens.
    inputs = processor(
        text=text,
        images=pil_image,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    )
    outputs = model(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        pixel_values=inputs.pixel_values,
    )

    similarity = outputs.image_embeds @ outputs.text_embeds.T
    if scaled:
        # Same value as outputs.logits_per_image
        similarity = model.logit_scale.exp() * similarity
    return similarity.item()

In [50]:
# Prompt pairs: orig_prompts[i] describes the source image and edit_prompts[i]
# is the same scene with one element swapped. Both lists must keep the same
# length and order.
orig_prompts = [
    'Beautiful DSLR Photograph of a penguin on the beach, golden hour',
    'Realistic photography of a dog on the snow',
    'realistic photo of a bear on the rock',
    'photo of a puppy on the grass',
    'photo of a yellow car in the city',
    'Realistic photography of a cat in the forest',
    'realistic photo of a wolf on the rock',
    'realistic photo of a wolf on the beach',
    'realistic photo of a wolf in the snow forest',
    'Photo of a sailing boat on the lake',
    'Photo of a cat on the windowsill',
    'Photo of a white swan swimming on the river',
    'Realistic photo of a cat on the sofa',
    'Photo of a tiger in the jungle',
    'Realistic photo of a rabbit in the garden',
    'Photo of a plane flying over the mountains',
    'Photo of a lion resting on a rock',
    'Photo of a black car on a city street',
    'Photo of a deer standing in a forest clearing',
    'Photo of a penguin on ice',
    'Photo of a raccoon in a tree',
    'Realistic photo of goat on a cliff',
]

edit_prompts = [
    'Beautiful DSLR Photograph of a dog on the beach, golden hour',
    'Realistic photography of a cat on the snow',
    'realistic photo of a wolf on the rock',
    'photo of a kitty on the grass',
    'photo of a red car in the city',
    'Realistic photography of a dog in the forest',
    'realistic photo of a bear on the rock',
    'realistic photo of a bear on the beach',
    'realistic photo of a dog in the snow forest',
    'Photo of a motorboat on the lake',
    'Photo of a dog on the windowsill',
    'Photo of a black swan swimming on the river',
    'Realistic photo of a dog on the sofa',
    'Photo of a leopard in the jungle',
    'Realistic photo of a squirrel in the garden',
    'Photo of a helicopter flying over the mountains',
    'Photo of a cheetah resting on a rock',
    'Photo of a white car on a city street',
    'Photo of a moose standing in a forest clearing',
    'Photo of a seal on ice',
    'Photo of an owl in a tree',
    'Realistic photo of ibex on a cliff',
]




In [None]:
import os

# Directory with the mask files. Each file is expected to be named
# "<number>.npy", where <number> is the 1-based position of its prompt in
# edit_prompts (e.g. '3.npy' -> edit_prompts[2]).
image_dir = "/media/alexander/DATA/dima_vinichenko/data/sd_15/mask/"

# Keep only .npy files whose stem is a number that exists in edit_prompts.
# Filtering the file list itself (instead of skipping entries while building
# the prompt list) guarantees that `files` and `selected_prompts` have the
# same length and order, so anything derived from `files` can be zipped with
# the prompts without pairing an image with the wrong prompt.
files = []
for f in os.listdir(image_dir):
    stem, ext = os.path.splitext(f)
    if ext == '.npy' and stem.isdecimal() and 1 <= int(stem) <= len(edit_prompts):
        files.append(f)

# Numeric order, so that '10.npy' comes after '9.npy'
files.sort(key=lambda x: int(os.path.splitext(x)[0]))

print("Файлы в директории:", files)

# Edit prompt for every kept file (file numbers are 1-based, list indices 0-based)
selected_prompts = [edit_prompts[int(os.path.splitext(f)[0]) - 1] for f in files]

print("Выбранные промпты:", selected_prompts)


In [None]:
# Image file name for every mask file: same name with '.npy' swapped for '.png'
images_names = [mask_file.replace('.npy', '.png') for mask_file in files]
images_names

In [80]:
# Directory the images are opened from in the next cell (path + file name,
# so the trailing slash is required)
path = '/media/alexander/DATA/dima_vinichenko/data/sd_15/DPM_AYS/'

In [81]:
# Open each image named in images_names from the directory in `path`.
# The full path is built by plain string concatenation.
image_list = [Image.open(path + name) for name in images_names]

In [82]:
# zip() silently stops at the shorter list, which would drop items and can
# pair images with the wrong prompts, so fail loudly on a length mismatch.
if len(image_list) != len(selected_prompts):
    raise ValueError(
        f"{len(image_list)} images but {len(selected_prompts)} prompts"
    )

# CLIP image/text score of every image against its edit prompt.
# NOTE(review): the name says DDIM, but `path` above points at .../DPM_AYS/ —
# confirm which sampler these images come from.
CLIP_T_DDIM = [
    image_text_cosine_dist(image, prompt)
    for image, prompt in zip(image_list, selected_prompts)
]

In [None]:
# Per-image scores, in the same order as image_list
CLIP_T_DDIM

In [None]:
# Average score over all images
np.asarray(CLIP_T_DDIM).mean()

# MSE

In [85]:
import numpy as np

___________________________


In [219]:
# Presumably the project root. NOTE(review): `path` is reassigned in a later
# cell before any active code reads this value; only the commented-out loader
# cell further down appends a sub-directory to it.
path = '/media/alexander/DATA/dima_vinichenko'


In [None]:
import os

# Directory with the SDXL mask files. Each file is expected to be named
# "<number>.npy", where <number> is the 1-based position of its prompt in
# edit_prompts (e.g. '3.npy' -> edit_prompts[2]).
image_dir = "/media/alexander/DATA/dima_vinichenko/data/SDXL/mask/"

# Keep only .npy files whose stem is a number that exists in edit_prompts.
# Filtering the file list itself (instead of skipping entries while building
# the prompt list) guarantees that `files` and `selected_prompts` have the
# same length and order, so anything derived from `files` can be zipped with
# the prompts without pairing an image with the wrong prompt.
files = []
for f in os.listdir(image_dir):
    stem, ext = os.path.splitext(f)
    if ext == '.npy' and stem.isdecimal() and 1 <= int(stem) <= len(edit_prompts):
        files.append(f)

# Numeric order, so that '10.npy' comes after '9.npy'
files.sort(key=lambda x: int(os.path.splitext(x)[0]))

print("Файлы в директории:", files)

# Edit prompt for every kept file (file numbers are 1-based, list indices 0-based)
selected_prompts = [edit_prompts[int(os.path.splitext(f)[0]) - 1] for f in files]

print("Выбранные промпты:", selected_prompts)


In [None]:
# Image file name for every mask file: same name with '.npy' swapped for '.png'
images_names = [mask_file.replace('.npy', '.png') for mask_file in files]
images_names

In [238]:
# Directory the edited images (image_list) are opened from in the next cell
path = '/media/alexander/DATA/dima_vinichenko/data/SDXL/DPM_AYS/'

In [239]:
# Open each image named in images_names from the directory in `path`.
# The full path is built by plain string concatenation.
image_list = [Image.open(path + name) for name in images_names]

In [240]:
# Directory the reference images (orig_image_list) are opened from in the next cell
path = '/media/alexander/DATA/dima_vinichenko/data/SDXL/DDPM_original/'

In [241]:
# Open the image with the same file name as each entry of images_names, this
# time from the directory currently stored in `path`.
orig_image_list = [Image.open(path + name) for name in images_names]

In [242]:
# image_list = []

# for filename in images_names: 

#     file_path = path + '/data/1.5/ddim/' + filename
#     im=Image.open(file_path)
#     image_list.append(im)

In [None]:
# Visual check: first image of image_list
image_list[0]

In [None]:
# Visual check: first image of orig_image_list
orig_image_list[0]

In [None]:
# Mask file name for every image: "<number>.png" -> "<number>.npy".
# The names are derived from images_names: `files` already holds the .npy
# names, so replacing '.png' in it (as was done before) changed nothing.
masks_names = [image_name.replace('.png', '.npy') for image_name in images_names]
masks_names


In [246]:
# Directory the .npy masks are loaded from in the next cell
path = '/media/alexander/DATA/dima_vinichenko/data/SDXL/mask/'

In [247]:
# Load every mask array named in masks_names from the directory in `path`
mask_list = [np.load(path + name) for name in masks_names]

In [248]:
def _apply_mask(pil_image, mask):
    """Return a new RGB PIL image with every channel multiplied by ``mask``.

    Args:
        pil_image: Source PIL image; it is converted to RGB first so the pixel
            array always has exactly three channels.
        mask: Array multiplied into each channel.
            NOTE(review): assumed to be (height, width) with the same size as
            the image — confirm against the saved .npy files.

    Returns:
        A PIL image built from the masked pixel array.
    """
    pixels = np.array(pil_image.convert('RGB'))
    # The product may be promoted to a wider dtype (e.g. for a float mask);
    # cast back to the pixel dtype, which is what assigning into the original
    # array slice did implicitly.
    masked = (pixels * np.asarray(mask)[..., None]).astype(pixels.dtype)
    # Mode is inferred from the (H, W, 3) 8-bit array
    return Image.fromarray(masked)


# Masked versions of the edited images (img_array) and of the reference
# images (orig_img_array), using the same mask for both images of a pair.
img_array = []
orig_img_array = []

for orig, image, mask in zip(orig_image_list, image_list, mask_list):
    img_array.append(_apply_mask(image, mask))
    orig_img_array.append(_apply_mask(orig, mask))

In [None]:
# Visual check: first masked image of img_array
img_array[0]

In [None]:
# Visual check: first masked image of orig_img_array
orig_img_array[0]

In [251]:
def mse(imageA, imageB):
    """Squared-error score between two images of identical shape.

    The squared differences are summed over all elements and divided by
    ``shape[0] * shape[1]`` (the number of pixels). For a single-channel
    (H, W) image this is the mean squared error. For an (H, W, C) image the
    channels are summed, so the value is C times the per-element mean.
    Lower values mean more similar images.

    Args:
        imageA: First image as an array (or array-like) with at least two axes.
        imageB: Second image, with exactly the same shape as ``imageA``.

    Returns:
        The score as a NumPy float.

    Raises:
        ValueError: If the two images do not have the same shape. Without this
            check, shapes that merely broadcast would silently give a wrong score.
    """
    imageA = np.asarray(imageA)
    imageB = np.asarray(imageB)
    if imageA.shape != imageB.shape:
        raise ValueError(
            f"images must have the same shape, got {imageA.shape} and {imageB.shape}"
        )

    # Convert to float before subtracting so unsigned pixel types cannot wrap around
    diff = imageA.astype("float") - imageB.astype("float")
    return np.sum(diff ** 2) / float(imageA.shape[0] * imageA.shape[1])

In [252]:
# Score every masked (reference, edited) pair with pixel values divided by 255.
# Iterating over the lists themselves replaces the previous hard-coded
# range(20), which raised IndexError with fewer than 20 images and ignored
# every image after the 20th.
mse_dpm = [
    mse(np.array(reference) / 255, np.array(edited) / 255)
    for reference, edited in zip(orig_img_array, img_array)
]

In [None]:
# Average score over all image pairs
np.asarray(mse_dpm).mean()
