<a href="https://colab.research.google.com/github/alessioborgi/StyleAligned_DiffModels/blob/main/StyleAligned_Metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SINGLE-STYLE METRICS

In [1]:
# Download the file
!gdown --folder https://drive.google.com/drive/folders/1Rdb3XkwW1H_IMFVh3tShj4adh-XUNt03?usp=sharing

zsh:1: no matches found: https://drive.google.com/drive/folders/1Rdb3XkwW1H_IMFVh3tShj4adh-XUNt03?usp=sharing


In [2]:
# Unzip the file if it's a zip
# Extracts AlignZipFolder/OnlyPromptFile.zip; -q suppresses unzip's per-file output.
!unzip -q AlignZipFolder/OnlyPromptFile.zip

unzip:  cannot find or open AlignZipFolder/OnlyPromptFile.zip, AlignZipFolder/OnlyPromptFile.zip.zip or AlignZipFolder/OnlyPromptFile.zip.ZIP.


In [None]:
# Install required libraries
!pip install transformers ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

Collecting ftfy
  Downloading ftfy-6.2.3-py3-none-any.whl.metadata (7.8 kB)
Collecting wcwidth<0.3.0,>=0.2.12 (from ftfy)
  Using cached wcwidth-0.2.13-py2.py3-none-any.whl.metadata (14 kB)
Downloading ftfy-6.2.3-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.0/43.0 kB[0m [31m983.3 kB/s[0m eta [36m0:00:00[0m[36m0:00:01[0m
[?25hUsing cached wcwidth-0.2.13-py2.py3-none-any.whl (34 kB)
Installing collected packages: wcwidth, ftfy
  Attempting uninstall: wcwidth
    Found existing installation: wcwidth 0.2.5
    Uninstalling wcwidth-0.2.5:
      Successfully uninstalled wcwidth-0.2.5
Successfully installed ftfy-6.2.3 wcwidth-0.2.13

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting git+https://github.com/openai/CLIP.git
  Cloning ht

In [3]:
import os
import torch
import clip
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from transformers import AutoImageProcessor, AutoModel
from torchvision import transforms

# Load CLIP model and preprocess
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load returns the model together with its image preprocessing callable.
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)

# Load DINO model and processor from Hugging Face
# Both are created from the same "facebook/dino-vitb8" checkpoint; the model is moved to `device`.
dino_processor = AutoImageProcessor.from_pretrained("facebook/dino-vitb8")
dino_model = AutoModel.from_pretrained("facebook/dino-vitb8").to(device)

def get_clip_similarity(image_path, prompt):
    """
    Compute the CLIP similarity between an image and a text prompt.

    :param image_path: Path to the image file.
    :param prompt: Text prompt to compare with the image.
    :return: Dot product of the L2-normalized image and text features (Python float).
    """
    # Open the image in a context manager so the file handle is released as soon
    # as preprocessing has produced the input tensor.
    with Image.open(image_path) as pil_image:
        image = clip_preprocess(pil_image).unsqueeze(0).to(device)
    text = clip.tokenize([prompt]).to(device)

    with torch.no_grad():
        image_features = clip_model.encode_image(image)
        text_features = clip_model.encode_text(text)

        # L2-normalize both feature vectors so their dot product is a cosine similarity.
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        similarity = (image_features @ text_features.T).item()

    return similarity

def get_dino_embeddings(image_paths):
    """
    Extract DINO CLS-token embeddings for a list of image paths.

    :param image_paths: List of paths to images.
    :return: NumPy array with one embedding row per input image.
    """
    images = []
    for image_path in image_paths:
        # The context manager closes the file handle; convert() returns a fully
        # loaded copy, and forcing RGB keeps grayscale/RGBA/palette files from
        # reaching the processor with an unexpected channel count.
        with Image.open(image_path) as pil_image:
            images.append(pil_image.convert("RGB"))
    inputs = dino_processor(images=images, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = dino_model(**inputs)
        embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()  # Taking the CLS token

    return embeddings

def compute_pairwise_similarity(embeddings):
    """
    Average cosine similarity over all distinct pairs of embeddings.

    :param embeddings: 2D array-like with one embedding per row.
    :return: Mean of the strictly-upper-triangular entries of the cosine
             similarity matrix, or NaN when fewer than two embeddings are given
             (there is no pair to compare).
    """
    if len(embeddings) < 2:
        # No pair exists. Return NaN explicitly instead of taking the mean of an
        # empty selection, which would emit a NumPy RuntimeWarning.
        return float("nan")

    similarities = cosine_similarity(embeddings)
    # k=1 skips the diagonal (self-similarity) and counts every pair exactly once.
    upper_tri_indices = np.triu_indices_from(similarities, k=1)
    return similarities[upper_tri_indices].mean()

def _mean_or_nan(values):
    """Return the mean of ``values``, or NaN (without a NumPy warning) when it is empty."""
    return np.mean(values) if values else float("nan")


def process_folder(folder_path):
    """
    Compute average CLIP and DINO metrics for aligned and non-aligned images.

    Every sub-directory of ``folder_path`` is scanned for ``.jpg``/``.png`` files.
    Files whose name starts with ``"znon"`` are treated as non-aligned, all other
    files as aligned. The CLIP prompt of an image is
    ``"<file name without extension>, <sub-directory name>"``; for non-aligned
    files the first five characters of the file name are dropped first.

    The DINO score of a group is the mean pairwise cosine similarity of its
    images, so groups with fewer than two images are skipped instead of
    contributing a NaN that would turn the overall average into NaN.

    :param folder_path: Directory whose sub-directories hold the images.
    :return: Dict with keys ``clip_aligned``, ``clip_non_aligned``,
             ``dino_aligned`` and ``dino_non_aligned``; a value is NaN when no
             sample contributed to it.
    """
    # Sorted so the evaluation order does not depend on the filesystem.
    subfolders = sorted(f.path for f in os.scandir(folder_path) if f.is_dir())
    clip_results = {'aligned': [], 'non_aligned': []}
    dino_results = {'aligned': [], 'non_aligned': []}

    for subfolder in subfolders:
        subfolder_name = os.path.basename(subfolder)
        # group name -> list of (image path, prompt stem)
        grouped_images = {'aligned': [], 'non_aligned': []}

        for file in sorted(os.listdir(subfolder)):
            if not file.endswith((".jpg", ".png")):
                continue
            file_path = os.path.join(subfolder, file)
            stem = os.path.splitext(file)[0]
            if file.startswith("znon"):
                # Drop the five-character prefix before building the prompt.
                grouped_images['non_aligned'].append((file_path, stem[5:]))
            else:
                grouped_images['aligned'].append((file_path, stem))

        for group, items in grouped_images.items():
            for image_path, prompt_stem in items:
                prompt = f"{prompt_stem}, {subfolder_name}"
                clip_results[group].append(get_clip_similarity(image_path, prompt))

            # A pairwise similarity needs at least two images.
            if len(items) >= 2:
                embeddings = get_dino_embeddings([image_path for image_path, _ in items])
                dino_results[group].append(compute_pairwise_similarity(embeddings))

    return {
        "clip_aligned": _mean_or_nan(clip_results['aligned']),
        "clip_non_aligned": _mean_or_nan(clip_results['non_aligned']),
        "dino_aligned": _mean_or_nan(dino_results['aligned']),
        "dino_non_aligned": _mean_or_nan(dino_results['non_aligned'])
    }

# Example usage
# Evaluate every sub-directory of "OnlyPrompt" and print the averaged metrics dictionary.
folder_path = "OnlyPrompt"
results = process_folder(folder_path)
print(results)

RuntimeError: KeyboardInterrupt: 

In [None]:
# Display the current value of `results` as the cell output.
results

{'clip_aligned': 0.34217529296875,
 'clip_non_aligned': 0.3454345703125,
 'dino_aligned': 0.5018175,
 'dino_non_aligned': 0.34539917}

# MULTI-STYLE METRICS

### MULTI-STYLE DINO VIT-B/8

In [7]:
# Install required libraries
!pip install transformers ftfy regex tqdm

[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [8]:
import numpy as np
import os
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModel
from sklearn.metrics.pairwise import cosine_similarity

# Load DINO model and processor from Hugging Face
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Processor and model come from the same "facebook/dino-vitb8" checkpoint; the model is moved to `device`.
dino_processor = AutoImageProcessor.from_pretrained("facebook/dino-vitb8")
dino_model = AutoModel.from_pretrained("facebook/dino-vitb8").to(device)

def get_dino_embeddings(image_paths):
    """
    Extract DINO VIT-B/8 embeddings from a list of image paths.

    :param image_paths: List of paths to images.
    :return: NumPy array holding one CLS-token embedding row per image.
    """
    images = []
    for image_path in image_paths:
        # The context manager closes the file handle; convert() returns a fully
        # loaded copy, and forcing RGB keeps grayscale/RGBA/palette files from
        # reaching the processor with an unexpected channel count.
        with Image.open(image_path) as pil_image:
            images.append(pil_image.convert("RGB"))
    inputs = dino_processor(images=images, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = dino_model(**inputs)
        embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()  # Taking the CLS token

    return embeddings

def compute_similarity(generated_embedding, reference_embedding):
    """
    Cosine similarity between one generated-image embedding and one reference-style embedding.

    :param generated_embedding: Embedding of the generated image (1D numpy array).
    :param reference_embedding: Embedding of the reference image (1D numpy array).
    :return: Similarity score as a Python scalar.
    """
    # cosine_similarity works on 2D inputs, so present each vector as a single row.
    generated_row = generated_embedding.reshape(1, -1)
    reference_row = reference_embedding.reshape(1, -1)
    pairwise = cosine_similarity(generated_row, reference_row)
    # The result is a 1x1 matrix; .item() extracts its only entry.
    return pairwise.item()

def process_main_folder(main_folder, reference_image_paths):
    """
    Process the main folder and compute the weighted DINO metric for each subfolder.

    Works for any number of reference styles: the last
    ``len(reference_image_paths)`` underscore-separated tokens of each subfolder
    name are parsed as the style weights (e.g. ``medieval_cubism_0.85_0.15`` for
    two references). Subfolders whose name does not end in that many numeric
    tokens are reported and skipped.

    :param main_folder: Directory whose subfolders contain the generated images.
    :param reference_image_paths: Paths to the reference style images, in the
        same order as the weights in the subfolder names.
    :return: Dict mapping each processed subfolder name to a dict with keys
        ``mean_similarity_ref<i>`` (one per reference, 1-based) and
        ``weighted_multi_style_dino``. The values are NaN for a subfolder that
        contains no generated images.
    :raises ValueError: If ``reference_image_paths`` is empty.
    """
    num_refs = len(reference_image_paths)
    if num_refs == 0:
        raise ValueError("reference_image_paths must contain at least one image path")

    final_results = {}
    # The reference embeddings are identical for every subfolder, so they are
    # computed once, on first use, instead of once per subfolder.
    reference_embeddings = None
    reference_header = ", ".join(
        f"reference_image {index}: {os.path.basename(path)}"
        for index, path in enumerate(reference_image_paths, start=1)
    )

    # Sorted so the processing (and printing) order does not depend on the filesystem.
    subfolders = sorted(f.path for f in os.scandir(main_folder) if f.is_dir())

    for subfolder in subfolders:
        subfolder_name = os.path.basename(subfolder)
        # Extract weights from the subfolder name
        try:
            weight_tokens = subfolder_name.split('_')[-num_refs:]
            if len(weight_tokens) < num_refs:
                raise ValueError(f"expected {num_refs} trailing weights, found {len(weight_tokens)}")
            weights = [float(token) for token in weight_tokens]
        except ValueError as e:
            print(f"Error parsing weights from {subfolder_name}: {e}")
            continue

        # Print the header with reference image information
        print(f"Results for {subfolder_name} ({reference_header}):")

        if reference_embeddings is None:
            reference_embeddings = get_dino_embeddings(reference_image_paths)

        # One list of per-image similarities for each reference.
        similarities_per_ref = [[] for _ in range(num_refs)]

        for image_file in sorted(os.listdir(subfolder)):
            if not image_file.endswith((".jpg", ".png")):
                continue
            # Skip the reference image file
            if image_file == "ref_img.png":
                continue

            image_path = os.path.join(subfolder, image_file)
            generated_embedding = get_dino_embeddings([image_path])[0]

            image_similarities = [
                compute_similarity(generated_embedding, reference_embedding)
                for reference_embedding in reference_embeddings
            ]
            for collected, value in zip(similarities_per_ref, image_similarities):
                collected.append(value)

            # Print similarity results for each image
            details = ", ".join(
                f"Similarity to Reference {index}: {value}"
                for index, value in enumerate(image_similarities, start=1)
            )
            print(f"Image: {image_file}, {details}")

        # Calculate the mean similarities for the subfolder (NaN when it holds no images)
        mean_similarities = [
            np.mean(values) if values else float("nan")
            for values in similarities_per_ref
        ]

        # Compute the final weighted multi-style DINO metric
        weighted_multi_style_dino = sum(
            mean_similarity * weight
            for mean_similarity, weight in zip(mean_similarities, weights)
        )

        subfolder_results = {
            f"mean_similarity_ref{index}": mean_similarity
            for index, mean_similarity in enumerate(mean_similarities, start=1)
        }
        subfolder_results["weighted_multi_style_dino"] = weighted_multi_style_dino
        final_results[subfolder_name] = subfolder_results

        for index, mean_similarity in enumerate(mean_similarities, start=1):
            print(f"Mean Similarity to Reference {index}: {mean_similarity}")
        print(f"Weighted Multi-Style DINO Metric: {weighted_multi_style_dino}\n")

    return final_results



Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
Some weights of ViTModel were not initialized from the model checkpoint at facebook/dino-vitb8 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


#### LINEAR VS SLERP BLENDING

##### GUIDANCE 10

###### LINEAR

In [6]:
# Example usage
# Folder holding the linear-blending outputs at guidance 10 (per the path below).
main_folder = "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/Linear/medieval_cubism/guidance_10/"
# Reference style images; their order defines "Reference 1" and "Reference 2" in the output.
reference_image_paths = ["./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/medieval-bed.jpeg", 
                         "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/Picasso_Smoking_Water_Pipe.jpeg"]
results = process_main_folder(main_folder, reference_image_paths)

# Print or save the results
# Re-prints the per-subfolder summary from the returned dictionary.
for subfolder, metrics in results.items():
    print(f"Final Results for {subfolder}:")
    print(f"Mean Similarity to Reference 1: {metrics['mean_similarity_ref1']}")
    print(f"Mean Similarity to Reference 2: {metrics['mean_similarity_ref2']}")
    print(f"Weighted Multi-Style DINO Metric: {metrics['weighted_multi_style_dino']}\n")

Results for medieval_cubism_0.85_0.15 (reference_image 1: medieval-bed.jpeg, reference_image 2: Picasso_Smoking_Water_Pipe.jpeg):
Image: spaceshuttle.png, Similarity to Reference 1: 0.17173144221305847, Similarity to Reference 2: 0.37423989176750183
Image: castle.png, Similarity to Reference 1: 0.3660222589969635, Similarity to Reference 2: 0.35393238067626953
Image: lighthouse.png, Similarity to Reference 1: 0.34872156381607056, Similarity to Reference 2: 0.378476083278656
Image: treasurebox.png, Similarity to Reference 1: 0.4153991937637329, Similarity to Reference 2: 0.3460143208503723
Image: scarecrow.png, Similarity to Reference 1: 0.33286142349243164, Similarity to Reference 2: 0.4272057116031647
Image: mountains.png, Similarity to Reference 1: 0.44071030616760254, Similarity to Reference 2: 0.534723162651062
Image: robot.png, Similarity to Reference 1: 0.3737984299659729, Similarity to Reference 2: 0.4639677405357361
Image: snowman.png, Similarity to Reference 1: 0.3992015123367

###### SLERP

In [7]:
# Example usage
# Folder holding the SLERP-blending outputs at guidance 10 (per the path below).
main_folder = "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/SLERP/medieval_cubism/guidance_10/"
# Reference style images; their order defines "Reference 1" and "Reference 2" in the output.
reference_image_paths = ["./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/medieval-bed.jpeg", 
                         "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/Picasso_Smoking_Water_Pipe.jpeg"]
results = process_main_folder(main_folder, reference_image_paths)

# Print or save the results
# Re-prints the per-subfolder summary from the returned dictionary.
for subfolder, metrics in results.items():
    print(f"Final Results for {subfolder}:")
    print(f"Mean Similarity to Reference 1: {metrics['mean_similarity_ref1']}")
    print(f"Mean Similarity to Reference 2: {metrics['mean_similarity_ref2']}")
    print(f"Weighted Multi-Style DINO Metric: {metrics['weighted_multi_style_dino']}\n")

Results for medieval_cubism_0.85_0.15 (reference_image 1: medieval-bed.jpeg, reference_image 2: Picasso_Smoking_Water_Pipe.jpeg):
Image: spaceshuttle.png, Similarity to Reference 1: 0.21127724647521973, Similarity to Reference 2: 0.3799414038658142
Image: castle.png, Similarity to Reference 1: 0.35918623208999634, Similarity to Reference 2: 0.3331613838672638
Image: lighthouse.png, Similarity to Reference 1: 0.331321120262146, Similarity to Reference 2: 0.36145520210266113
Image: treasurebox.png, Similarity to Reference 1: 0.440614253282547, Similarity to Reference 2: 0.3149084150791168
Image: scarecrow.png, Similarity to Reference 1: 0.34422364830970764, Similarity to Reference 2: 0.39098918437957764
Image: mountains.png, Similarity to Reference 1: 0.45680052042007446, Similarity to Reference 2: 0.43097662925720215
Image: robot.png, Similarity to Reference 1: 0.40923523902893066, Similarity to Reference 2: 0.5185285806655884
Image: snowman.png, Similarity to Reference 1: 0.37919762730

#### SLERP BLENDING: SCALING VS NON-SCALING

In [15]:
# Example usage
# Folder for the "guidance_10" side of the scaled vs non-scaled comparison (per the path below).
main_folder = "./Multi_Style_Evaluation/Slerp_Guidance_10_Scaled_vs_NonScaled/guidance_10"
# Reference style images; their order defines "Reference 1" and "Reference 2" in the output.
reference_image_paths = ["./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/medieval-bed.jpeg", 
                         "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/Picasso_Smoking_Water_Pipe.jpeg"]
results = process_main_folder(main_folder, reference_image_paths)

# Print or save the results
# Re-prints the per-subfolder summary from the returned dictionary.
for subfolder, metrics in results.items():
    print(f"Final Results for {subfolder}:")
    print(f"Mean Similarity to Reference 1: {metrics['mean_similarity_ref1']}")
    print(f"Mean Similarity to Reference 2: {metrics['mean_similarity_ref2']}")
    print(f"Weighted Multi-Style DINO Metric: {metrics['weighted_multi_style_dino']}\n")

Results for medieval_cubism_0.5_0.5 (reference_image 1: medieval-bed.jpeg, reference_image 2: Picasso_Smoking_Water_Pipe.jpeg):
Image: spaceshuttle.png, Similarity to Reference 1: 0.20779018104076385, Similarity to Reference 2: 0.39729130268096924
Image: castle.png, Similarity to Reference 1: 0.34021514654159546, Similarity to Reference 2: 0.36105579137802124
Image: lighthouse.png, Similarity to Reference 1: 0.3619091808795929, Similarity to Reference 2: 0.40189388394355774
Image: treasurebox.png, Similarity to Reference 1: 0.4322529137134552, Similarity to Reference 2: 0.39451783895492554
Image: scarecrow.png, Similarity to Reference 1: 0.4021403193473816, Similarity to Reference 2: 0.48399829864501953
Image: mountains.png, Similarity to Reference 1: 0.43320232629776, Similarity to Reference 2: 0.515631377696991
Image: robot.png, Similarity to Reference 1: 0.34823012351989746, Similarity to Reference 2: 0.48859554529190063
Image: snowman.png, Similarity to Reference 1: 0.3917942047119

In [16]:
# Example usage
# Folder for the "guidance_10_without_scaling" side of the scaled vs non-scaled comparison (per the path below).
main_folder = "./Multi_Style_Evaluation/Slerp_Guidance_10_Scaled_vs_NonScaled/guidance_10_without_scaling"
# Reference style images; their order defines "Reference 1" and "Reference 2" in the output.
reference_image_paths = ["./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/medieval-bed.jpeg", 
                         "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/Picasso_Smoking_Water_Pipe.jpeg"]
results = process_main_folder(main_folder, reference_image_paths)

# Print or save the results
# Re-prints the per-subfolder summary from the returned dictionary.
for subfolder, metrics in results.items():
    print(f"Final Results for {subfolder}:")
    print(f"Mean Similarity to Reference 1: {metrics['mean_similarity_ref1']}")
    print(f"Mean Similarity to Reference 2: {metrics['mean_similarity_ref2']}")
    print(f"Weighted Multi-Style DINO Metric: {metrics['weighted_multi_style_dino']}\n")

Results for medieval_cubism_0.5_0.5 (reference_image 1: medieval-bed.jpeg, reference_image 2: Picasso_Smoking_Water_Pipe.jpeg):
Image: spaceshuttle.png, Similarity to Reference 1: 0.22772538661956787, Similarity to Reference 2: 0.45664486289024353
Image: castle.png, Similarity to Reference 1: 0.42737334966659546, Similarity to Reference 2: 0.4061845541000366
Image: lighthouse.png, Similarity to Reference 1: 0.3264588713645935, Similarity to Reference 2: 0.4541235864162445
Image: scarecrow.png, Similarity to Reference 1: 0.33104822039604187, Similarity to Reference 2: 0.49142009019851685
Image: mountain.png, Similarity to Reference 1: 0.35816890001296997, Similarity to Reference 2: 0.5613176822662354
Image: robot.png, Similarity to Reference 1: 0.4163288176059723, Similarity to Reference 2: 0.5592134594917297
Image: snowman.png, Similarity to Reference 1: 0.2457333207130432, Similarity to Reference 2: 0.4072479009628296
Image: hotairbaloon.png, Similarity to Reference 1: 0.3719054162502

#### SCALING ABLATION

In [8]:
# Example usage
# Folder for the "shift_scale_log2_1_(No-Scaling)" setting of the scaling ablation (per the path below).
main_folder = "./Multi_Style_Evaluation/Slerp_Guidance_10_Scaling_ablation/shift_scale_log2_1_(No-Scaling)/"
# Reference style images; their order defines "Reference 1" and "Reference 2" in the output.
reference_image_paths = ["./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/medieval-bed.jpeg", 
                         "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/Picasso_Smoking_Water_Pipe.jpeg"]
results = process_main_folder(main_folder, reference_image_paths)

# Print or save the results
# Re-prints the per-subfolder summary from the returned dictionary.
for subfolder, metrics in results.items():
    print(f"Final Results for {subfolder}:")
    print(f"Mean Similarity to Reference 1: {metrics['mean_similarity_ref1']}")
    print(f"Mean Similarity to Reference 2: {metrics['mean_similarity_ref2']}")
    print(f"Weighted Multi-Style DINO Metric: {metrics['weighted_multi_style_dino']}\n")

Results for medieval_cubism_0.5_0.5 (reference_image 1: medieval-bed.jpeg, reference_image 2: Picasso_Smoking_Water_Pipe.jpeg):
Image: spaceshuttle.png, Similarity to Reference 1: 0.22772538661956787, Similarity to Reference 2: 0.45664486289024353
Image: castle.png, Similarity to Reference 1: 0.42737334966659546, Similarity to Reference 2: 0.4061845541000366
Image: lighthouse.png, Similarity to Reference 1: 0.3264588713645935, Similarity to Reference 2: 0.4541235864162445
Image: scarecrow.png, Similarity to Reference 1: 0.33104822039604187, Similarity to Reference 2: 0.49142009019851685
Image: mountain.png, Similarity to Reference 1: 0.35816890001296997, Similarity to Reference 2: 0.5613176822662354
Image: robot.png, Similarity to Reference 1: 0.4163288176059723, Similarity to Reference 2: 0.5592134594917297
Image: snowman.png, Similarity to Reference 1: 0.2457333207130432, Similarity to Reference 2: 0.4072479009628296
Image: hotairbaloon.png, Similarity to Reference 1: 0.3719054162502

#### GUIDANCE ABLATION

In [8]:
# Example usage
# Folder for the "guidance_30" setting of the guidance ablation (per the path below).
main_folder = "./Multi_Style_Evaluation/Slerp_Guidance_Ablation_Medieval_Cubism/guidance_30/"
# Reference style images; their order defines "Reference 1" and "Reference 2" in the output.
reference_image_paths = ["./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/medieval-bed.jpeg", 
                         "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/Picasso_Smoking_Water_Pipe.jpeg"]
results = process_main_folder(main_folder, reference_image_paths)

# Print or save the results
# Re-prints the per-subfolder summary from the returned dictionary.
for subfolder, metrics in results.items():
    print(f"Final Results for {subfolder}:")
    print(f"Mean Similarity to Reference 1: {metrics['mean_similarity_ref1']}")
    print(f"Mean Similarity to Reference 2: {metrics['mean_similarity_ref2']}")
    print(f"Weighted Multi-Style DINO Metric: {metrics['weighted_multi_style_dino']}\n")

Results for medieval_cubism_0.5_0.5 (reference_image 1: medieval-bed.jpeg, reference_image 2: Picasso_Smoking_Water_Pipe.jpeg):
Image: space_shuttle.png, Similarity to Reference 1: 0.32802289724349976, Similarity to Reference 2: 0.5248400568962097
Image: lighthouse.png, Similarity to Reference 1: 0.41949424147605896, Similarity to Reference 2: 0.4840549826622009
Image: robot.png, Similarity to Reference 1: 0.27710458636283875, Similarity to Reference 2: 0.4708121120929718
Image: snowman.png, Similarity to Reference 1: 0.40609246492385864, Similarity to Reference 2: 0.5009175539016724
Mean Similarity to Reference 1: 0.357678547501564
Mean Similarity to Reference 2: 0.4951561763882637
Weighted Multi-Style DINO Metric: 0.42641736194491386

Final Results for medieval_cubism_0.5_0.5:
Mean Similarity to Reference 1: 0.357678547501564
Mean Similarity to Reference 2: 0.4951561763882637
Weighted Multi-Style DINO Metric: 0.42641736194491386



#### LINEAR VS SLERP BLENDING: 3-STYLES

In [5]:
import numpy as np
import os
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModel
from sklearn.metrics.pairwise import cosine_similarity

# Load DINO model and processor from Hugging Face
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Processor and model come from the same "facebook/dino-vitb8" checkpoint; the model is moved to `device`.
dino_processor = AutoImageProcessor.from_pretrained("facebook/dino-vitb8")
dino_model = AutoModel.from_pretrained("facebook/dino-vitb8").to(device)

def get_dino_embeddings(image_paths):
    """
    Extract DINO VIT-B/8 embeddings from a list of image paths.

    :param image_paths: List of paths to images.
    :return: NumPy array holding one CLS-token embedding row per image.
    """
    images = []
    for image_path in image_paths:
        # The context manager closes the file handle; convert() returns a fully
        # loaded copy, and forcing RGB keeps grayscale/RGBA/palette files from
        # reaching the processor with an unexpected channel count.
        with Image.open(image_path) as pil_image:
            images.append(pil_image.convert("RGB"))
    inputs = dino_processor(images=images, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = dino_model(**inputs)
        embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()  # Taking the CLS token

    return embeddings

def compute_similarity(generated_embedding, reference_embedding):
    """
    Cosine similarity between one generated-image embedding and one reference-style embedding.

    :param generated_embedding: Embedding of the generated image (1D numpy array).
    :param reference_embedding: Embedding of the reference image (1D numpy array).
    :return: Similarity score as a Python scalar.
    """
    # cosine_similarity works on 2D inputs, so present each vector as a single row.
    generated_row = generated_embedding.reshape(1, -1)
    reference_row = reference_embedding.reshape(1, -1)
    pairwise = cosine_similarity(generated_row, reference_row)
    # The result is a 1x1 matrix; .item() extracts its only entry.
    return pairwise.item()

def process_main_folder(main_folder, reference_image_paths):
    """
    Process the main folder and compute the weighted DINO metric for each subfolder.

    Works for any number of reference styles (two, three, ...): the last
    ``len(reference_image_paths)`` underscore-separated tokens of each subfolder
    name are parsed as the style weights (e.g.
    ``egyptian_cubism_macro_0.15_0.15_0.70`` for three references). Subfolders
    whose name does not end in that many numeric tokens are reported and skipped.

    :param main_folder: Directory whose subfolders contain the generated images.
    :param reference_image_paths: Paths to the reference style images, in the
        same order as the weights in the subfolder names.
    :return: Dict mapping each processed subfolder name to a dict with keys
        ``mean_similarity_ref<i>`` (one per reference, 1-based) and
        ``weighted_multi_style_dino``. The values are NaN for a subfolder that
        contains no generated images.
    :raises ValueError: If ``reference_image_paths`` is empty.
    """
    num_refs = len(reference_image_paths)
    if num_refs == 0:
        raise ValueError("reference_image_paths must contain at least one image path")

    final_results = {}
    # The reference embeddings are identical for every subfolder, so they are
    # computed once, on first use, instead of once per subfolder.
    reference_embeddings = None
    reference_header = ", ".join(
        f"reference_image {index}: {os.path.basename(path)}"
        for index, path in enumerate(reference_image_paths, start=1)
    )

    # Sorted so the processing (and printing) order does not depend on the filesystem.
    subfolders = sorted(f.path for f in os.scandir(main_folder) if f.is_dir())

    for subfolder in subfolders:
        subfolder_name = os.path.basename(subfolder)
        # Extract weights from the subfolder name
        try:
            weight_tokens = subfolder_name.split('_')[-num_refs:]
            if len(weight_tokens) < num_refs:
                raise ValueError(f"expected {num_refs} trailing weights, found {len(weight_tokens)}")
            weights = [float(token) for token in weight_tokens]
        except ValueError as e:
            print(f"Error parsing weights from {subfolder_name}: {e}")
            continue

        # Print the header with reference image information
        print(f"Results for {subfolder_name} ({reference_header}):")

        if reference_embeddings is None:
            reference_embeddings = get_dino_embeddings(reference_image_paths)

        # One list of per-image similarities for each reference.
        similarities_per_ref = [[] for _ in range(num_refs)]

        for image_file in sorted(os.listdir(subfolder)):
            if not image_file.endswith((".jpg", ".png")):
                continue
            # Skip the reference image file
            if image_file == "ref_img.png":
                continue

            image_path = os.path.join(subfolder, image_file)
            generated_embedding = get_dino_embeddings([image_path])[0]

            image_similarities = [
                compute_similarity(generated_embedding, reference_embedding)
                for reference_embedding in reference_embeddings
            ]
            for collected, value in zip(similarities_per_ref, image_similarities):
                collected.append(value)

            # Print similarity results for each image
            details = ", ".join(
                f"Similarity to Reference {index}: {value}"
                for index, value in enumerate(image_similarities, start=1)
            )
            print(f"Image: {image_file}, {details}")

        # Calculate the mean similarities for the subfolder (NaN when it holds no images)
        mean_similarities = [
            np.mean(values) if values else float("nan")
            for values in similarities_per_ref
        ]

        # Compute the final weighted multi-style DINO metric
        weighted_multi_style_dino = sum(
            mean_similarity * weight
            for mean_similarity, weight in zip(mean_similarities, weights)
        )

        subfolder_results = {
            f"mean_similarity_ref{index}": mean_similarity
            for index, mean_similarity in enumerate(mean_similarities, start=1)
        }
        subfolder_results["weighted_multi_style_dino"] = weighted_multi_style_dino
        final_results[subfolder_name] = subfolder_results

        for index, mean_similarity in enumerate(mean_similarities, start=1):
            print(f"Mean Similarity to Reference {index}: {mean_similarity}")
        print(f"Weighted Multi-Style DINO Metric: {weighted_multi_style_dino}\n")

    return final_results


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
Some weights of ViTModel were not initialized from the model checkpoint at facebook/dino-vitb8 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Example usage
# Folder holding the three-style SLERP outputs at guidance 10 (per the path below).
main_folder = "./Paper_imgs/Multi_Style/SLERP/egyptian_cubism_macro/guidance_10"
# Reference style images; their order defines "Reference 1", "Reference 2" and "Reference 3" in the output.
reference_image_paths = [
    "./Paper_imgs/Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/papyrus.jpg",
    "./Paper_imgs/Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/Picasso_Smoking_Water_Pipe.jpeg",
    "./Paper_imgs/Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/supermario.jpg"
]
results = process_main_folder(main_folder, reference_image_paths)

# Print or save the results
# Re-prints the per-subfolder summary from the returned dictionary.
for subfolder, metrics in results.items():
    print(f"Final Results for {subfolder}:")
    print(f"Mean Similarity to Reference 1: {metrics['mean_similarity_ref1']}")
    print(f"Mean Similarity to Reference 2: {metrics['mean_similarity_ref2']}")
    print(f"Mean Similarity to Reference 3: {metrics['mean_similarity_ref3']}")
    print(f"Weighted Multi-Style DINO Metric: {metrics['weighted_multi_style_dino']}\n")


Results for egyptian_cubism_macro_0.15_0.15_0.70 (reference_image 1: papyrus.jpg, reference_image 2: Picasso_Smoking_Water_Pipe.jpeg, reference_image 3: supermario.jpg):
Image: spaceshuttle.png, Similarity to Reference 1: 0.23637843132019043, Similarity to Reference 2: 0.390488862991333, Similarity to Reference 3: 0.3766529858112335
Image: castle.png, Similarity to Reference 1: 0.2408851832151413, Similarity to Reference 2: 0.3173666000366211, Similarity to Reference 3: 0.2406243532896042
Image: lighthouse.png, Similarity to Reference 1: 0.17153476178646088, Similarity to Reference 2: 0.26402464509010315, Similarity to Reference 3: 0.2681366801261902
Image: treasurebox.png, Similarity to Reference 1: 0.3096837103366852, Similarity to Reference 2: 0.29475367069244385, Similarity to Reference 3: 0.30037838220596313
Image: scarecrow.png, Similarity to Reference 1: 0.2145356982946396, Similarity to Reference 2: 0.24589744210243225, Similarity to Reference 3: 0.3064268231391907
Image: mount

#### MULTI-CONTEXT: REPHRASED VS NOT-REPHRASED

In [9]:
# Example usage
# Folder whose subfolders are evaluated by process_main_folder.
main_folder = ".//Paper_imgs/Multi_Context_Evaluation/Img+Audio+Music+Weather/heavy_thunderous/scattered_clouds"

# NOTE(review): both entries point to the same file, so the similarities to
# Reference 1 and Reference 2 will be identical. Confirm this is intended.
reference_image_paths = ["./Multi_Context_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/original.jpg", 
                         "./Multi_Context_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/original.jpg"]
results = process_main_folder(main_folder, reference_image_paths)

# Print or save the results
# Re-prints the per-subfolder summary from the returned dictionary.
for subfolder, metrics in results.items():
    print(f"Final Results for {subfolder}:")
    print(f"Mean Similarity to Reference 1: {metrics['mean_similarity_ref1']}")
    print(f"Mean Similarity to Reference 2: {metrics['mean_similarity_ref2']}")
    print(f"Weighted Multi-Style DINO Metric: {metrics['weighted_multi_style_dino']}\n")

FileNotFoundError: [Errno 2] No such file or directory: './Paper_img/Multi_Context_Evaluation/Img+Audio+Music+Weather/heavy_thunderous/scattered_clouds'

### MULTI (CLIP) SCORE

In [1]:
# Install required libraries
!pip install transformers ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /private/var/folders/mx/xlmhp5ln7l5g33gb6w32s46m0000gn/T/pip-req-build-sdrnj8pt
  Running command git clone --quiet https://github.com/openai/CLIP.git /private/var/folders/mx/xlmhp5ln7l5g33gb6w32s46m0000gn/T/pip-req-build-sdrnj8pt
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25ldone

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
import torch
import clip
from PIL import Image
import numpy as np

# Run CLIP on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# clip.load returns the model together with its image preprocessing callable.
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)

In [7]:
def get_clip_similarity(image_path, prompt):
    """
    Compute CLIP similarity between an image and a text prompt.

    The image and the prompt are encoded with the module-level ``clip_model``;
    both embeddings are divided by their norm and their dot product is
    returned, i.e. the cosine similarity of the two embeddings.

    :param image_path: Path to the image file.
    :param prompt: Text prompt to compare with the image.
    :return: Similarity score as a Python float.
    """
    # Open the image in a context manager so its file handle is released as
    # soon as preprocessing has turned it into a tensor, instead of staying
    # open until garbage collection (this function is called once per image).
    with Image.open(image_path) as pil_image:
        image = clip_preprocess(pil_image).unsqueeze(0).to(device)
    text = clip.tokenize([prompt]).to(device)

    with torch.no_grad():
        image_features = clip_model.encode_image(image)
        text_features = clip_model.encode_text(text)

    # Normalise so that the dot product below is a cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    similarity = (image_features @ text_features.T).item()
    return similarity

def CLIP_evaluation(subfolder, prompt_list):
    """
    Evaluates the average CLIP metric for the given subfolder.

    Every ``.jpg``/``.png`` file in ``subfolder`` (except files whose name ends
    with ``ref_img.png``) is collected and sorted by path. Images and prompts
    are paired by position: the i-th sorted image is scored against
    ``prompt_list[i]``.

    :param subfolder: Path to the subfolder containing images for evaluation.
    :param prompt_list: Prompts ordered to match the sorted image file names.
    :return: Average CLIP metric for the subfolder, or None if it contains no
             images to evaluate.
    :raises IndexError: If the subfolder contains more images than there are
                        prompts. Raised before any image is scored.
    """
    import warnings

    # Collect all image files in the subfolder, skipping the reference image.
    image_files = sorted(
        os.path.join(subfolder, f)
        for f in os.listdir(subfolder)
        if f.endswith((".jpg", ".png")) and not f.endswith("ref_img.png")
    )

    # The pairing is purely positional, so validate the counts up front rather
    # than failing half-way through the (slow) scoring loop.
    if len(image_files) > len(prompt_list):
        raise IndexError(
            f"{len(image_files)} images found in {subfolder!r} but only "
            f"{len(prompt_list)} prompts were given"
        )
    if image_files and len(image_files) < len(prompt_list):
        warnings.warn(
            f"{len(image_files)} images found in {subfolder!r} for "
            f"{len(prompt_list)} prompts; images are paired with prompts by "
            f"sorted position, so the pairing may be shifted"
        )

    # Compute CLIP similarities for each (image, prompt) pair.
    clip_similarities = []
    for image_path, prompt in zip(image_files, prompt_list):
        name_img = os.path.basename(image_path)
        clip_similarity = get_clip_similarity(image_path, prompt)
        print("Image:", name_img, "Prompt:", prompt, "Similarity:", clip_similarity)
        clip_similarities.append(clip_similarity)

    # Calculate average CLIP metric for the subfolder
    avg_clip_similarity = np.mean(clip_similarities) if clip_similarities else None

    return avg_clip_similarity

def process_linear_and_slerp(main_folder, prompt_list,
                             linear_subpath="Linear/egyptian_cubism_macro/guidance_10",
                             slerp_subpath="SLERP/egyptian_cubism_macro/guidance_10",
                             linear_label="Linear",
                             slerp_label="SLERP"):
    """
    Evaluates the CLIP metric for every subfolder of two experiment folders
    located under ``main_folder``.

    The two folders default to the Linear / SLERP layout, but any pair can be
    compared by passing the sub-paths and labels explicitly instead of editing
    this function, e.g.::

        process_linear_and_slerp(main_folder, prompt_list,
                                 linear_subpath="guidance_10",
                                 slerp_subpath="guidance_10_without_scaling",
                                 linear_label="SLERP with Scaling",
                                 slerp_label="SLERP without Scaling")

    Other sub-paths previously used with this function include
    ``"Linear/medieval_cubism/guidance_10"`` /
    ``"SLERP/medieval_cubism/guidance_10"`` and the ``"shift_scale_log1_*"`` /
    ``"shift_scale_log2_1_(No-Scaling)"`` ablation folders.

    :param main_folder: Path to the main folder containing both experiment folders.
    :param prompt_list: Prompts forwarded to ``CLIP_evaluation`` for each subfolder.
    :param linear_subpath: Path of the first folder, relative to ``main_folder``.
    :param slerp_subpath: Path of the second folder, relative to ``main_folder``.
    :param linear_label: Label printed and used as key prefix for the first folder.
    :param slerp_label: Label printed and used as key prefix for the second folder.
    :return: Dictionary mapping ``"<label>/<subfolder name>"`` to the average
             CLIP metric of that subfolder.
    """
    final_results = {}

    def _evaluate_group(group_folder, label):
        # Close the scandir iterator promptly and sort the directories so the
        # order of the printed log and of the result keys is reproducible.
        with os.scandir(group_folder) as entries:
            subfolders = sorted(entry.path for entry in entries if entry.is_dir())
        for subfolder in subfolders:
            print(label)
            subfolder_name = os.path.basename(subfolder)
            final_results[f"{label}/{subfolder_name}"] = CLIP_evaluation(subfolder, prompt_list)

    _evaluate_group(os.path.join(main_folder, linear_subpath), linear_label)
    _evaluate_group(os.path.join(main_folder, slerp_subpath), slerp_label)

    return final_results



#### LINEAR VS SLERP BLENDING

In [43]:
# Prompts are paired with images by position: CLIP_evaluation sorts the image
# files of each subfolder and scores the i-th file against the i-th prompt.
prompt_list = [
    "A castle with a flowery landscape",
    "A hot air baloon with a sunset background",
    "A lighthouse with sea in the background",
    "A mountain landscape",
    "A robot which is playing football",
    "A scarecrow in a wheat field",
    "A snowman in the desert",
    "A space rocket which is departing",
    "A treasure box in a cave",
]

# Folder evaluated in this section.
main_folder = "./Multi_Style_Evaluation/Linear_VS_Slerp_Guidance_10_Medieval_Cubism/"
results = process_linear_and_slerp(main_folder, prompt_list)

# Report the average CLIP similarity of every evaluated subfolder.
for subfolder, avg_clip in results.items():
    print(f"Results for {subfolder}:", f"Average CLIP Similarity: {avg_clip}\n", sep="\n")

Linear
Image: castle.png Prompt: A castle with a flowery landscape Similarity: 0.3241465091705322
Image: hotairbaloon.png Prompt: A hot air baloon with a sunset background Similarity: 0.3248940706253052
Image: lighthouse.png Prompt: A lighthouse with sea in the background Similarity: 0.29834774136543274
Image: mountains.png Prompt: A mountain landscape Similarity: 0.29847773909568787
Image: robot.png Prompt: A robot which is playing football Similarity: 0.32545366883277893
Image: scarecrow.png Prompt: A scarecrow in a wheat field Similarity: 0.3816331923007965
Image: snowman.png Prompt: A snowman in the desert Similarity: 0.36111900210380554
Image: spaceshuttle.png Prompt: A space rocket which is departing Similarity: 0.26876726746559143
Image: treasurebox.png Prompt: A treasure box in a cave Similarity: 0.3314994275569916
Linear
Image: castle.png Prompt: A castle with a flowery landscape Similarity: 0.3115256428718567
Image: hotairbaloon.png Prompt: A hot air baloon with a sunset back

#### SLERP: SCALING VS NON-SCALING

In [40]:
# Prompts are paired with images by position: CLIP_evaluation sorts the image
# files of each subfolder and scores the i-th file against the i-th prompt.
prompt_list = [
    "A castle with a flowery landscape",
    "A hot air baloon with a sunset background",
    "A lighthouse with sea in the background",
    "A mountain landscape",
    "A robot which is playing football",
    "A scarecrow in a wheat field",
    "A snowman in the desert",
    "A space rocket which is departing",
    "A treasure box in a cave",
]

# Folder evaluated in this section.
main_folder = "./Multi_Style_Evaluation/Slerp_Guidance_10_Scaled_vs_NonScaled/"
results = process_linear_and_slerp(main_folder, prompt_list)

# Report the average CLIP similarity of every evaluated subfolder.
for subfolder, avg_clip in results.items():
    print(f"Results for {subfolder}:", f"Average CLIP Similarity: {avg_clip}\n", sep="\n")

SLERP with Scaling
Image: castle.png Prompt: A castle with a flowery landscape Similarity: 0.3098236918449402
Image: hotairbaloon.png Prompt: A hot air baloon with a sunset background Similarity: 0.33280056715011597
Image: lighthouse.png Prompt: A lighthouse with sea in the background Similarity: 0.2918351888656616
Image: mountains.png Prompt: A mountain landscape Similarity: 0.2779140770435333
Image: robot.png Prompt: A robot which is playing football Similarity: 0.3181683123111725
Image: scarecrow.png Prompt: A scarecrow in a wheat field Similarity: 0.35958778858184814
Image: snowman.png Prompt: A snowman in the desert Similarity: 0.33831751346588135
Image: spaceshuttle.png Prompt: A space rocket which is departing Similarity: 0.2686412036418915
Image: treasurebox.png Prompt: A treasure box in a cave Similarity: 0.3319544196128845
SLERP without Scaling
Image: castle.png Prompt: A castle with a flowery landscape Similarity: 0.31859153509140015
Image: hotairbaloon.png Prompt: A hot air

#### SLERP: SCALING ABLATION

In [17]:
# Prompts are paired with images by position: CLIP_evaluation sorts the image
# files of each subfolder and scores the i-th file against the i-th prompt.
prompt_list = [
    "A castle with a flowery landscape",
    "A hot air baloon with a sunset background",
    "A lighthouse with sea in the background",
    "A mountain landscape",
    "A robot which is playing football",
    "A scarecrow in a wheat field",
    "A snowman in the desert",
    "A space rocket which is departing",
    "A treasure box in a cave",
]

# Folder evaluated in this section.
main_folder = "./Multi_Style_Evaluation/Slerp_Guidance_10_Scaling_ablation/"
results = process_linear_and_slerp(main_folder, prompt_list)

# Report the average CLIP similarity of every evaluated subfolder.
for subfolder, avg_clip in results.items():
    print(f"Results for {subfolder}:", f"Average CLIP Similarity: {avg_clip}\n", sep="\n")

shift_scale_log2_1_(No-Scaling)
Image: castle.png Prompt: A castle with a flowery landscape Similarity: 0.31859153509140015
Image: hotairbaloon.png Prompt: A hot air baloon with a sunset background Similarity: 0.3272666037082672
Image: lighthouse.png Prompt: A lighthouse with sea in the background Similarity: 0.29973486065864563
Image: mountain.png Prompt: A mountain landscape Similarity: 0.28524690866470337
Image: robot.png Prompt: A robot which is playing football Similarity: 0.24093365669250488
Image: scarecrow.png Prompt: A scarecrow in a wheat field Similarity: 0.3686532974243164
Image: snowman.png Prompt: A snowman in the desert Similarity: 0.3465888202190399
Image: spaceshuttle.png Prompt: A space rocket which is departing Similarity: 0.26024436950683594
Image: treasure_box.png Prompt: A treasure box in a cave Similarity: 0.3323262929916382
shift_scale_log1_0.125
Image: castle.png Prompt: A castle with a flowery landscape Similarity: 0.31445419788360596
Image: hotairballonn.png 

#### SLERP VS LINEAR: 3-STYLES

In [8]:
# Prompts are paired with images by position: CLIP_evaluation sorts the image
# files of each subfolder and scores the i-th file against the i-th prompt.
prompt_list = [
    "A castle with a flowery landscape",
    "A hot air baloon with a sunset background",
    "A lighthouse with sea in the background",
    "A mountain landscape",
    "A robot which is playing football",
    "A scarecrow in a wheat field",
    "A snowman in the desert",
    "A space rocket which is departing",
    "A treasure box in a cave",
]

# Folder evaluated in this section.
main_folder = "./Paper_imgs/Multi_Style/"
results = process_linear_and_slerp(main_folder, prompt_list)

# Report the average CLIP similarity of every evaluated subfolder.
for subfolder, avg_clip in results.items():
    print(f"Results for {subfolder}:", f"Average CLIP Similarity: {avg_clip}\n", sep="\n")

Linear
Image: castle.png Prompt: A castle with a flowery landscape Similarity: 0.26494961977005005
Image: hotairbaloon.png Prompt: A hot air baloon with a sunset background Similarity: 0.31576985120773315
Image: lighthouse.png Prompt: A lighthouse with sea in the background Similarity: 0.2887105345726013
Image: mountains.png Prompt: A mountain landscape Similarity: 0.2517831027507782
Image: robot.png Prompt: A robot which is playing football Similarity: 0.30933526158332825
Image: scarecrow.png Prompt: A scarecrow in a wheat field Similarity: 0.33314433693885803
Image: snowman.png Prompt: A snowman in the desert Similarity: 0.2920951843261719
Image: spaceshuttle.png Prompt: A space rocket which is departing Similarity: 0.19897164404392242
Image: treasurebox.png Prompt: A treasure box in a cave Similarity: 0.24524089694023132
SLERP
Image: castle.png Prompt: A castle with a flowery landscape Similarity: 0.25213173031806946
Image: hotairbaloon.png Prompt: A hot air baloon with a sunset bac