In [1]:
import torch
import lpips
import os
import yaml
import numpy as np
from PIL import Image
import torch
from scipy.spatial.distance import pdist
import yaml
import torch
from diversity import DiversityCalculator
from visualize import plot_images
import os
import yaml
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from transformers import CLIPModel, AutoProcessor

# Check whether PyTorch can see a CUDA device; the result is shown as the cell output.
torch.cuda.is_available()

True

In [2]:
from transformers import CLIPModel, CLIPProcessor

# Checkpoint identifier passed to both `from_pretrained` calls below.
model_name = "openai/clip-vit-base-patch32"

# Use the GPU when one is available and fall back to the CPU otherwise, so this
# cell does not fail on a machine without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = CLIPModel.from_pretrained(model_name).to(device)
processor = CLIPProcessor.from_pretrained(model_name)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


# Code

In [3]:
# Loaded (model, processor) pairs keyed by (checkpoint name, device), so the
# checkpoint is read once per kernel session instead of once per call.
_CLIP_CACHE = {}


def _load_clip(model_name, device):
    """Return the cached (model, processor) pair for a checkpoint, loading it on first use."""
    cache_key = (model_name, device)
    if cache_key not in _CLIP_CACHE:
        clip_model = CLIPModel.from_pretrained(model_name).to(device)
        clip_processor = AutoProcessor.from_pretrained(model_name)
        _CLIP_CACHE[cache_key] = (clip_model, clip_processor)
    return _CLIP_CACHE[cache_key]


def get_clip_features(images, model_name="openai/clip-vit-base-patch32"):
    """Encode images with a CLIP model and return the image features.

    The model and processor are loaded on the first call for a given
    checkpoint/device pair and reused afterwards; previously they were
    re-created on every call, which repeated the load for each folder.

    Args:
        images: Images accepted by the processor's `images=` argument
            (the notebook passes a list of RGB PIL images).
        model_name: Checkpoint identifier handed to `from_pretrained`.
            Defaults to the checkpoint the notebook originally hard-coded.

    Returns:
        A NumPy array holding the output of `model.get_image_features`
        (presumably one row per input image; confirm against the
        transformers documentation).
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, processor = _load_clip(model_name, device)

    inputs = processor(images=images, return_tensors="pt", padding=True).to(device)

    # Inference only: no gradients are needed for feature extraction.
    with torch.no_grad():
        image_features = model.get_image_features(**inputs)

    # Move to the CPU before converting so the result is a plain NumPy array.
    return image_features.cpu().numpy()


def compute_uncertainty_score(diversity_scores, bayesian_uncertainty=None):
    """Combine diversity metrics into an uncertainty score.

    Args:
        diversity_scores: Mapping with the keys 'mean_pairwise', 'entropy'
            and 'variance'. The 'variance' entry is reduced with `np.mean`,
            so it may be a scalar or an array-like.
        bayesian_uncertainty: Optional extra term. When given (including a
            value of 0 or a NumPy array), 0.2 times this value is added to
            the weighted sum.

    Returns:
        The weighted sum 0.4 * mean_pairwise + 0.3 * entropy
        + 0.3 * mean(variance), plus the optional Bayesian term.
    """
    weights = {
        'mean_pairwise': 0.4,
        'entropy': 0.3,
        'variance': 0.3
    }
    uncertainty_score = (
        weights['mean_pairwise'] * diversity_scores['mean_pairwise'] +
        weights['entropy'] * diversity_scores['entropy'] +
        weights['variance'] * np.mean(diversity_scores['variance'])
    )
    # Compare against None explicitly: a truthiness test raises ValueError for
    # multi-element NumPy arrays ("truth value of an array is ambiguous").
    if bayesian_uncertainty is not None:
        # This term is added on top of the three weights, which already sum to 1.0.
        uncertainty_score += 0.2 * bayesian_uncertainty
    return uncertainty_score

# All metrics

In [4]:
# Per-prompt metrics, keyed by the prompt text read from each folder.
# Folders whose prompt text is identical overwrite each other's entry.
results = {}

dataset_path = "../Dataset/stable-diffusion"

# Each sub-directory is expected to hold one prompt (.txt) and its images.
for folder_name in sorted(os.listdir(dataset_path)):
    folder_path = os.path.join(dataset_path, folder_name)

    # Anything at the dataset root that is not a directory is ignored.
    if not os.path.isdir(folder_path):
        continue

    folder_entries = os.listdir(folder_path)

    # The prompt lives in a text file; the first one listed is used.
    prompt_files = [name for name in folder_entries if name.lower().endswith('.txt')]
    if not prompt_files:
        print(f"Skipping {folder_path}, no text file found.")
        continue

    prompt_path = os.path.join(folder_path, prompt_files[0])
    with open(prompt_path, "r", encoding="utf-8") as prompt_file:
        prompt_text = prompt_file.read().strip()

    # Collect image paths in sorted order.
    # NOTE(review): '.png' is not among the accepted suffixes; confirm that is intended.
    image_paths = sorted(
        os.path.join(folder_path, name)
        for name in folder_entries
        if name.lower().endswith(('.webp', '.jpg', '.jpeg', '.bmp'))
    )
    if not image_paths:
        print(f"Skipping {folder_path}, no images found.")
        continue

    images = []
    for image_path in image_paths:
        images.append(Image.open(image_path).convert("RGB"))

    # CLIP features -> diversity metrics -> combined uncertainty score.
    clip_features = get_clip_features(images)
    diversity_scores = DiversityCalculator.compute_all_scores(clip_features)
    uncertainty_score = compute_uncertainty_score(diversity_scores)

    # Keep the combined score alongside the individual diversity metrics.
    results[prompt_text] = dict(
        uncertainty_score=uncertainty_score,
        mean_pairwise=diversity_scores["mean_pairwise"],
        entropy=diversity_scores["entropy"],
        variance=diversity_scores["variance"],
    )


In [5]:
import pandas as pd

# Scores read from disk; they are joined with the computed metrics on "Prompt" in the next cell.
df = pd.read_csv('scores.csv')

# One row per prompt: the keys of `results` become the index, which
# reset_index() then turns into the first column.
metrics_by_prompt = pd.DataFrame.from_dict(results, orient='index')
df2 = metrics_by_prompt.reset_index()

# Name the former index "Prompt" and label the metric columns explicitly.
metric_columns = ["Prompt", "uncertainty_score", "mean_pairwise", "entropy", "variance"]
df2.columns = metric_columns

In [6]:
# Inner join on "Prompt": only prompts present in both frames are kept.
# Note that `df` is rebound here, so re-running this cell merges the already-merged frame again.
df = df.merge(df2, how='inner', on='Prompt')

In [8]:
from scipy.stats import spearmanr

# Spearman rank correlation between the combined uncertainty score and the
# `diversity_score` column of `df`; the p-value is not used.
corr_diversity, _p_value = spearmanr(
    df['uncertainty_score'],
    df['diversity_score'],
)

print(f"Correlation (Diversity): {corr_diversity:.3f}")

Correlation (Diversity): 0.665


In [9]:
# Spearman rank correlation between the mean pairwise metric and the
# `diversity_score` column of `df`; the p-value is not used.
corr_diversity, _p_value = spearmanr(
    df['mean_pairwise'],
    df['diversity_score'],
)

print(f"Correlation (Diversity): {corr_diversity:.3f}")

Correlation (Diversity): 0.574


In [11]:
# Spearman rank correlation between the variance metric and the
# `diversity_score` column of `df`; the p-value is not used.
corr_diversity, _p_value = spearmanr(
    df['variance'],
    df['diversity_score'],
)

print(f"Correlation (Diversity): {corr_diversity:.3f}")

Correlation (Diversity): 0.661


In [13]:
# Spearman rank correlation between the entropy metric and the
# `diversity_score` column of `df`; the p-value is not used.
corr_diversity, _p_value = spearmanr(
    df['entropy'],
    df['diversity_score'],
)

print(f"Correlation (Diversity): {corr_diversity:.3f}")

Correlation (Diversity): 0.262


In [15]:
from scipy.stats import spearmanr

# Repeats the uncertainty-score correlation computed in an earlier cell:
# Spearman rank correlation against the `diversity_score` column of `df`.
corr_diversity, _p_value = spearmanr(
    df['uncertainty_score'],
    df['diversity_score'],
)

print(f"Correlation (Diversity): {corr_diversity:.3f}")

Correlation (Diversity): 0.665


In [17]:
# Split off the two groups once: the first 50 and the last 50 rows of `df`.
first50 = df.head(50)
last50 = df.tail(50)

# Mean and sample standard deviation for the first 50 rows
mean_uncertainty_first50 = first50['uncertainty_score'].mean()
std_uncertainty_first50 = first50['uncertainty_score'].std()
mean_diversity_first50 = first50['diversity_score'].mean()
std_diversity_first50 = first50['diversity_score'].std()

# Mean and sample standard deviation for the last 50 rows
mean_uncertainty_last50 = last50['uncertainty_score'].mean()
std_uncertainty_last50 = last50['uncertainty_score'].std()
mean_diversity_last50 = last50['diversity_score'].mean()
std_diversity_last50 = last50['diversity_score'].std()

# Report each group as mean ± std, rounded to three decimals.
print(f"First 50 rows - Uncertainty: {mean_uncertainty_first50:.3f} ± {std_uncertainty_first50:.3f}")
print(f"First 50 rows - Diversity: {mean_diversity_first50:.3f} ± {std_diversity_first50:.3f}")

print(f"Last 50 rows - Uncertainty: {mean_uncertainty_last50:.3f} ± {std_uncertainty_last50:.3f}")
print(f"Last 50 rows - Diversity: {mean_diversity_last50:.3f} ± {std_diversity_last50:.3f}")


First 50 rows - Uncertainty: 3.062 ± 1.282
First 50 rows - Diversity: 2.025 ± 0.812
Last 50 rows - Uncertainty: 3.193 ± 1.072
Last 50 rows - Diversity: 2.200 ± 0.956


In [None]:
# For each metric, print mean and std of the first 50 rows, then of the
# remaining rows (position 50 onward).
# NOTE(review): the earlier summary cell uses the last 50 rows (`[-50:]`)
# while this one uses `[50:]`; the two agree only when `df` has exactly
# 100 rows. Confirm which split is intended.
for heading, column in (
    ("MDP", 'mean_pairwise'),
    ("variance", 'variance'),
    ("entropy", 'entropy'),
):
    print("="*20 + heading + "="*20)

    first_rows = df[column][:50]
    print(first_rows.mean(), first_rows.std())

    remaining_rows = df[column][50:]
    print(remaining_rows.mean(), remaining_rows.std())