# Image composition assessment

In [None]:
# Clone the SAMP image-composition-assessment repository.
!git clone https://github.com/bcmi/Image-Composition-Assessment-with-SAMP.git
#%cd Image-Composition-Assessment-with-SAMP/SAMPNet
# Before running the assessment: download the CADB data (~2GB), then change the default dataset folder and GPU id in config.py.

In [None]:
!pip install torch torchvision tensorboardX opencv-python scipy tqdm einops

In [None]:
# Launch the (modified) test script to perform the assessment.
#   --custom_image_dir : the storyboard directory to assess
#   --batch_size       : the maximum number of images to consider at a time
!python "path/to/test.py" --custom_image_dir "/path/to/storyboard/directory" --batch_size 20

# CLIP score

# Prompts (w/o characters)

In [None]:
scene_prompts = [
 # List of the prompts used to generate the storyboard, without the characters' LoRA names.
]

In [None]:
from PIL import Image
import numpy as np
import torch
from torchmetrics.functional.multimodal import clip_score
from functools import partial
import re
import os

def extract_number(filename):
    """Return the integer that follows ``shot_`` in ``filename``.

    The pattern is searched anywhere in the name (e.g. ``"shot_0.png"`` -> 0).
    Names without a ``shot_<digits>`` part yield ``-1``.
    """
    found = re.search(r'shot_(\d+)', filename)
    if found is None:
        return -1
    return int(found.group(1))


def load_images_from_folder(folder_path):
    """List the storyboard images in ``folder_path`` ordered by shot number.

    Only entries whose name starts with ``shot_`` and ends (case-insensitively)
    with ``.png``, ``.jpg`` or ``.jpeg`` are kept. They are ordered by the
    number returned by ``extract_number`` and returned as full paths.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    shot_files = []
    for entry in os.listdir(folder_path):
        if entry.startswith('shot_') and entry.lower().endswith(image_extensions):
            shot_files.append(entry)

    # Order by the numeric shot index from the filename, not alphabetically
    shot_files.sort(key=extract_number)

    return [os.path.join(folder_path, entry) for entry in shot_files]


# CLIP scorer: torchmetrics' `clip_score` with the checkpoint name bound once,
# so callers only have to pass the image tensor and the prompt.
clip_score_fn = partial(
    clip_score,
    model_name_or_path="openai/clip-vit-base-patch16",
)

def calculate_clip_score(images, prompts):
    """Compute the CLIP score between one image and its text prompt.

    Args:
        images: A single image, either a PIL image or an array-like laid out as
            (height, width, channels) with pixel values in 0-255.
        prompts: The text prompt, forwarded unchanged to ``clip_score_fn``.

    Returns:
        The value produced by ``clip_score_fn`` as a Python float rounded to
        4 decimal places.
    """
    # PNG files are often RGBA, palette or greyscale. Force three channels so
    # the permute below always sees a (height, width, 3) array.
    if hasattr(images, "convert"):
        images = images.convert("RGB")

    # Go straight to uint8: dividing by 255 and multiplying back only adds a
    # float round trip whose truncating cast can change pixel values.
    # np.array (not asarray) makes a writable copy, which torch.from_numpy expects.
    pixels = np.array(images, dtype=np.uint8)

    # (height, width, channels) -> (1, channels, height, width)
    images_tensor = torch.from_numpy(pixels).permute(2, 0, 1).unsqueeze(0)

    # Calculate CLIP score
    clip_score_result = clip_score_fn(images_tensor, prompts).detach()
    return round(float(clip_score_result), 4)


# Storyboard image folder to compute CLIP score on
folder_path = "/path/to/storyboard/directory"

# Storyboard prompts: one per image, in shot order (they are paired by position below)
# NOTE(review): an earlier cell defines `scene_prompts`; confirm whether it should be reused here.
prompts = [ ]

# Load images from folder (sorted by shot number)
image_paths = load_images_from_folder(folder_path)

if len(image_paths) != len(prompts):
    # Positional pairing is meaningless unless both lists have the same length
    print(f"Error: Number of images ({len(image_paths)}) does not match the number of prompts ({len(prompts)}).")
elif not image_paths:
    # Both lists are empty: without this guard the average below divides by zero
    print("Error: No images found; nothing to score.")
else:
    # Running sum of the per-image scores, used for the average
    total_clip_score = 0.0

    for image_path, prompt in zip(image_paths, prompts):
        # The context manager closes the file handle once the image is scored
        with Image.open(image_path) as image:
            # Named `pair_score` so the imported torchmetrics `clip_score`
            # function is not overwritten in the notebook namespace
            pair_score = calculate_clip_score(image, prompt)
        total_clip_score += pair_score
        print(f"{pair_score}")

    # Calculate and print the average CLIP score
    average_clip_score = total_clip_score / len(image_paths)
    print(f"\nAverage CLIP score across all image-prompt pairs: {average_clip_score:.4f}")
        

# DINO score

In [None]:
!pip install torch torchvision
!pip install git+https://github.com/facebookresearch/dinov2.git 

In [None]:
import os
import re
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import torch
import torch.nn as nn

def extract_number(filename):
    """Sort key: the integer following ``shot_`` in ``filename``, or -1 if absent."""
    hit = re.search(r'shot_(\d+)', filename)
    if not hit:
        return -1
    digits = hit.group(1)
    return int(digits)

def load_images_from_folder(folder_path):
    """List the image files in ``folder_path`` ordered by shot number.

    Files are kept when their name ends with ``.png``, ``.jpg`` or ``.jpeg``;
    the check is case-insensitive, so ``.PNG`` is accepted too. They are
    ordered by the number returned by ``extract_number``, with ties (including
    every name lacking a shot number) broken by filename so the order does not
    depend on the arbitrary order of ``os.listdir``.

    Returns:
        A list of full paths, one per image file.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_files = [
        f for f in os.listdir(folder_path)
        if f.lower().endswith(image_extensions)
    ]

    # The filename is the secondary key, which keeps the pairing across the
    # two folders reproducible from run to run.
    image_files.sort(key=lambda f: (extract_number(f), f))

    # Full paths to the images
    return [os.path.join(folder_path, filename) for filename in image_files]

# Run on the GPU when PyTorch can see one, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the DINOv2 base checkpoint: the preprocessor, then the model moved to `device`
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
model = AutoModel.from_pretrained("facebook/dinov2-base")
model = model.to(device)

# Folder of the storyboard for the fine-tuned model and the base model
folder_path_1 = "/path/to/finetuned/model/storyboard/direcory"
folder_path_2 = "/path/to/base/model/storyboard/direcory"

# Both lists are sorted by shot number; images are paired by position below
images_folder_1 = load_images_from_folder(folder_path_1)
images_folder_2 = load_images_from_folder(folder_path_2)


def _embed_image(image_path):
    """Return the mean-pooled ``last_hidden_state`` vector for one image file."""
    # Close the file as soon as the pixels are decoded. convert() returns an
    # independent RGB copy, so RGBA / palette / greyscale files all reach the
    # processor with three channels.
    with Image.open(image_path) as img:
        rgb_image = img.convert("RGB")

    # Inference only: no gradients are needed
    with torch.no_grad():
        inputs = processor(images=rgb_image, return_tensors="pt").to(device)
        outputs = model(**inputs)

    # Average over the token dimension, then drop the batch dimension
    return outputs.last_hidden_state.mean(dim=1)[0]


if len(images_folder_1) != len(images_folder_2):
    # Positional pairing is meaningless unless both folders have the same count
    print(f"Error: Folder 1 has {len(images_folder_1)} images, Folder 2 has {len(images_folder_2)} images.")
elif not images_folder_1:
    # Both folders are empty: without this guard the average below divides by zero
    print("Error: No images found in either folder; nothing to compare.")
else:
    print(f"Processing {len(images_folder_1)} image pairs...")

    # Cosine similarity between two 1-D feature vectors
    cos = nn.CosineSimilarity(dim=0)

    # Accumulate similarity scores for average computation
    total_similarity = 0.0

    # Iterate through image pairs
    for img1_path, img2_path in zip(images_folder_1, images_folder_2):
        image_features1 = _embed_image(img1_path)
        image_features2 = _embed_image(img2_path)

        # Cosine similarity lies in [-1, 1]; rescale it to [0, 1]
        similarity = cos(image_features1, image_features2).item()
        dino_similarity_score = (similarity + 1) / 2

        # Accumulate the similarity score
        total_similarity += dino_similarity_score

        # Print similarity for this image pair
        print(f"Similarity between {os.path.basename(img1_path)} and {os.path.basename(img2_path)}: {dino_similarity_score:.4f}")

    # Calculate and print the average similarity
    average_similarity = total_similarity / len(images_folder_1)
    print(f"\nAverage Dino similarity across all image pairs: {average_similarity:.4f}")
