In [None]:
# from google.colab import drive
# drive.mount('/content/drive')
# # drive.mount('/content/drive/MyDrive/KAUST/Generative_AI')

In [None]:
!pip install git+https://github.com/openai/CLIP.git

import os
import zipfile
import torch
import clip
from PIL import Image

In [None]:
# Pick the compute device (GPU when CUDA is available, otherwise CPU) and
# load the CLIP ViT-B/32 checkpoint together with its image preprocessing
# transform onto that device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)


100%|████████████████████████████████████████| 338M/338M [00:02<00:00, 127MiB/s]


In [None]:

# Function to extract zip files
def extract_zip(input_zip):
    """Read every member of a zip archive into memory.

    Despite the name, nothing is written to disk: the archive contents are
    returned as a dictionary.

    Args:
        input_zip: Path to a zip file, or a file-like object accepted by
            ``zipfile.ZipFile``.

    Returns:
        dict: Maps each member name listed by ``namelist()`` to the raw
        bytes returned by ``read()`` for that member.

    Raises:
        zipfile.BadZipFile: If ``input_zip`` is not a valid zip archive.
    """
    # The context manager closes the archive deterministically instead of
    # relying on garbage collection, and avoids rebinding the parameter name
    # to a different kind of object.
    with zipfile.ZipFile(input_zip) as archive:
        return {name: archive.read(name) for name in archive.namelist()}

# extract_zip('/content/AIF166.zip')
# extract_zip('/content/data_images.zip')
# !unzip '/content/AIF166.zip'
# !unzip '/content/data_images.zip'

In [None]:

# Define the paths to the directories
# Directory in which the original (untransformed) images are looked up.
# NOTE(review): the doubled 'content' segment presumably reflects how the
# archive was unpacked — confirm this is the intended location.
original_images_path = '/content/content/data_images'
# Root directory of the transformed images; the evaluation loop expects one
# sub-directory per entry in `emotions` underneath it.
transformed_images_path = '/content/AIF166'
# Emotion labels. Each label is used both as a sub-directory name under
# `transformed_images_path` and as the text prompt passed to CLIP.
emotions = ['anger', 'happiness', 'sadness', 'fear']

def process_image(image_path):
    """Load an image file and turn it into a single-item batch for the model.

    The image at ``image_path`` is opened with PIL, passed through the
    module-level ``preprocess`` transform, given a leading batch axis with
    ``unsqueeze(0)`` and moved to the module-level ``device``.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The preprocessed image with a leading batch axis, on ``device``.
    """
    with Image.open(image_path) as source_image:
        pixel_tensor = preprocess(source_image)
        single_item_batch = pixel_tensor.unsqueeze(0)
        return single_item_batch.to(device)

# Compute cosine similarity
def compute_cosine_similarity(features1, features2):
    """Cosine similarity between two feature tensors along their last axis.

    Both inputs are L2-normalised along the last dimension and the
    element-wise product is then summed over that same dimension.

    Args:
        features1: Tensor whose last dimension is the feature axis.
        features2: Tensor broadcast-compatible with ``features1``.

    Returns:
        Tensor of similarities with the feature axis reduced away, e.g.
        shape ``(batch,)`` for ``(batch, dim)`` inputs and a 0-d tensor for
        1-D inputs.

    Note:
        A zero-norm feature vector divides by zero and yields NaN.
    """
    features1 = features1 / features1.norm(dim=-1, keepdim=True)
    features2 = features2 / features2.norm(dim=-1, keepdim=True)
    # Reduce over the same axis that was normalised. The previous hard-coded
    # dim=1 raised on 1-D inputs and reduced the wrong axis for inputs with
    # more than two dimensions; for 2-D inputs dim=-1 and dim=1 coincide.
    return (features1 * features2).sum(dim=-1)

# Initialize dictionary to store results. Layout:
#   results[emotion] = {'image_similarities': [float, ...],
#                       'emotion_similarities': [float, ...]}
# An emotion only gets an entry once at least one of its images was scored.
results = {}

# This is pure inference, so autograd is disabled for the whole loop: no
# computation graph is built for each encode call, which saves memory.
with torch.no_grad():
    # Process each emotion
    for emotion in emotions:
        emotion_dir = os.path.join(transformed_images_path, emotion)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # order of the accumulated scores reproducible between runs.
        img_filenames = sorted(os.listdir(emotion_dir))

        # The text embedding depends only on the emotion label, so it is
        # computed once per emotion rather than once per image.
        text_tokens = clip.tokenize([emotion]).to(device)
        emotion_features = model.encode_text(text_tokens)

        for img_filename in img_filenames:
            # Remove leading underscore and change extension from .png to .jpg for original images
            original_img_filename = img_filename.lstrip('_').replace('.png', '.jpg')
            original_img_path = os.path.join(original_images_path, original_img_filename)
            transformed_img_path = os.path.join(emotion_dir, img_filename)

            # Skip (with a message) any pair for which one side is missing.
            if not os.path.exists(original_img_path):
                print(f"Original image not found: {original_img_path}")
                continue
            if not os.path.exists(transformed_img_path):
                print(f"Transformed image not found: {transformed_img_path}")
                continue

            # Load and process images
            original_features = model.encode_image(process_image(original_img_path))
            transformed_features = model.encode_image(process_image(transformed_img_path))

            # Similarity between the original image and its transformed version
            image_similarity = compute_cosine_similarity(original_features, transformed_features).item()

            # Similarity between the transformed image and the emotion descriptor
            emotion_similarity = compute_cosine_similarity(transformed_features, emotion_features).item()

            # Store results (the per-emotion bucket is created lazily on the
            # first scored image)
            emotion_scores = results.setdefault(
                emotion, {'image_similarities': [], 'emotion_similarities': []}
            )
            emotion_scores['image_similarities'].append(image_similarity)
            emotion_scores['emotion_similarities'].append(emotion_similarity)

# Report, for every emotion that has scores, the mean image-to-image
# similarity and the mean image-to-emotion-text similarity.
def _average(values):
    """Return the arithmetic mean of a non-empty sequence of numbers."""
    return sum(values) / len(values)


for emotion, data in results.items():
    avg_img_sim = _average(data['image_similarities'])
    avg_emot_sim = _average(data['emotion_similarities'])
    print(f"{emotion}: Avg Image Similarity = {avg_img_sim}, Avg Emotion Similarity = {avg_emot_sim}")


anger: Avg Image Similarity = 0.8909220867846386, Avg Emotion Similarity = 0.22089431946536145
happiness: Avg Image Similarity = 0.8861510495105421, Avg Emotion Similarity = 0.20995358386671686
sadness: Avg Image Similarity = 0.8847979809864458, Avg Emotion Similarity = 0.2259624435240964
fear: Avg Image Similarity = 0.8877129612198795, Avg Emotion Similarity = 0.22000747129141565
