In [1]:
import collections
import os
import pprint
import sys
from argparse import ArgumentParser
from typing import Dict, Iterator, List, Optional, Tuple

import clip
import numpy as np
import torch
# import torchvision.transforms.functional as F
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image

# Helper function to calculate cosine similarity
def calculate_similarity_helper(image_features, text_features):
    """Return the cosine similarity of an image embedding and a text embedding.

    ``text_features`` receives a leading dimension via ``unsqueeze(0)`` before
    it is compared with ``image_features``. The result is converted with
    ``.item()``, so the comparison must produce exactly one value.
    """
    text_batch = text_features.unsqueeze(0)
    cosine = F.cosine_similarity(image_features, text_batch)
    return cosine.item()

class CLIPSimilarityLoss:
    """Scores how well an image file matches a text prompt using a CLIP model.

    The model and its image preprocessing transform are loaded once in
    ``__init__`` and reused for every ``calculate_similarity`` call.
    """

    def __init__(self, opts=None, model_name="ViT-B/32"):
        """Load the CLIP model onto the GPU when available, else the CPU.

        Args:
            opts: Accepted for call compatibility; not used by this class.
            model_name: Model identifier forwarded to ``clip.load``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model, self.preprocess = clip.load(model_name, device=self.device)
        self.model.eval()
        self.max_context_length = 77  # CLIP's max token length
        self.eot_token = 49407  # End-of-text token ID for CLIP

    def calculate_similarity(self, image_path: str, text_prompt: str) -> float:
        """Return 100 x cosine similarity between an image file and a prompt.

        Args:
            image_path: Path of the image file to score (passed to
                ``Image.open``).
            text_prompt: Prompt to compare against. It is tokenized with
                ``truncate=True``, so a prompt longer than CLIP's context
                window is presumably cut off rather than rejected -- only the
                leading part of a long prompt influences the score.

        Returns:
            The scaled similarity as a Python float.
        """
        # Preprocess inside the ``with`` so the pixel data is read while the
        # file is open, and the handle is released right after instead of
        # lingering for every image scored.
        with Image.open(image_path) as img:
            image = self.preprocess(img).unsqueeze(0).to(self.device)

        with torch.no_grad():
            image_features = self.model.encode_image(image)
            text_features = self.model.encode_text(
                clip.tokenize(text_prompt, truncate=True).to(self.device)
            )
            # Unit-normalise both embeddings so the dot product is a cosine.
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
            text_features = text_features / text_features.norm(dim=-1, keepdim=True)
            similarity = (100.0 * image_features @ text_features.T).item()
        return similarity

def iterate_over_img_dir(img_dir: str) -> Iterator[str]:
    """Lazily yield the path of every JPEG/PNG file found under ``img_dir``.

    The directory tree is walked recursively. Files are matched on their
    ``.jpg`` / ``.jpeg`` / ``.png`` suffix regardless of letter case, and both
    sub-directories and file names are visited in sorted order so repeated
    runs list the images in the same sequence.

    Args:
        img_dir: Root directory to search. A missing directory yields nothing.

    Yields:
        ``os.path.join(root, file)`` for each matching file.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    for root, dirs, files in os.walk(img_dir):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        for file in sorted(files):
            if file.lower().endswith(image_suffixes):
                yield os.path.join(root, file)

In [2]:
# Limit this process to GPU index 3; CLIPSimilarityLoss picks "cuda" whenever
# torch.cuda.is_available() reports a device.
# NOTE(review): this runs after `import torch` in the first cell -- presumably CUDA
# has not been initialised yet at that point, so the setting still applies; confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [3]:
# Subject identifiers. Membership in `men` selects the "man" prompt wording in
# calculate_clip_similarity; every other subject gets the "woman" wording.
men = ["al", "charles", "chow", "diego", "jackie", "robert"]
women = ["elaine", "elizabeth", "jennifer", "nicole", "thatcher", "oprah"]

# Ethnicity descriptor interpolated into each subject's prompt text.
ethnicity = dict(
    al="white",
    charles="white",
    chow="asian",
    diego="hispanic",
    jackie="asian",
    robert="white",
    elaine="asian",
    elizabeth="white",
    jennifer="white",
    nicole="white",
    thatcher="white",
    oprah="black",
)

In [4]:
def calculate_clip_similarity(img_dir: str, person: str, clip_loss: "Optional[CLIPSimilarityLoss]" = None):
    """Return the mean CLIP image/prompt similarity over one subject's images.

    Images are read from ``<img_dir>/<person>/<condition>/`` (searched
    recursively) for each evaluated condition. The target age is the text
    after the last underscore of the file name, e.g. ``img_50.jpg`` -> ``50``,
    and replaces the ``TARGETAGE`` marker in the condition's prompt.

    Args:
        img_dir: Root directory that contains one folder per subject.
        person: Subject identifier; must be a key of ``ethnicity``. Subjects
            listed in ``men`` get the "man" prompts, all others the "woman"
            prompts.
        clip_loss: Optional, already constructed scorer to reuse. When omitted
            a new ``CLIPSimilarityLoss`` is created, which loads the model
            again on every call.

    Returns:
        ``np.mean`` of all scores, or NaN when no image could be scored.

    Raises:
        KeyError: If ``person`` has no entry in ``ethnicity``.
    """
    # Look the subject up first so an unknown name fails before the model loads.
    person_ethnicity = ethnicity[person]

    if clip_loss is None:
        clip_loss = CLIPSimilarityLoss()
    results = []

    conditions = ["alcoholism", "poorskin", "goodskin", "hairloss", "weight"]

    # Prompt templates keyed by condition. TARGETAGE is a literal marker that is
    # replaced per image. "sunlight" and "loseweight" are kept for completeness
    # but are not evaluated because they are not listed in `conditions`.
    woman_prompts = {
        "weight": f"A portrait of a TARGETAGE-year-old {person_ethnicity} woman with a fuller, rounder face, prominent cheeks, and a softer jawline. Her chin appears less defined, with subtle fat deposits around the neck. Her skin is smooth but slightly stretched, with faint nasolabial folds. Eyelids are slightly puffier, and her cheeks have a healthy, plump appearance.",
        "hairloss": f"A portrait of a TARGETAGE-year-old {person_ethnicity} woman with noticeable hair thinning, particularly along the part and temples. Her hairline is slightly receding, revealing more of her forehead. Strands appear finer, with some sparse areas exposing the scalp. Her eyebrows may also appear slightly thinner, and her overall hair volume is reduced, making her facial contours more pronounced.",
        "sunlight": f"A portrait of a TARGETAGE-year-old {person_ethnicity} woman with slightly tanned skin, fine lines around her eyes and mouth, and subtle sunspots across her cheeks and forehead. Her skin appears slightly rougher, with a faint leathery texture. Crow’s feet are more pronounced, and her lips have slight vertical lines. Her hair may have lighter, sun-bleached strands, and her overall complexion has a weathered yet warm glow.",
        "poorskin": f"A portrait of a TARGETAGE-year-old {person_ethnicity} woman with uneven skin tone, visible pores, and a slightly rough texture. Fine lines on her forehead and around her mouth appear more pronounced, with mild sagging near the jawline. Her complexion looks dull, with occasional blemishes, redness, or dry patches. Dark circles or puffiness may be present under her eyes, and her skin has a slightly tired, lackluster appearance.",
        "alcoholism": f"A portrait of a TARGETAGE-year-old {person_ethnicity} woman with a slightly reddened complexion, visible broken capillaries around the nose and cheeks, and uneven skin tone. Fine lines and deeper wrinkles appear around her eyes and mouth, with mild puffiness in the eyelids and under-eye area. Her skin looks dehydrated, with a dull texture and slight sagging around the jawline. Dark circles and a tired expression give her a worn appearance.",
        "loseweight": f"A portrait of a TARGETAGE-year-old {person_ethnicity} woman with a lean, well-defined face, prominent cheekbones, and a more sculpted jawline. Her skin appears firm and slightly taut, with reduced fullness around her cheeks and under her chin. Fine lines around her eyes and mouth are subtly visible, but her complexion looks radiant and healthy. Her eyes appear more alert, and her overall facial structure is more toned, reflecting improved muscle definition and lower body fat.",
        "goodskin": f"A portrait of a TARGETAGE-year-old {person_ethnicity} woman with smooth, even-toned skin, a radiant complexion, and a healthy glow. Fine lines around her eyes and mouth are minimal, with firm, well-hydrated skin that appears plump and elastic. Her pores are refined, and her under-eye area looks bright and refreshed, free of noticeable dark circles or puffiness. Her overall facial appearance is youthful, vibrant, and well-maintained.",
    }

    man_prompts = {
        "weight": f"A portrait of a TARGETAGE-year-old {person_ethnicity} man with a fuller, rounder face, softened jawline, and less-defined chin. His cheeks appear plumper, with subtle fat deposits around the neck. Nasolabial folds are slightly deeper, and his eyelids may look puffier. His skin appears smoother but slightly stretched, with a fuller under-chin area contributing to a softer facial contour.",
        "hairloss": f"A portrait of a TARGETAGE-year-old {person_ethnicity} man with a receding hairline and thinning hair on the crown. His forehead appears more prominent, with fine lines becoming more visible. The remaining hair is slightly finer, and sparse areas expose more of the scalp. His eyebrows may appear slightly thinner, and his facial features seem more defined due to reduced hair framing his face.",
        "sunlight": f"A portrait of a TARGETAGE-year-old {person_ethnicity} man with tanned, weathered skin, fine lines etched around his eyes and mouth, and deeper wrinkles on his forehead. His complexion has a slightly rough texture with visible sunspots on his cheeks and forehead. Crow’s feet are more pronounced, and his skin appears slightly leathery with mild sagging around the jawline. His hair may have subtle sun-bleached strands, and his lips show faint dryness or cracking.",
        "poorskin": f"A portrait of a TARGETAGE-year-old {person_ethnicity} man with uneven skin texture, enlarged pores, and a slightly rough, dull complexion. Fine lines are visible on his forehead and around his eyes, with mild sagging near the jawline. His skin appears dehydrated, with occasional redness, blemishes, or dry patches. Dark circles or puffiness under his eyes contribute to a tired appearance, and his overall complexion lacks vibrancy.",
        "alcoholism": f"A portrait of a TARGETAGE-year-old {person_ethnicity} man with a slightly reddened complexion, visible broken capillaries around his nose and cheeks, and an uneven skin tone. His face appears slightly puffy, especially around the eyes and jawline, with dark circles and mild under-eye bags. Fine lines on his forehead and around his mouth are more pronounced, and his skin looks dehydrated, dull, and slightly sagging. His lips may appear dry, and his overall expression seems fatigued and worn.",
        "loseweight": f"A portrait of a TARGETAGE-year-old {person_ethnicity} man with a lean, well-defined face, prominent cheekbones, and a sharper jawline. His skin appears firm and slightly taut, with reduced fullness around the cheeks and neck. Fine lines on his forehead and around his eyes are subtly visible, but his complexion looks healthier and more vibrant. His eyes appear more alert, and his overall facial structure is more chiseled, reflecting improved muscle tone and lower body fat.",
        "goodskin": f"A portrait of a TARGETAGE-year-old {person_ethnicity} man with smooth, even-toned skin, a well-hydrated complexion, and a healthy glow. Fine lines on his forehead and around his eyes are minimal, and his skin appears firm with good elasticity. His pores are refined, and his under-eye area looks refreshed without noticeable dark circles or puffiness. His overall facial appearance is vibrant, youthful, and well-maintained.",
    }

    # The wording depends only on the subject, so pick the table once.
    prompts = man_prompts if person in men else woman_prompts

    for condition in conditions:
        target_prompt = prompts[condition]
        exp_dir = os.path.join(img_dir, person, condition)
        print(f"exp dir: {exp_dir}")
        for img_path in iterate_over_img_dir(exp_dir):
            img_name = img_path
            # "…/img_50.jpg" -> "img_50" -> "50"; basename() copes with any path separator.
            target_age = os.path.basename(img_path).split(".")[0].split("_")[-1]

            try:
                if not target_age.isdigit():
                    # Without a numeric age the prompt would read e.g.
                    # "a portrait-year-old …" and skew the average.
                    raise ValueError(f"cannot read a target age from file name {os.path.basename(img_path)!r}")

                target_age_prompt = target_prompt.replace("TARGETAGE", str(target_age))

                similarity_score = clip_loss.calculate_similarity(img_path, target_age_prompt)
                print(f"similarity score: {similarity_score}")
                results.append(similarity_score)

                print(f"Image: {img_name}, Similarity Score: {similarity_score:.2f}")
            except Exception as e:
                # Best effort: report the failing image and keep scoring the rest.
                print(f"Error processing {img_path}: {e}")

    if not results:
        # np.mean([]) would also give NaN, but only via a RuntimeWarning.
        print(f"No images were scored for {person} under {img_dir}")
        return np.float64("nan")

    return np.mean(results)

In [5]:
# One score list per compared method; each evaluation cell appends one mean
# CLIP score per subject to the list of the method it evaluates.
# Disabled baselines: mytm, fading
ours, flowedit, rfsolver, rfinversion, fireflow = [], [], [], [], []

In [8]:
celebs = ['al', 'charles', 'chow', 'diego', 'elaine', 'elizabeth', 'jackie', 'jennifer', 'nicole', 'oprah', 'robert', 'thatcher']

# Same root for every subject, so it is set once outside the loop.
exp_dir_parent = '/playpen-nas-ssd/gongbang/comparisons/fireflow/'
# Per-subject alternative (would have to be assigned inside the loop):
# exp_dir_parent = f'/playpen-nas-ssd/gongbang/personalized/{celeb}'

# Start from an empty list so re-running this cell replaces the scores instead
# of appending a second copy of every subject's result.
fireflow = []

for celeb in celebs:
    sim = calculate_clip_similarity(exp_dir_parent, celeb)
    print(f'Experiment: {exp_dir_parent}')
    print(f'CLIP Score: {sim}')
    print('-'*10)
    fireflow.append(sim)

exp dir: /playpen-nas-ssd/gongbang/comparisons/fireflow/al/alcoholism
similarity score: 30.484375
Image: /playpen-nas-ssd/gongbang/comparisons/fireflow/al/alcoholism/34_20/img_50.jpg, Similarity Score: 30.48
similarity score: 29.796875
Image: /playpen-nas-ssd/gongbang/comparisons/fireflow/al/alcoholism/34_20/img_90.jpg, Similarity Score: 29.80
similarity score: 30.046875
Image: /playpen-nas-ssd/gongbang/comparisons/fireflow/al/alcoholism/34_20/img_80.jpg, Similarity Score: 30.05
similarity score: 30.21875
Image: /playpen-nas-ssd/gongbang/comparisons/fireflow/al/alcoholism/34_20/img_70.jpg, Similarity Score: 30.22
similarity score: 30.15625
Image: /playpen-nas-ssd/gongbang/comparisons/fireflow/al/alcoholism/34_20/img_60.jpg, Similarity Score: 30.16
similarity score: 29.96875
Image: /playpen-nas-ssd/gongbang/comparisons/fireflow/al/alcoholism/34_20/img_40.jpg, Similarity Score: 29.97
similarity score: 24.25
Image: /playpen-nas-ssd/gongbang/comparisons/fireflow/al/alcoholism/35_40/img_50.

In [9]:
# Average of the per-subject scores collected in `fireflow`. Each entry is already
# the mean over that subject's images, so every subject counts equally no matter
# how many images it contributed.
np.mean(fireflow)

np.float64(29.91260463344828)