# Automatic Evaluation
* A: Prompt size N
* B: Gen Style Images = N x 64
* C: Style Images = N x 64
* D: Gen Content Images = N x 64
* E: Content Images = 20

* Textual Inversion: (D, E) and (B, A)
* Ours: (B, C), (B, E), (B, A), (B, E)

In [2]:
import ast
import os

import numpy as np
import torch
from IPython.display import display
from IPython.utils.capture import capture_output
from PIL import Image
# Pick the torch device string used by every metric below: GPU when CUDA is usable, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
from fid_score import calculate_fid_given_paths

## Style evaluation using FID

In [16]:
# Style FID: compare each prompt's baseline generations against the personalized
# generations of every evaluated user, then report the mean over all pairs.
genBaseDir = '/home/chengping/Dreambooth-Stable-Diffusion/outputs-personalize'
# Alternative root (textual-inversion outputs); swap in to score that method instead.
# genBaseDir = '/home/chengping/textual_inversion/outputs'
baselineBaseDir = '/home/chengping/Dreambooth-Stable-Diffusion/outputs-baseline'

total_fid = []
for pt in range(20):
    # One baseline sample folder per prompt index.
    genDir = os.path.join(baselineBaseDir, f'prompt{pt:02}', 'samples')
    for user in (1, 2, 5, 9):
        styleDir = os.path.join(genBaseDir, f'prompt{pt:02}-user{user:02}', 'samples')
        pair_fid = calculate_fid_given_paths(
            [genDir, styleDir],
            batch_size=16,
            device=device,
            dims=2048,
            num_workers=8,
        )
        total_fid.append(pair_fid)

# Average FID across every (prompt, user) pair.
print(np.mean(total_fid))

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.02it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.15it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.32it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.28it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.34it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.43it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s]
100%|███████████████████████

175.72105647827007


## Content evaluation using FID

In [9]:
# FID between one generated-sample folder and one content-image folder.
# NOTE(review): this cell relies on kernel state left by other cells. `genDir` is
# whatever the last iteration of the style-FID loop assigned, and `contentDir` is not
# assigned anywhere above this cell (the only assignment in the notebook is inside the
# face-verification cell further down). On a fresh kernel this raises NameError.
# TODO: define both directories explicitly here once the intended pair is confirmed.
calculate_fid_given_paths(
    [genDir, contentDir],
    batch_size=64,
    device=device,
    dims=2048,
    num_workers=8,
)

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.09it/s]




100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s]


140.68327619988702

## Style evaluation using CLIP

In [5]:
import clip
from clipscore import get_clip_score, extract_all_images

In [6]:
# Load the CLIP ViT-B/32 model onto `device`; the second value returned by
# clip.load is not needed in this notebook and is discarded.
model, _ = clip.load("ViT-B/32", device=device, jit=False)
# Put the model in eval mode. The trailing semicolon keeps the notebook from
# echoing the module repr, replacing the former `print()` trick that left a
# stray blank line in the output.
model.eval();




In [7]:
# Load the first 20 prompt records from the human-written prompt file.
# Each line is parsed as a Python literal; later cells read the "new_prompt"
# key from every record.
# SECURITY: the original used eval() on file contents, which executes arbitrary
# code. ast.literal_eval accepts only literals (dict/list/str/number/...), so a
# tampered prompt file can no longer run code. If a line legitimately contains a
# non-literal expression this now raises ValueError instead of evaluating it.
with open('/home/chengping/cse291/project/prompts_human.txt') as f:
    # Slice before parsing so lines beyond the first 20 are never evaluated.
    lines = [ast.literal_eval(line.strip()) for line in f.readlines()[:20]]

In [17]:
# Root folder of the generations scored by the CLIP cell that follows.
genBaseDir = '/home/chengping/Dreambooth-Stable-Diffusion/outputs-personalize'
# Alternative root (textual-inversion outputs); swap in to score that method instead.
# genBaseDir = '/home/chengping/textual_inversion/outputs'

# Build the text side of the CLIP comparison: take each record's "new_prompt"
# and drop the ' {user}' placeholder so only the plain prompt text remains.
prompts = []
for record in lines:
    prompts.append(record["new_prompt"].replace(' {user}', ''))

In [18]:
# CLIP score per prompt: pool the generated images of all evaluated users for a
# prompt, score them against that prompt's text, and average. The cell's value
# is the mean of the per-prompt averages.

# Explicit extension whitelist. The previous filter (`name[-1] == 'g'`) accepted
# any file whose name ends in "g" (e.g. "run.log") and skipped upper-case
# extensions such as ".PNG".
IMAGE_EXTS = ('.png', '.jpg', '.jpeg')

total_clip_scores = []
for pt in range(20):
    styleFiles = []
    for user in [1, 2, 5, 9]:
        styleDir = os.path.join(genBaseDir, f'prompt{pt:02}-user{user:02}', 'samples')
        # os.listdir returns entries in arbitrary order; sort for reproducible batches.
        styleFiles += [
            os.path.join(styleDir, name)
            for name in sorted(os.listdir(styleDir))
            if name.lower().endswith(IMAGE_EXTS)
        ]

    # Fail with a clear message instead of an opaque error from feature extraction.
    if not styleFiles:
        raise FileNotFoundError(f'no generated images found for prompt {pt:02} under {genBaseDir}')

    image_feats = extract_all_images(styleFiles, model, device, batch_size=64, num_workers=8)
    # Every image of this prompt is compared against the same prompt text; only the
    # second return value is used and averaged below.
    _, clipscores, _ = get_clip_score(model, image_feats, [prompts[pt]] * len(styleFiles), device)
    total_clip_scores.append(np.mean(clipscores))
np.mean(total_clip_scores)

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s]
100%|█████████████████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.27it/s]
100%|█████████████████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.53it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s]
100%|█████████████████████████████████████████████| 1/1 [00:01<00:00,  1.06s/it]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.37it/s]
100%|█████████████████████████████████████████████| 1/1 [00:01<00:00,  1.03s/it]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s]
100%|███████████████████████

0.8027

## Content evaluation using Face Verification

In [21]:
from deepface import DeepFace

In [25]:
# Input folders for the face-verification metric.
# NOTE: this rebinds `genBaseDir` to '.../outputs', a different folder from the
# '.../outputs-personalize' root used by the FID and CLIP cells. Re-running those
# cells after this one will silently read from this folder instead.
genBaseDir, contentBaseDir = (
    '/home/chengping/Dreambooth-Stable-Diffusion/outputs',
    '/home/chengping/Dreambooth-Stable-Diffusion/celebA/images',
)

In [None]:
# Content preservation via face verification.
# For every evaluated user: gather that user's generated images across all 20
# prompts, compare each one against every reference image of the user with
# DeepFace.verify, and convert the returned "distance" to a similarity (1 - distance).
# The cell's value is the mean over users of the per-user mean similarity.

# Explicit extension whitelist. The previous filter (`name[-1] == 'g'`) accepted
# any file whose name ends in "g" and skipped upper-case extensions.
IMAGE_EXTS = ('.png', '.jpg', '.jpeg')

total_content = []

# The former `%%capture` cell magic silenced DeepFace's output but also captured
# the cell's final expression, so the resulting score was never displayed.
# Scoping the capture to the loop keeps the output quiet and lets the last line render.
with capture_output():
    for user in [1, 2, 5, 9]:
        genImages = []
        for prompt in range(20):
            gendir = os.path.join(genBaseDir, f'prompt{prompt:02}-user{user:02}', 'samples')
            genImages += [
                os.path.join(gendir, name)
                for name in sorted(os.listdir(gendir))
                if name.lower().endswith(IMAGE_EXTS)
            ]

        contentDir = os.path.join(contentBaseDir, f'user{user:02}')
        contentImages = [
            os.path.join(contentDir, name)
            for name in sorted(os.listdir(contentDir))
            if name.lower().endswith(IMAGE_EXTS)
        ]

        # Explicit errors instead of ZeroDivisionError (no references) or a silent
        # NaN from np.mean([]) (no generations).
        if not contentImages:
            raise FileNotFoundError(f'no reference images found in {contentDir}')
        if not genImages:
            raise FileNotFoundError(f'no generated images found for user {user:02} under {genBaseDir}')

        similarity = []
        for g in genImages:
            # Average similarity of this generated image to all reference images.
            s = sum(
                1 - DeepFace.verify(img1_path=g, img2_path=c, enforce_detection=False, prog_bar=False)["distance"]
                for c in contentImages
            )
            similarity.append(s / len(contentImages))

        total_content.append(np.mean(similarity))

np.mean(total_content)