<a href="https://colab.research.google.com/github/copyrightFreeGenAI/copyrightFreeImagesGenAI/blob/main/4.%20Scores/Scores.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Set Paths
# NOTE(review): the annVal* names and the "Validation" image folder suggest the
# validation split, but both annotation files below are the train2014 ones -
# confirm that this pairing is intended.
annValInstanceFiles = "/content/gdrive/MyDrive/annotations2014/instances_train2014.json" # Path to benchmark instance json
annValCaptionFiles = "/content/gdrive/MyDrive/annotations2014/captions_train2014.json" # Path to benchmark caption json
source_directory = "/content/gdrive/MyDrive/DATA/COCO/Validation/Images" # Path to directory where benchmark MS COCO images are saved (one sub-folder per category name)

# Generated-image roots. Each keeps its trailing "/" because the category name
# is appended by plain string concatenation further down.
mitsua_target_directory = "/content/gdrive/MyDrive/DATA/Mitsua/" # Path to directory where Mitsua images are saved
sd21_target_directory = "/content/gdrive/MyDrive/DATA/SD2.1/" # Path to directory where SD2.1 images are saved
juggxl_target_directory = "/content/gdrive/MyDrive/DATA/JuggXL/" # Path to directory where JuggXL images are saved
dalle2_target_directory = "/content/gdrive/MyDrive/DATA/Dalle2/" # Path to directory where Dall-E-2 images are saved
DREAM_target_directory = "/content/gdrive/MyDrive/Dream/Dream/" # Path where DreamBooth images are saved
LORA_target_directory = "/content/gdrive/MyDrive/LoRA/LoRA/" # Path where LoRA images are saved
TI_target_directory = "/content/gdrive/MyDrive/Textual-Inversion/Textual-Inversion/Images/" # Path where TI images are saved

# Despite the *_directory names, these two are CSV *file* paths.
baseline_scores_directory = "/content/gdrive/MyDrive/Scores/Scores/baseline.csv" # Path to store scores for baseline models
finetuned_scores_directory = "/content/gdrive/MyDrive/Scores/Scores/finetuned.csv" # Path to store fine-tuned scores

In [None]:
# Mount Google Drive at /content/gdrive, the prefix shared by every path
# configured in this notebook. The google.colab module is only importable
# inside a Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import requests
import io
from PIL import Image
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import numpy as np
from skimage import io, transform
from numpy import cov
from numpy import trace
from numpy import iscomplexobj
from numpy import asarray
from numpy.random import randint
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input
from io import BytesIO
from numpy.linalg import svd
from skimage.transform import resize
import torch
from transformers import CLIPProcessor, CLIPModel
import os
import pandas as pd
from scipy.linalg import sqrtm

In [None]:
# Build the pycocotools indexes: one over the instance annotations (categories
# and image ids) and one over the caption annotations.
coco = COCO(annValInstanceFiles)
coco_caps = COCO(annValCaptionFiles)

# Category records for every category id known to the instance annotations.
all_category_ids = coco.getCatIds()
cats = coco.loadCats(all_category_ids)

In [None]:
# InceptionV3 without its classification head; global average pooling makes it
# emit one feature vector per 299x299 RGB image (used for FID).
fid_model = InceptionV3(
    include_top=False,
    pooling='avg',
    input_shape=(299, 299, 3),
)

# CLIP model and its processor must come from the same checkpoint.
clip_checkpoint = "openai/clip-vit-base-patch32"
clip = CLIPModel.from_pretrained(clip_checkpoint)
clip_processor = CLIPProcessor.from_pretrained(clip_checkpoint)

In [None]:
# Frechet Inception Distance between two image sets.
def calculate_fid(model, images1, images2):
    """Return the Frechet distance between the feature statistics of two sets.

    Args:
        model: Feature extractor; only ``model.predict(batch)`` is used and it
            must return a 2-D array with one feature row per image.
        images1: First batch of images, passed to ``model.predict`` as-is.
        images2: Second batch of images, passed to ``model.predict`` as-is.

    Returns:
        ``||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2))`` where ``mu`` and
        ``S`` are the per-set feature mean and covariance.
    """
    features_a = model.predict(images1)
    features_b = model.predict(images2)

    # First- and second-order statistics; rows are samples, columns features.
    mean_a = features_a.mean(axis=0)
    cov_a = np.cov(features_a, rowvar=False)
    mean_b = features_b.mean(axis=0)
    cov_b = np.cov(features_b, rowvar=False)

    squared_mean_gap = np.sum((mean_a - mean_b) ** 2.0)

    # Matrix square root of the covariance product; numerical error can leave
    # a complex result, in which case only the real part is kept.
    cov_sqrt = sqrtm(cov_a.dot(cov_b))
    if np.iscomplexobj(cov_sqrt):
        cov_sqrt = cov_sqrt.real

    return squared_mean_gap + np.trace(cov_a + cov_b - 2.0 * cov_sqrt)

In [None]:
def calculate_clip(clip, processor, prompt, image):
    """Return the mean CLIP image-text cosine similarity, scaled by 100.

    Args:
        clip: CLIP model. It is moved to the GPU when one is available, called
            as ``clip(**inputs)``, and its output must expose ``image_embeds``
            and ``text_embeds``.
        processor: Processor matching ``clip``; called with ``text=`` and
            ``images=`` and asked for PyTorch tensors.
        prompt: Sequence aligned with ``image``: ``prompt[i]`` is the text (or
            list of texts) scored against ``image[i]``. With several texts the
            per-text similarities are averaged for that image.
        image: Sequence of image arrays. Values may be in [-1, 1], in [0, 1]
            (floating point) or already in [0, 255].

    Returns:
        ``100 * mean(per-image similarity)``; NaN when ``image`` is empty.

    Raises:
        IndexError: If ``image`` has more entries than ``prompt``.
    """
    if len(image) == 0:
        # Same value the mean of an empty list would give, minus the warning.
        return float("nan")

    def _as_processor_image(pixels):
        """Bring one image into the 0..255 range the processor expects."""
        pixels = np.asarray(pixels)
        if pixels.min() < 0:
            # Values in [-1, 1] (e.g. Inception-style preprocessing) -> [0, 1].
            unit = (pixels + 1) / 2
        elif np.issubdtype(pixels.dtype, np.floating) and pixels.max() <= 1.0:
            unit = pixels
        else:
            # Assumed to be in 0..255 already; handed over unchanged.
            return pixels
        # The processor rescales by 1/255 by default, so a [0, 1] float image
        # passed as-is would be scaled twice and look almost black to CLIP.
        return np.rint(np.clip(unit, 0.0, 1.0) * 255.0).astype(np.uint8)

    # Move the CLIP model to the appropriate device (GPU if available)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    clip = clip.to(device)

    clip_scores = []
    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        for idx, pixels in enumerate(image):
            inputs = processor(
                text=prompt[idx],
                images=_as_processor_image(pixels),
                return_tensors="pt",
                padding=True,
            )
            # Move the input tensors to the same device as the model
            inputs = inputs.to(device)
            outputs = clip(**inputs)
            similarity = torch.nn.functional.cosine_similarity(
                outputs.image_embeds, outputs.text_embeds
            )
            # One similarity per text; average them into one score per image.
            clip_scores.append(np.mean(similarity.tolist()))
    return 100 * np.mean(clip_scores)

In [None]:
# scale an array of images to a new size
def scale_images(images, new_shape):
    """Resize every image to ``new_shape`` and stack the results.

    Args:
        images: Iterable of array-likes (H x W or H x W x C).
        new_shape: Target shape passed to ``skimage.transform.resize``,
            e.g. ``(299, 299, 3)``.

    Returns:
        Array of the resized images; an empty array for empty input.
    """
    wants_rgb = len(new_shape) == 3 and new_shape[-1] == 3
    resized = []
    for image in images:
        pixels = np.asarray(image)
        if wants_rgb and pixels.ndim == 3 and pixels.shape[-1] == 4:
            # Drop the alpha channel; otherwise resize would interpolate
            # across the channel axis to squeeze 4 channels into 3.
            pixels = pixels[..., :3]
        # Default (non-nearest) interpolation. preserve_range=True keeps
        # integer inputs on their original 0..255 scale instead of converting
        # them to [0, 1], which is what Inception-style preprocess_input
        # (x / 127.5 - 1) expects.
        resized.append(resize(pixels, new_shape, preserve_range=True))
    return np.asarray(resized)

In [None]:
def load_images_from_directory(directory):
    """Load every image file directly inside ``directory`` as an RGB array.

    Files are selected by extension (case-insensitive): .png, .jpg, .jpeg,
    .bmp, .gif. Each image is converted to RGB so that RGBA, palette and
    grayscale files all come back as H x W x 3 arrays.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        List of arrays in ``os.listdir`` order, which is arbitrary.
        NOTE(review): callers that pair these images with other data by
        position rely on that order - confirm it is what they expect.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_list = []
    for filename in os.listdir(directory):
        if not filename.lower().endswith(image_extensions):
            continue
        img_path = os.path.join(directory, filename)
        # The context manager releases the file handle even for multi-frame
        # formats, which PIL otherwise keeps open.
        with Image.open(img_path) as img:
            image_list.append(np.array(img.convert("RGB")))
    return image_list

In [None]:
row_base = []
row_fine = []

# (CSV column prefix, image root directory) per generator, listed in the order
# the columns appear in the output rows.
baseline_models = [
    ("MITSUA", mitsua_target_directory),
    ("SD", sd21_target_directory),
    ("JUGGXL", juggxl_target_directory),
    ("DALLE2", dalle2_target_directory),
]
finetuned_models = [
    ("DREAM", DREAM_target_directory),
    ("LORA", LORA_target_directory),
    ("TI", TI_target_directory),
]
inception_input_shape = (299, 299, 3)


def _prepare_images(images):
    """Resize images to the Inception input shape and apply its preprocessing."""
    return preprocess_input(scale_images(images, inception_input_shape))


def _score_models(models, category_name, prompt, image_og):
    """Return {"<PREFIX>_CLIP": ..., "<PREFIX>_FID": ...} for each model.

    Images are loaded one model at a time, so only one generated set is held
    in memory alongside the benchmark images.
    """
    scores = {}
    for prefix, root in models:
        # Roots end with "/", so the category folder is appended directly.
        generated = _prepare_images(load_images_from_directory(root + category_name))
        # NOTE(review): generated images are matched to prompts by position,
        # which presumes they are listed in the same order as the benchmark
        # files - confirm.
        scores[prefix + "_CLIP"] = calculate_clip(clip, clip_processor, prompt, generated)
        scores[prefix + "_FID"] = calculate_fid(fid_model, generated, image_og)
    return scores


for cat in cats:
    category_name = cat['name']
    print(category_name)
    category_dir = os.path.join(source_directory, category_name)
    if not os.path.exists(category_dir):
        # No benchmark images were saved for this category.
        continue
    catIds = coco.getCatIds(catNms=[category_name])
    imgIds = coco.getImgIds(catIds=catIds)

    # Benchmark files are expected to be named "<integer>.jpg"; the integer is
    # used as a position in imgIds, not as a COCO image id.
    names_before_dot = [int(os.path.splitext(file)[0]) for file in os.listdir(category_dir)]
    og = []
    prompt = []
    for image_index in names_before_dot:
        og_path = os.path.join(category_dir, str(image_index) + ".jpg")
        img = coco.loadImgs(imgIds[image_index])[0]
        I = io.imread(og_path)
        if I.ndim == 2:  # If the image is grayscale, convert to RGB
            I = np.stack((I,) * 3, axis=-1)
        og.append(I)
        # All reference captions of the image act as its prompts.
        annIds = coco_caps.getAnnIds(imgIds=img['id'])
        anns = coco_caps.loadAnns(annIds)
        prompt.append([item['caption'] for item in anns])

    image_og = _prepare_images(og)
    clip_score_og = calculate_clip(clip, clip_processor, prompt, image_og)

    base_dict = {
        "Category": category_name,
        "Original_CLIP": clip_score_og,
    }
    base_dict.update(_score_models(baseline_models, category_name, prompt, image_og))

    fine_dict = {"Category": category_name}
    fine_dict.update(_score_models(finetuned_models, category_name, prompt, image_og))

    row_base.append(base_dict)
    row_fine.append(fine_dict)

    print(base_dict)
    print(fine_dict)

In [None]:
# Write one CSV per model family. The output folders are created first so the
# results of the scoring run are not lost because a folder is missing.
for csv_path in (baseline_scores_directory, finetuned_scores_directory):
    csv_parent = os.path.dirname(csv_path)
    if csv_parent:
        os.makedirs(csv_parent, exist_ok=True)

df_base = pd.DataFrame(row_base)
df_fine = pd.DataFrame(row_fine)
df_base.to_csv(baseline_scores_directory, index=False)
df_fine.to_csv(finetuned_scores_directory, index=False)