In [2]:
import os
import torch
from torchmetrics.image.kid import KernelInceptionDistance
from torchvision.io import read_image

def get_features(directory):
    """Load every PNG/JPEG image in ``directory`` into a single batch tensor.

    Despite the name, this returns raw pixel data rather than extracted
    features; ``get_kid`` passes the batch straight to
    ``KernelInceptionDistance.update``.

    Args:
        directory: Folder containing the images. Only files directly inside it
            whose extension is ``.png``, ``.jpg`` or ``.jpeg`` (matched
            case-insensitively) are read.

    Returns:
        A tensor of shape ``(N, 3, H, W)`` holding the images in sorted
        filename order, with the dtype produced by ``read_image`` (uint8
        according to the torchvision documentation).

    Raises:
        ValueError: If the folder contains no matching image file.
        RuntimeError: Raised by ``torch.stack`` when the images do not all
            share the same height and width.
    """
    # Function-scope import: the notebook's import cell only brings in
    # ``read_image`` from torchvision.io.
    from torchvision.io import ImageReadMode

    # Sort so the batch order does not depend on the filesystem's listing order.
    image_paths = sorted(
        os.path.join(directory, fname)
        for fname in os.listdir(directory)
        if fname.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    if not image_paths:
        raise ValueError(f"No .png/.jpg/.jpeg images found in {directory!r}")

    # Decode everything as 3-channel RGB. Without this, a grayscale or RGBA
    # file changes the channel count and the batch layout would be corrupted.
    imgs = [read_image(image_path, mode=ImageReadMode.RGB) for image_path in image_paths]

    # ``stack`` adds the batch axis explicitly and fails loudly on a shape
    # mismatch, instead of concatenating channels and reshaping blindly.
    return torch.stack(imgs)

def get_kid(folder1, folder2, to_print=True, subset_size=10, seed=None):
    """Compute the Kernel Inception Distance between two image folders.

    Args:
        folder1: Folder whose images are registered as the *real* set.
        folder2: Folder whose images are registered as the *generated* set.
        to_print: When true, print the mean and standard deviation of the score.
        subset_size: Number of images drawn per random subset by
            ``KernelInceptionDistance``. Defaults to 10, the value that was
            previously hard-coded.
        seed: Optional integer. When given, torch's global RNG is seeded just
            before the score is computed so repeated runs give the same
            number. This assumes the metric samples its subsets from torch's
            global RNG (TODO confirm against the installed torchmetrics
            version). Note that seeding mutates global RNG state.

    Returns:
        The value of ``KernelInceptionDistance.compute()``; the code below
        indexes it as ``(mean, std)``.

    Raises:
        ValueError: If either folder holds fewer than ``subset_size`` images.
    """
    real_images = get_features(folder1)
    generated_images = get_features(folder2)

    # Fail fast, before the Inception network is built and features extracted:
    # the metric cannot draw a subset larger than either image set.
    smallest_set = min(len(real_images), len(generated_images))
    if subset_size > smallest_set:
        raise ValueError(
            f"subset_size={subset_size} exceeds the smaller image set "
            f"({smallest_set} images); pass a smaller subset_size"
        )

    kid = KernelInceptionDistance(subset_size=subset_size)
    kid.update(real_images, real=True)
    kid.update(generated_images, real=False)

    if seed is not None:
        # Make the random subset selection repeatable across runs.
        torch.manual_seed(seed)
    kid_score = kid.compute()

    if to_print:
        print(f"KID Score: Mean {kid_score[0]:.3f}, StdDev {kid_score[1]:.3f}")
    return kid_score

In [14]:
# Spiderman reference images (real set) vs. the `baseline_spiderman` samples.
# The trailing `;` hides the returned tuple without rebinding IPython's `_`
# output-history variable; get_kid still prints the score itself.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_spideruniverse_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/baseline_spiderman/samples',
);



KID Score: Mean 0.174, StdDev 0.025


In [3]:
# Spiderman reference images (real set) vs. the `a-photo-of-spiderman` outputs.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_spideruniverse_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/a-photo-of-spiderman',
)



KID Score: Mean 0.121, StdDev 0.019


(tensor(0.1209), tensor(0.0187))

In [7]:
# Spiderman reference images (real set) vs. the `spiderman_vec5` samples.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_spideruniverse_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/spiderman_vec5/samples',
)



KID Score: Mean 0.082, StdDev 0.015


(tensor(0.0824), tensor(0.0145))

In [8]:
# Spiderman reference images (real set) vs. the `spiderman_vec20` samples.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_spideruniverse_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/spiderman_vec20/samples',
)

KID Score: Mean 0.079, StdDev 0.015


(tensor(0.0786), tensor(0.0147))

In [11]:
# Spiderman reference images (real set) vs. the `a-photo-of-spiderman-vec40` outputs.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_spideruniverse_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/a-photo-of-spiderman-vec40',
)

KID Score: Mean 0.114, StdDev 0.014


(tensor(0.1136), tensor(0.0141))

In [22]:
# Spiderman reference images (real set) vs. the `a-photo-of-spiderman-vec40-v2` samples.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_spideruniverse_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/a-photo-of-spiderman-vec40-v2/samples',
)



KID Score: Mean 0.076, StdDev 0.017


(tensor(0.0765), tensor(0.0173))

In [17]:
# Mumbattan reference images (real set) vs. the `baseline_world_of_mumbattan` samples.
# NOTE(review): this path uses `outputsc/` whereas every other cell reads from
# `outputs/txt2img-samples/` — confirm the directory is intentional.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/mumbattan',
    folder2='/home/ubuntu/textual_inversion/outputsc/baseline_world_of_mumbattan/samples',
)

KID Score: Mean 0.251, StdDev 0.019


(tensor(0.2509), tensor(0.0186))

In [20]:
# Mumbattan reference images (real set) vs. the `world-of-mumbattan` samples.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/mumbattan',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/world-of-mumbattan/samples',
)

KID Score: Mean 0.049, StdDev 0.009


(tensor(0.0494), tensor(0.0089))

In [28]:
# Spiderman-in-Mumbattan reference images (real set) vs. the
# `baseline-spiderman-in-mumbattan` samples.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_mumbattan_mask/Spiderman_mumbattan_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/baseline-spiderman-in-mumbattan/samples',
)

KID Score: Mean 0.251, StdDev 0.012


(tensor(0.2514), tensor(0.0119))

In [24]:
# Spiderman-in-Mumbattan reference images (real set) vs. the
# `merged-spiderman-vec40-mumbattan-vec1` samples.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_mumbattan_mask/Spiderman_mumbattan_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/merged-spiderman-vec40-mumbattan-vec1/samples',
)



KID Score: Mean 0.120, StdDev 0.014


(tensor(0.1197), tensor(0.0143))

In [26]:
# Spiderman-in-Mumbattan reference images (real set) vs. the
# `merged-spiderman-in-mumbattan-vec1` outputs.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_mumbattan_mask/Spiderman_mumbattan_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/merged-spiderman-in-mumbattan-vec1',
)

KID Score: Mean 0.135, StdDev 0.010


(tensor(0.1352), tensor(0.0101))

In [27]:
# Spiderman-in-Mumbattan reference images (real set) vs. the
# `merged-spiderman-in-mumbattan-vec40` outputs.
get_kid(
    folder1='/home/ubuntu/textual_inversion/img/Spiderman_mumbattan_mask/Spiderman_mumbattan_mask',
    folder2='/home/ubuntu/textual_inversion/outputs/txt2img-samples/merged-spiderman-in-mumbattan-vec40',
)

KID Score: Mean 0.386, StdDev 0.021


(tensor(0.3858), tensor(0.0207))