In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime,
# which provides the `google.colab` package).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install the third-party packages needed by the metric code in this notebook.
# NOTE(review): ftfy/regex/tqdm are not imported directly in the visible cells;
# presumably they are dependencies of the CLIP tokenizer - confirm they are still needed.
!pip install ftfy regex tqdm
#!pip install transformers
# open_clip_torch provides the `open_clip` module (CLIP score).
!pip install open_clip_torch
# lpips provides the LPIPS perceptual distance.
!pip install lpips
# piq provides `multi_scale_ssim` (MS-SSIM).
!pip install piq

# Image Preprocessing

In [None]:
import lpips
import torch
import torchvision.transforms as transforms
from PIL import Image
import torch.nn.functional as F


# Load image
def load_image(path):
    """Load an image as a normalized, batched tensor.

    The image is converted to RGB, resized to 512x512, converted to a
    tensor and normalized per channel with mean 0.5 and std 0.5, which
    maps the [0, 1] output of ``ToTensor`` to [-1, 1].

    Args:
        path: Path of the image file to load.

    Returns:
        Tensor of shape (1, 3, 512, 512) with values in [-1, 1].
    """
    transform = transforms.Compose([
        transforms.Resize((512, 512)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # Use a context manager so the underlying file is closed deterministically
    # (Pillow does not auto-close multi-frame files). convert() returns an
    # independent in-memory copy, so it stays valid after the file is closed.
    with Image.open(path) as img:
        rgb = img.convert('RGB')
    return transform(rgb).unsqueeze(0)

# Load image without normalization (for MS-SSIM)
def load_image_unNorm(path):
    """Load an image as a batched tensor without mean/std normalization.

    The image is converted to RGB, resized to 512x512 and converted to a
    tensor; values stay in the [0, 1] range produced by ``ToTensor``.

    Args:
        path: Path of the image file to load.

    Returns:
        Tensor of shape (1, 3, 512, 512) with values in [0, 1].
    """
    t = transforms.Compose([transforms.Resize((512, 512)),
                            transforms.ToTensor()])
    # Use a context manager so the underlying file is closed deterministically
    # (Pillow does not auto-close multi-frame files). convert() returns an
    # independent in-memory copy, so it stays valid after the file is closed.
    with Image.open(path) as img:
        rgb = img.convert('RGB')
    return t(rgb).unsqueeze(0)


# Metric

In [None]:
import lpips
import torch
import torchvision.transforms as transforms
import open_clip
from PIL import Image
from piq import multi_scale_ssim
from torchvision.utils import save_image

# Run everything on the GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
source_image_path = "/Source_img.png" # Source image path
target_image_path = "/Target_img.png" # Target image path
text_prompt = 'a lineart sketch'  # Prompt (for CLIP score)

# LPIPS
# load_image() already scales pixels to [-1, 1], which is the range LPIPS
# expects by default. Passing normalize=True would make LPIPS treat the input
# as [0, 1] and rescale it a second time, so the flag is left at its default.
Source_image = load_image(source_image_path)
Target_image = load_image(target_image_path)
lpips_model = lpips.LPIPS(net='alex').to(device)
with torch.no_grad():  # inference only; no autograd graph needed
    Lpips = lpips_model(Source_image.to(device), Target_image.to(device))

# MS-SSIM
# Uses the un-normalized loader so values are in [0, 1], matching data_range=1.0.
Source_image_unNorm = load_image_unNorm(source_image_path)
Target_image_unNorm = load_image_unNorm(target_image_path)
with torch.no_grad():
    MS_SSIM = multi_scale_ssim(Source_image_unNorm, Target_image_unNorm, data_range=1.0)

# CLIP score
clip_model, _, clip_preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
tokenizer = open_clip.get_tokenizer('ViT-B-32')
# Honor the device selected above instead of hard-coding CUDA, so the cell
# also runs on CPU-only runtimes.
clip_model.eval().to(device)

# NOTE(review): the CLIP score is computed on the *source* image; confirm this
# is the intended image to compare against the prompt (rather than the target).
image = clip_preprocess(Image.open(source_image_path).convert("RGB")).unsqueeze(0).to(device)
text = tokenizer([text_prompt]).to(device)

with torch.no_grad():
    img_feat = clip_model.encode_image(image)
    txt_feat = clip_model.encode_text(text)

    # L2-normalize both embeddings so the dot product is a cosine similarity.
    img_feat /= img_feat.norm(dim=-1, keepdim=True)
    txt_feat /= txt_feat.norm(dim=-1, keepdim=True)

    CLIP_score = (img_feat @ txt_feat.T)



print('MS_SSIM:',MS_SSIM.item()) # Higher is better
print('LPIPS:',Lpips.item())   # Lower is better
print('CLIP_score:',CLIP_score.item()) # Higher is better
