In [148]:
import os
import torch
import zipfile
import numpy as np
from numpy import cov
from numpy import trace
from numpy import asarray
from skimage import metrics
from scipy.linalg import sqrtm
from numpy import iscomplexobj
import matplotlib.pyplot as plt
from numpy.random import randint
import torchvision.models as models
from skimage.transform import resize
from keras.datasets.mnist import load_data
import torchvision.transforms as transforms
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input

In [149]:
%mkdir -p /csc413/project/
%cd /csc413/project

/csc413/project


In [150]:
if os.path.exists("/NST input output result.zip"):
  %mv /NST\ input\ output\ result.zip /csc413/project/NST_input_output_result.zip
elif os.path.exists("/content/NST input output result.zip"):
  %mv /content/NST\ input\ output\ result.zip /csc413/project/NST_input_output_result.zip

In [151]:
zip_path = "/csc413/project/NST_input_output_result.zip"
datadir = "/csc413/project/"
with zipfile.ZipFile(zip_path,"r") as zip_ref:
      zip_ref.extractall(datadir)

In [152]:
if os.path.exists("/cycleGAN out_image.zip"):
  %mv /cycleGAN\ out_image.zip /csc413/project/CycleGAN_output.zip
elif os.path.exists("/content/cycleGAN out_image.zip"):
  %mv /content/cycleGAN\ out_image.zip /csc413/project/CycleGAN_output.zip

zip_path = "/csc413/project/CycleGAN_output.zip"
datadir = "/csc413/project/"
with zipfile.ZipFile(zip_path,"r") as zip_ref:
      zip_ref.extractall(datadir)

In [153]:
# Define a function to load and preprocess images
def load_image_NST(base_path):
    """
    Load and preprocess the image.

    :param img_path: the path of image to load
    :return: the processed image
    """
    image_set = []
    images = sorted(os.listdir(base_path))
    print(images)
    for image in images:
      img = plt.imread(f"{base_path}/{image}")
      img = np.array(img).astype('float32') / 255.0
      image_set.append(img)
    return image_set

In [154]:
# Define a function to load and preprocess images
def load_image_CycGAN(base_path, select_pattern):
    """
    Load and preprocess the image.

    :param img_path: the path of image to load
    :return: the processed image
    """
    image_set = []
    images = sorted(os.listdir(base_path))
    for image in images:
      if select_pattern in image:
        print(image)
        img = plt.imread(f"{base_path}/{image}")
        img = np.array(img).astype('float32') / 255.0
        image_set.append(img)
    return image_set

In [155]:
content_path = "/csc413/project/NST input output result/test data/test_photo"
monet_path = "/csc413/project/NST input output result/test data/test_monet"
vangogh_path = "/csc413/project/NST input output result/test data/test_vangogh"

# NST
monet_vangogh_path = "/csc413/project/NST input output result/out_image/monet_to_vangogh"
vangogh_monet_path = "/csc413/project/NST input output result/out_image/vangogh_to_monet"
photo_monet_path = "/csc413/project/NST input output result/out_image/photo_to_monet"
photo_vangogh_path = "/csc413/project/NST input output result/out_image/photo_to_vangogh"

# cycle GAN
# cyc_gan_base_path = "/csc413/project/out_image"
# monet_vangogh_path = "monet_vangogh"
# vangogh_monet_path = "vangogh_monet"
# photo_monet_path = "photo_monet"
# photo_vangogh_path = "photo_vangogh"

out_path = photo_vangogh_path

In [156]:
# Input 4 lists of 256 x 256 x 3 numpy array
# NST
content = load_image_NST(content_path)
style1 = load_image_NST(monet_path)
style2 = load_image_NST(vangogh_path)
out = load_image_NST(out_path)

# Cycle GAN
# content = load_image_NST(content_path)
# style1 = load_image_NST(monet_path)
# style2 = load_image_NST(vangogh_path)
# out = load_image_CycGAN(cyc_gan_base_path, out_path)
out_size = 6
style_size = 10

['source_photo_1.jpg', 'source_photo_2.jpg', 'source_photo_3.jpg', 'source_photo_4.jpg', 'source_photo_5.jpg', 'source_photo_6.jpg']
['source_monet_1.jpg', 'source_monet_2.jpg', 'source_monet_3.jpg', 'source_monet_4.jpg', 'source_monet_5.jpg', 'source_monet_6.jpg']
['source_vangogh_1.jpg', 'source_vangogh_2.jpg', 'source_vangogh_3.jpg', 'source_vangogh_4.jpg', 'source_vangogh_5.jpg', 'source_vangogh_6.jpg']
['out_photo_to_vangogh_1.jpg', 'out_photo_to_vangogh_2.jpg', 'out_photo_to_vangogh_3.jpg', 'out_photo_to_vangogh_4.jpg', 'out_photo_to_vangogh_5.jpg', 'out_photo_to_vangogh_6.jpg']


In [157]:
metric = {
  "ssim_score": [],
  "style_consistency_score_style1": [],
  "style_consistency_score_style2": [],
  "fid_score": 0.
  }

Structural Similarity

In [158]:
for i in range(out_size):
  # Calculate the structural similarity index
  ssim = metrics.structural_similarity(content[i], out[i], channel_axis=2)

  # Collect the result
  metric["ssim_score"].append(float(ssim))


Gram

In [159]:
# define a function to calculate the Gram matrix of a given feature map
def gram_matrix(feature_maps):
    _, c, h, w = feature_maps.size()
    feature_maps = feature_maps.view(c, h * w)
    gram = torch.mm(feature_maps, feature_maps.t())
    return gram

# load the pre-trained VGG16 model
vgg = models.vgg16(pretrained=True).features

# set the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg.to(device)


for i in range(out_size):
  acc_s1 = []
  acc_s2 = []
  for j in range(style_size):
    c_img = content[i]
    o_img = out[i]
    s1_img = style1[j]
    s2_img = style2[j]
    # set the input images as PyTorch tensors
    content_tensor = transforms.ToTensor()(c_img).unsqueeze(0).to(device)
    style1_tensor = transforms.ToTensor()(s1_img).unsqueeze(0).to(device)
    style2_tensor = transforms.ToTensor()(s2_img).unsqueeze(0).to(device)
    out_tensor = transforms.ToTensor()(o_img).unsqueeze(0).to(device)

    # extract the style features of the input and output images
    style1_features = vgg(style1_tensor)
    style2_features = vgg(style2_tensor)
    out_features = vgg(out_tensor)

    # calculate the Gram matrix distance between the style features of the input and output images
    style1_dist = torch.norm(gram_matrix(style1_features) - gram_matrix(out_features))
    style2_dist = torch.norm(gram_matrix(style2_features) - gram_matrix(out_features))
    acc_s1.append(style1_dist)
    acc_s2.append(style2_dist)

  metric["style_consistency_score_style1"].append(torch.mean(torch.stack(acc_s1), dim=0).item())
  metric["style_consistency_score_style2"].append(torch.mean(torch.stack(acc_s2), dim=0).item())


In [160]:
out = np.array(out)
style1 = np.array(style1)
style2 = np.array(style2)


In [161]:
# scale an array of images to a new size
def scale_images(images, new_shape):
 images_list = list()
 for image in images:
  # resize with nearest neighbor interpolation
  new_image = resize(image, new_shape, 0)
  # store
  images_list.append(new_image)
 return asarray(images_list)
 
# calculate frechet inception distance
def calculate_fid(model, images1, images2):
 # calculate activations
 act1 = model.predict(images1)
 act2 = model.predict(images2)
 # calculate mean and covariance statistics
 mu1, sigma1 = act1.mean(axis=0), cov(act1, rowvar=False)
 mu2, sigma2 = act2.mean(axis=0), cov(act2, rowvar=False)
 # calculate sum squared difference between means
 ssdiff = np.sum((mu1 - mu2)**2.0)
 # calculate sqrt of product between cov
 covmean = sqrtm(sigma1.dot(sigma2))
 # check and correct imaginary numbers from sqrt
 if iscomplexobj(covmean):
  covmean = covmean.real
 # calculate score
 fid = ssdiff + trace(sigma1 + sigma2 - 2.0 * covmean)
 return fid
 
# prepare the inception v3 model
model = InceptionV3(include_top=False, pooling='avg', input_shape=(299,299,3))
# define two fake collections of images
images1 = out
images2 = style1
images3 = style2
print('Prepared', images1.shape, images2.shape)
# convert integer to floating point values
images1 = images1.astype('float32')
images2 = images2.astype('float32')
images3 = images3.astype('float32')
# resize images
images1 = scale_images(images1, (299,299,3))
images2 = scale_images(images2, (299,299,3))
images3 = scale_images(images3, (299,299,3))
print('Scaled', images1.shape, images2.shape)
# pre-process images
images1 = preprocess_input(images1)
images2 = preprocess_input(images2)
images3 = preprocess_input(images3)
# fid between images1 and images1
fid1 = calculate_fid(model, images1, images2)
print('FID (style1): %.3f' % fid1)
# fid between images1 and images2
fid2 = calculate_fid(model, images1, images3)
print('FID (style2): %.3f' % fid2)
metric["fid_score"] = (fid1 + fid2) / 2

Prepared (6, 256, 256, 3) (6, 256, 256, 3)
Scaled (6, 299, 299, 3) (6, 299, 299, 3)
FID (style1): 4.685
FID (style2): 3.191


In [162]:
def list_mean(my_list):
  return sum(my_list) / len(my_list)


In [163]:
metric["ssim_score"] = list_mean(metric["ssim_score"])
metric["style_consistency_score_style1"] = list_mean(metric["style_consistency_score_style1"])
metric["style_consistency_score_style2"] = list_mean(metric["style_consistency_score_style2"])
print(metric)

{'ssim_score': 0.5157584498325983, 'style_consistency_score_style1': 3420.288818359375, 'style_consistency_score_style2': 3698.6288248697915, 'fid_score': 3.9378105424294603}


In [164]:
print(out_path)

/csc413/project/NST input output result/out_image/photo_to_vangogh
