In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import time
import os
#import PIL
import glob

#import imageio
from torch.autograd import Variable
from torchvision.utils import make_grid, save_image
from IPython import display
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
device = 'cuda' if torch.cuda.is_available() else 'cpu'
img_width = 32
img_height = 32
img_channels = 3
num_classes = 10
noise_dim = 100
clip_value = 0.01
train_ratio = 5
batch_size = 64
learning_rate = 0.0005
# Number of channels in the training images. For color images this is 3
nc = 3
# Size of z latent vector (i.e. size of generator input)
nz = 100
# Size of feature maps in generator
ngf = 64
# Size of feature maps in discriminator
ndf = 64
print(device)

cuda


## Calculate Inception Score

In [2]:
import torch
from torch import nn
from torch.autograd import Variable
from torch.nn import functional as F
import torch.utils.data

from torchvision.models.inception import inception_v3

import numpy as np
from scipy.stats import entropy

def inception_score(imgs, cuda=True, batch_size=128, resize=False, splits=1):
    """Computes the inception score of the generated images imgs
    imgs -- Torch dataset of (3xHxW) numpy images normalized in the range [-1, 1]
    cuda -- whether or not to run on GPU
    batch_size -- batch size for feeding into Inception v3
    splits -- number of splits
    """
    N = len(imgs)

    assert batch_size > 0
    assert N > batch_size

    # Set up dtype
    if cuda:
        dtype = torch.cuda.FloatTensor
    else:
        if torch.cuda.is_available():
            print("WARNING: You have a CUDA device, so you should probably set cuda=True")
        dtype = torch.FloatTensor

    # Set up dataloader
    dataloader = torch.utils.data.DataLoader(imgs, batch_size=batch_size)

    # Load inception model
    inception_model = inception_v3(pretrained=True, transform_input=False).type(dtype)
    inception_model.eval();
    up = nn.Upsample(size=(299, 299), mode='bilinear').type(dtype)
    def get_pred(x):
        if resize:
            x = up(x)
        x = inception_model(x)
        return F.softmax(x,dim=1).data.cpu().numpy()

    # Get predictions
    preds = np.zeros((N, 1000))

    for i, batch in enumerate(dataloader, 0):
        batch = batch.type(dtype)
        batchv = Variable(batch)
        batch_size_i = batch.size()[0]

        preds[i*batch_size:i*batch_size + batch_size_i] = get_pred(batchv)

    # Now compute the mean kl-div
    split_scores = []

    for k in range(splits):
        part = preds[k * (N // splits): (k+1) * (N // splits), :]
        py = np.mean(part, axis=0)
        scores = []
        for i in range(part.shape[0]):
            pyx = part[i, :]
            scores.append(entropy(pyx, py))
        split_scores.append(np.exp(np.mean(scores)))

    return np.mean(split_scores), np.std(split_scores)

if __name__ == '__main__':
    class IgnoreLabelDataset(torch.utils.data.Dataset):
        def __init__(self, orig):
            self.orig = orig

        def __getitem__(self, index):
            return self.orig[index][0]

        def __len__(self):
            return len(self.orig)


In [3]:
data_path = '~/project/test3_last/img_gen/'
transform=transforms.Compose([ transforms.Resize(64),
                               transforms.CenterCrop(64),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)),])

dataset = torchvision.datasets.ImageFolder(root=data_path, transform=transform)
trainset = datasets.CIFAR10(root='~/project/data/', train=True, download=True, transform = transform)


Files already downloaded and verified


In [4]:
print("Calculating Inception Score...")

_mean, _std = inception_score(IgnoreLabelDataset(dataset), cuda=True, batch_size=64, resize=True, splits=10)
print("Calculating Inception Score...")
#_mean_real, _std_real = inception_score(IgnoreLabelDataset(trainset), cuda=True, batch_size=64, resize=True, splits=10)

Calculating Inception Score...


  "See the documentation of nn.Upsample for details.".format(mode))


Calculating Inception Score...


In [5]:
print("Inception Score Mean of GAN: ", _mean)
print("Inception Score Standard Deviation of GAN: ", _std)

#print("Inception Score Mean of CIFAR-10: ", _mean_real)
#print("Inception Score Standard Deviation of CIFAR-10: ", _std_real)

Inception Score Mean of GAN:  2.2770313106370805
Inception Score Standard Deviation of GAN:  0.07475005643341849


## Calculate FID Score

In [6]:
import scipy.linalg as linalgo

def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.
    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    """

    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    
    covmean, _ = linalgo.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalgo.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return (diff.dot(diff) + np.trace(sigma1) +
            np.trace(sigma2) - 2 * tr_covmean)

In [7]:
from torchvision import models
class InceptionV3(nn.Module):
    """Pretrained InceptionV3 network returning feature maps"""

    # Index of default block of inception to return,
    # corresponds to output of final average pooling
    DEFAULT_BLOCK_INDEX = 3

    # Maps feature dimensionality to their output blocks indices
    BLOCK_INDEX_BY_DIM = {
        64: 0,   # First max pooling features
        192: 1,  # Second max pooling featurs
        768: 2,  # Pre-aux classifier features
        2048: 3  # Final average pooling features
    }

    def __init__(self,
                 output_blocks=[DEFAULT_BLOCK_INDEX],
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False):
        
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        
        inception = models.inception_v3(pretrained=True)

        # Block 0: input to maxpool1
        block0 = [
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1,
                inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a,
                inception.Mixed_7b,
                inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        for param in self.parameters():
            param.requires_grad = requires_grad

    def forward(self, inp):
        """Get Inception feature maps
        Parameters
        ----------
        inp : torch.autograd.Variable
            Input tensor of shape Bx3xHxW. Values are expected to be in
            range (0, 1)
        Returns
        -------
        List of torch.autograd.Variable, corresponding to the selected output
        block, sorted ascending by index
        """
        outp = []
        x = inp

        if self.resize_input:
            x = F.interpolate(x,
                              size=(299, 299),
                              mode='bilinear',
                              align_corners=False)

        if self.normalize_input:
            x = 2 * x - 1  # Scale from range (0, 1) to range (-1, 1)

        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in self.output_blocks:
                outp.append(x)

            if idx == self.last_needed_block:
                break

        return outp
    
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
model = InceptionV3([block_idx])
model=model.cuda()

In [8]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, drop_last=True)
pred_arr = np.empty((50000, 2048))

start_idx=0
for index, (images_mini_batch, labels) in enumerate(trainloader) :
        batch = images_mini_batch.to(device)

        with torch.no_grad():
            pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.size(2) != 1 or pred.size(3) != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred = pred.squeeze(3).squeeze(2).cpu().numpy()

        pred_arr[start_idx:start_idx + pred.shape[0]] = pred

        start_idx = start_idx + pred.shape[0]

mu = np.mean(pred_arr, axis=0)
sigma = np.cov(pred_arr, rowvar=False)
print(mu,sigma)

[0.31103828 0.29928794 0.2829469  ... 0.36840268 0.34556995 0.36019655] [[ 0.03760421 -0.00064592  0.00444968 ...  0.00048851  0.00363369
   0.00642787]
 [-0.00064592  0.04381326  0.00296016 ...  0.0071965   0.00821935
   0.00625975]
 [ 0.00444968  0.00296016  0.04230948 ...  0.00962975  0.0071571
  -0.00178448]
 ...
 [ 0.00048851  0.0071965   0.00962975 ...  0.08206231  0.01243663
   0.00228276]
 [ 0.00363369  0.00821935  0.0071571  ...  0.01243663  0.07701919
   0.00472482]
 [ 0.00642787  0.00625975 -0.00178448 ...  0.00228276  0.00472482
   0.0797516 ]]


In [9]:
trainloader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=True, drop_last=True)
pred_arr = np.empty((10000, 2048))

start_idx=0
for index, (images_mini_batch, labels) in enumerate(trainloader) :
        batch = images_mini_batch.to(device)

        with torch.no_grad():
            pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.size(2) != 1 or pred.size(3) != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred = pred.squeeze(3).squeeze(2).cpu().numpy()

        pred_arr[start_idx:start_idx + pred.shape[0]] = pred

        start_idx = start_idx + pred.shape[0]

mu1 = np.mean(pred_arr, axis=0)
sigma1 = np.cov(pred_arr, rowvar=False)
print(mu1,sigma1)

[0.309703   0.2962508  0.28164412 ... 0.3665724  0.34283723 0.35606982] [[ 0.02544843 -0.00070305  0.00262264 ... -0.00040529 -0.00119207
   0.00395966]
 [-0.00070305  0.03287232  0.00121857 ...  0.00486911  0.00621496
   0.00512275]
 [ 0.00262264  0.00121857  0.03381881 ...  0.00722263  0.00449653
   0.00308527]
 ...
 [-0.00040529  0.00486911  0.00722263 ...  0.07271438  0.00884257
   0.00588787]
 [-0.00119207  0.00621496  0.00449653 ...  0.00884257  0.06009314
   0.00507112]
 [ 0.00395966  0.00512275  0.00308527 ...  0.00588787  0.00507112
   0.06465445]]


In [10]:
fid = calculate_frechet_distance(mu1,sigma1,mu,sigma)

print("FID Score:", fid)

FID Score: 11.03072341547741
