# Inception Score on Custom Classifier

## Preparations

In [100]:
#!g1.1
import numpy as np
import torch
import torch.nn as nn
from IPython.display import clear_output
from torch.autograd import Variable
import os
import torch
import pickle
from torch.utils.data import DataLoader
from PIL import Image
from torchvision import transforms, datasets
from torchvision.transforms import functional as F

import numpy as np
import torch
from PIL import Image
from scipy import linalg

from IPython.display import clear_output
from torchvision.utils import save_image

%matplotlib inline

In [79]:
#!g1.1
from numpy import expand_dims
from numpy import log
from numpy import mean
from numpy import exp

In [80]:
#!g1.1
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [81]:
#!g1.1
# Side length in pixels; used further down as the Resize target for real images.
pic_width = 64

## Init Classifier and Load Weights

In [82]:
#!g1.1
class Classifier(nn.Module):
    """CNN that maps a 3-channel image batch to 22 raw class scores.

    ``layer1`` stacks three Conv-BatchNorm-ReLU-MaxPool stages (32, 64 and 128
    channels); ``layer2`` is a two-layer fully connected head. The notebook
    later slices the 22 outputs into the first 10 (eyes) and the remaining 12
    (hair) scores.
    """

    def __init__(self):
        super(Classifier, self).__init__()

        feature_layers = []
        in_channels = 3
        # (output channels, padding of that stage's max-pool)
        for out_channels, pool_padding in ((32, 0), (64, 0), (128, 1)):
            feature_layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2, padding=pool_padding),
            ]
            in_channels = out_channels
        # Unpacking keeps the sub-module indices 0..11, so checkpoint keys match.
        self.layer1 = nn.Sequential(*feature_layers)

        # 10368 = 128 * 9 * 9: a 64x64 input is pooled 64 -> 32 -> 16 -> 9
        # (the last pool pads by 1).
        self.layer2 = nn.Sequential(
            nn.Linear(10368, 625),
            nn.BatchNorm1d(625),
            nn.ReLU(),
            nn.Linear(625, 22),
        )

    def forward(self, x):
        features = self.layer1(x)
        # Flatten everything except the batch dimension for the linear head.
        flat = features.view(features.size(0), -1)
        return self.layer2(flat)

In [83]:
#!g1.1
# Build the classifier and restore its trained weights. map_location remaps
# the checkpoint tensors onto `device`, so a checkpoint that was saved on a GPU
# also loads on a CPU-only machine.
model = Classifier()
model.to(device)
model.load_state_dict(torch.load('./classifier.w', map_location=device))
# Inference mode: BatchNorm layers use their stored running statistics.
model.eval()

Classifier(
  (layer1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Linear(in_features=10368, out_features=625, bias=True)
    (1): BatchNorm1d(625, eps=1e-05, momentum=0.1, affine=True, track_running

## Make datasets

In [None]:
#!g1.1
# Side length (pixels) of the square images; mirrored in `image_size`.
pic_width = 64
image_size = pic_width

# Channel count of the images (3 for colour).
nc = 3

# Length of the latent vector z fed to the DCGAN generator.
nz = 100

# Base feature-map width of the generator.
ngf = 64

# Base feature-map width of the discriminator.
ndf = 64

# Number of GPUs to use.
ngpu = 1

# CUDA only when a GPU was requested and is actually available.
if ngpu > 0 and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
#!g1.1
class Generator(nn.Module):
    """DCGAN generator built from transposed convolutions.

    Reads the notebook-level constants ``nz`` (latent size), ``ngf`` (base
    feature width) and ``nc`` (output channels) when it is constructed.

    Args:
        ngpu: number of GPUs; values above 1 make ``forward`` run through
            ``nn.parallel.data_parallel`` for CUDA inputs.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # Input stage: latent Z -> (ngf*8) x 4 x 4.
        stages = [
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
        ]
        # Each further stage doubles the spatial size and halves the channels:
        # (ngf*4) x 8 x 8, then (ngf*2) x 16 x 16, then (ngf) x 32 x 32.
        for in_mult, out_mult in ((8, 4), (4, 2), (2, 1)):
            stages += [
                nn.ConvTranspose2d(ngf * in_mult, ngf * out_mult, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ngf * out_mult),
                nn.ReLU(True),
            ]
        # Output stage: (nc) x 64 x 64, squashed by Tanh.
        stages += [
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh(),
        ]
        # Unpacking keeps the sub-module indices of `main` unchanged.
        self.main = nn.Sequential(*stages)

    def forward(self, input):
        multi_gpu = input.is_cuda and self.ngpu > 1
        if multi_gpu:
            return nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        return self.main(input)

# Instantiate the DCGAN generator; wrap it in DataParallel only when several
# GPUs were requested.
netG = Generator(ngpu).to(device)
if (device.type == 'cuda') and (ngpu > 1):
    netG = nn.DataParallel(netG, list(range(ngpu)))
print(netG)
# map_location remaps the checkpoint tensors onto `device`, so weights saved on
# a GPU can also be restored on a CPU-only machine.
netG.load_state_dict(torch.load('./dcgan_aniG.w', map_location=device))
# Inference mode: BatchNorm layers use their stored running statistics.
netG.eval()

In [None]:
#!g1.1
# Number of generator batches (64 images each) to write to disk.
batches = 800
it = 0

# save_image does not create missing folders, so make sure the target exists.
os.makedirs('./dcgan_data/fake', exist_ok=True)

# Maps the generator's Tanh output from [-1, 1] to [0, 1] and converts to PIL.
transform = transforms.Compose([
    transforms.Normalize((-1, -1, -1), (2, 2, 2)),
    transforms.ToPILImage()
])

def color_transform(x):
    """Return the PIL image `x` with its contrast scaled by 1.45."""
    x = transforms.functional.adjust_contrast(x, 1.45)
    return x

# Converts the PIL image back to a tensor normalised to [-1, 1].
rev_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# NOTE(review): save_image is called without normalize=True and therefore
# clamps values to [0, 1]; the tensors written here are in [-1, 1], so every
# negative value is stored as 0. Confirm this is intended before relying on
# the saved files.
# no_grad: pure inference, so no autograd graph is built for the forward pass.
with torch.no_grad():
    for i in range(batches):
        imgs = netG(torch.randn(64, nz, 1, 1, device=device)).cpu()
        for img in imgs:
            img = rev_transform(color_transform(transform(img)))
            save_image(img, f'./dcgan_data/fake/{it}.png')
            it += 1
            if it % 100 == 0:
                clear_output(True)
                print(it)
print('Done!')

In [None]:
#!g1.1
class ResidualBlock(nn.Module):
    """Two Conv2d + LeakyReLU(0.2) stages with an additive skip connection.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        kernel_size: kernel size of both convolutions; the padding is
            ``kernel_size // 2``.
        stride: stride of the first convolution only.
        downsample: accepted but not used.
        groups: accepted but not used.
    """

    def __init__(self, inplanes, planes, kernel_size=3, stride=1, downsample=None, groups=1):
        super().__init__()
        pad = kernel_size // 2
        first_conv = nn.Conv2d(inplanes, planes, kernel_size, stride=stride, padding=pad)
        self.conv1 = nn.Sequential(first_conv, nn.LeakyReLU(0.2))
        second_conv = nn.Conv2d(planes, planes, kernel_size, padding=pad)
        self.conv2 = nn.Sequential(second_conv, nn.LeakyReLU(0.2))
        # A 1x1 convolution adapts the skip path when the channel counts differ.
        if inplanes == planes:
            self.proj = None
        else:
            self.proj = nn.Conv2d(inplanes, planes, 1)

    def forward(self, x):
        residual = self.conv2(self.conv1(x))
        shortcut = x if self.proj is None else self.proj(x)
        return residual + shortcut

class R1Generator(nn.Module):
    """
        Convolutional Generator: a linear layer reshaped to (N, 1024, 4, 4),
        followed by residual blocks interleaved with four 2x bilinear
        upsamplings and a final 3x3 convolution.

        Args:
            out_channel: number of channels of the produced image.
            n_filters: accepted but not used.
            n_noise: length of the input noise vector.
    """
    def __init__(self, out_channel=1, n_filters=128, n_noise=512):
        super().__init__()
        self.fc = nn.Linear(n_noise, 1024*4*4)

        blocks = []
        # Each pair reduces the channels, then doubles the resolution:
        # (N, 512, 8, 8) -> (N, 256, 16, 16) -> (N, 128, 32, 32) -> (N, 64, 64, 64)
        for c_in, c_out in ((1024, 512), (512, 256), (256, 128), (128, 64)):
            blocks.append(ResidualBlock(c_in, c_out))
            blocks.append(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))
        blocks.append(ResidualBlock(64, 64))
        blocks.append(nn.Conv2d(64, out_channel, 3, padding=1))  # (N, out_channel, 64, 64)
        # Unpacking keeps the sub-module indices of `G` unchanged.
        self.G = nn.Sequential(*blocks)

    def forward(self, z):
        batch = z.size(0)
        seed_map = self.fc(z).view(batch, 1024, 4, 4)
        return self.G(seed_map)

# Place the R1GAN generator on the notebook's `device` instead of a hard-coded
# 'cuda', and remap the checkpoint onto it so the cell also runs without a GPU.
G = R1Generator(out_channel=3, n_noise=256).to(device)
G.load_state_dict(torch.load('./r1gan_aniG.w', map_location=device))
G.eval()

In [None]:
#!g1.1
# Number of generator batches (64 images each) to write to disk.
batches = 800
it = 0

# save_image does not create missing folders, so make sure the target exists.
os.makedirs('./r1gan_data/fake', exist_ok=True)

# NOTE(review): save_image clamps values to [0, 1] and G has no output
# squashing layer; if G emits images on a [-1, 1] scale (TODO confirm against
# its training code) the negative part is stored as 0.
# no_grad: pure inference, so no autograd graph is built for the forward pass.
with torch.no_grad():
    for i in range(batches):
        # Latent noise drawn uniformly from [-1, 1).
        z = (torch.rand(size=[64, 256])*2-1).to(device)
        imgs = G(z).cpu()
        for img in imgs:
            save_image(img, f'./r1gan_data/fake/{it}.png')
            it += 1
            if it % 100 == 0:
                # Clear first, then print, so the latest count stays visible.
                clear_output(True)
                print(it)

print('Done!')

## Load datasets

In [90]:
#!g1.1
# Saved images are read back as plain [0, 1] tensors (ToTensor only).
# NOTE(review): the real-image pipeline in this notebook additionally applies
# a colour adjustment and Normalize((0.5,)*3, (0.5,)*3) before the classifier;
# confirm the classifier is meant to see differently scaled inputs here.
data_transform = transforms.ToTensor()
fake_r1dataset, fake_dcdataset = [
    datasets.ImageFolder(root=folder, transform=data_transform)
    for folder in ('./r1gan_data', './dcgan_data')
]

In [91]:
#!g1.1
# Evaluation loaders: keep a fixed order and keep the final partial batch, so
# every generated image contributes to the statistics and runs are repeatable.
r1_dl = DataLoader(fake_r1dataset, batch_size = 64, shuffle = False, drop_last = False)
dc_dl = DataLoader(fake_dcdataset, batch_size = 64, shuffle = False, drop_last = False)

## Calculate probabilities

### DCGAN

In [92]:
#!g1.1

# Collect the classifier's raw outputs for every DCGAN image. Batches are
# gathered in lists and concatenated once at the end, instead of re-copying
# the growing result tensor on every iteration.
dc_eye_batches = []
dc_hair_batches = []

# no_grad: pure inference, so no autograd graph is kept for the forward passes.
with torch.no_grad():
    for i, (batch_X, batch_Y) in enumerate(dc_dl):
        Y_pred = model(batch_X.to(device)).cpu()
        # The first 10 outputs are the eye scores, the remaining ones the hair scores.
        dc_eye_batches.append(Y_pred[:, :10])
        dc_hair_batches.append(Y_pred[:, 10:])
        if i % 100 == 0:
            clear_output(True)
            print(i)

classes_eye = torch.cat(dc_eye_batches, dim=0)
classes_hair = torch.cat(dc_hair_batches, dim=0)

700


## Calculate inception score

In [93]:
#!g1.1
def calculate_inception_score(p_yx, eps=1E-16):
    """Inception Score: exp of the mean KL divergence between p(y|x) and p(y).

    Args:
        p_yx: array of shape (images, classes); each row holds the predicted
            class probabilities of one image.
        eps: small constant added inside the logarithms to avoid log(0).

    Returns:
        The score as a scalar.
    """
    # Marginal class distribution p(y), kept 2-D so it broadcasts over the rows.
    marginal = expand_dims(p_yx.mean(axis=0), 0)
    log_ratio = log(p_yx + eps) - log(marginal + eps)
    # KL(p(y|x) || p(y)) of each image: sum over the class axis.
    per_image_kl = (p_yx * log_ratio).sum(axis=1)
    # Average over the images, then undo the logarithm.
    return exp(mean(per_image_kl))

In [94]:
#!g1.1

# Turn the raw DCGAN classifier outputs into per-image class probabilities.
p_dc_eye = torch.softmax(classes_eye, dim=-1)
p_dc_hair = torch.softmax(classes_hair, dim=-1)

# NumPy copies; these arrays are reused later for the Frechet statistics.
classes_eye_numpy = p_dc_eye.detach().cpu().numpy()
classes_hair_numpy = p_dc_hair.detach().cpu().numpy()

dcgan_eye_is = calculate_inception_score(classes_eye_numpy)
dcgan_hair_is = calculate_inception_score(classes_hair_numpy)

print('DCGAN inception scores:')
print(f'Inception score (eyes): {dcgan_eye_is}')
print(f'Inception score (hair): {dcgan_hair_is}')

DCGAN inception scores:
Inception score (eyes): 4.400845527648926
Inception score (hair): 5.429129123687744


## R1GAN

In [87]:
#!g1.1
# Collect the classifier's raw outputs for every R1GAN image. Batches are
# gathered in lists and concatenated once at the end, instead of re-copying
# the growing result tensor on every iteration.
r1_eye_batches = []
r1_hair_batches = []

# no_grad: pure inference, so no autograd graph is kept for the forward passes.
with torch.no_grad():
    for i, (batch_X, batch_Y) in enumerate(r1_dl):
        Y_pred = model(batch_X.to(device)).cpu()
        # The first 10 outputs are the eye scores, the remaining ones the hair scores.
        r1_eye_batches.append(Y_pred[:, :10])
        r1_hair_batches.append(Y_pred[:, 10:])
        if i % 100 == 0:
            clear_output(True)
            print(i)

classes_eye_r1 = torch.cat(r1_eye_batches, dim=0)
classes_hair_r1 = torch.cat(r1_hair_batches, dim=0)

700


In [88]:
#!g1.1

# Turn the raw R1GAN classifier outputs into per-image class probabilities.
p_dc_eye_r1 = torch.softmax(classes_eye_r1, dim=-1)
p_dc_hair_r1 = torch.softmax(classes_hair_r1, dim=-1)

# NumPy copies; these arrays are reused later for the Frechet statistics.
classes_eye_r1_numpy = p_dc_eye_r1.detach().cpu().numpy()
classes_hair_r1_numpy = p_dc_hair_r1.detach().cpu().numpy()

r1gan_eye_is = calculate_inception_score(classes_eye_r1_numpy)
r1gan_hair_is = calculate_inception_score(classes_hair_r1_numpy)

print('R1GAN inception scores:')
print(f'Inception score (eyes): {r1gan_eye_is}')
print(f'Inception score (hair): {r1gan_hair_is}')

R1GAN inception scores:
Inception score (eyes): 5.746713161468506
Inception score (hair): 7.197906970977783


## Inception score on real dataset

In [64]:
#!g1.1
class Anime_Dataset:
    """Image dataset with per-attribute one-hot labels and masks.

    Expects ``root`` to contain an ``images`` folder and a ``labels.pkl`` file
    that maps an image's base name (without extension) to a sequence of tags.
    Only entries whose last tag is ``None`` are kept, with that last tag
    removed; image files without a kept label are ignored.

    Args:
        root: dataset directory.
        class_num: iterable with the number of classes of each attribute, in
            the same order as the tags.
        transform: callable applied to the colour-adjusted PIL image.
    """
    def __init__(self, root, class_num, transform):
        self.root = root
        self.img_folder = os.path.join(self.root, 'images')
        self.label_file = os.path.join(self.root, 'labels.pkl')
        # os.listdir returns entries in arbitrary order; sorting makes an index
        # refer to the same file on every run and machine.
        self.img_files = sorted(os.listdir(self.img_folder))
        # Context manager closes the file handle once the labels are read.
        # NOTE: pickle can execute arbitrary code; only load trusted label files.
        with open(self.label_file, 'rb') as label_fh:
            self.labels = pickle.load(label_fh)
        self.preprocess()
        self.class_num = class_num
        self.transform = transform
        
        assert(len(self.img_files) <= len(self.labels))
    
    def preprocess(self):
        """Keep only labels whose last tag is None and the files that have one."""
        new_label = {}
        for img, tag in self.labels.items():
            if tag[-1] is None:
                new_label[img] = tag[:-1]
        self.labels = new_label
        self.img_files = [path for path in self.img_files if os.path.splitext(path)[0] in self.labels]
        print(len(self.labels), len(self.img_files))
    
    def color_transform(self, x):
        """Boost saturation (2.5), apply gamma 0.7 and raise contrast (1.2)."""
        x = F.adjust_saturation(x, 2.5)
        x = F.adjust_gamma(x, 0.7)
        x = F.adjust_contrast(x, 1.2)
        return x
        
    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, idx):
        """Return ``(image, one_hots, mask)`` for the file at position ``idx``.

        ``one_hots`` and ``mask`` are the per-attribute vectors concatenated
        along dimension 0.
        """
        # Force three channels so palette/greyscale/RGBA files are handled by
        # the 3-channel transforms as well; a no-op for images already in RGB.
        with Image.open(os.path.join(self.img_folder, self.img_files[idx])) as raw:
            img = raw.convert('RGB')
        img = self.color_transform(img)
        img = self.transform(img)
        filename = os.path.splitext(self.img_files[idx])[0]
        label = self.labels[filename]
        
        one_hots = []
        mask = []
        for i, c in enumerate(self.class_num):
            l = torch.zeros(c)
            m = torch.zeros(c)
            # NOTE(review): this truthiness test also skips a label equal to 0,
            # so class index 0 gets neither a one-hot entry nor a mask. Confirm
            # whether missing labels are encoded as None only; if so this
            # should be `label[i] is not None`.
            if label[i]:
                l[label[i]] = 1
                m = 1 - m # create mask
            one_hots.append(l)
            mask.append(m)
        one_hots = torch.cat(one_hots, 0)
        mask = torch.cat(mask, 0)
        return img, one_hots, mask

def get_anime_dataloader(root, classes, batch_size):
    """Build shuffled train/test loaders over a random 90/10 split.

    Args:
        root: dataset directory handed to ``Anime_Dataset``.
        classes: per-attribute class counts handed to ``Anime_Dataset``.
        batch_size: batch size of both loaders.

    Returns:
        ``(train_loader, test_loader)``; both shuffle and drop the last
        incomplete batch.
    """
    # Random flip, resize to pic_width x pic_width, then scale to [-1, 1].
    preprocessing = transforms.Compose([
        transforms.RandomHorizontalFlip(p = 0.5),
        transforms.Resize((pic_width, pic_width)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    full_dataset = Anime_Dataset(root, classes, preprocessing)

    n_total = len(full_dataset)
    n_train = int(0.9 * n_total)
    train_part, test_part = torch.utils.data.random_split(
        full_dataset, [n_train, n_total - n_train]
    )

    return (
        DataLoader(train_part, batch_size = batch_size, shuffle = True, drop_last = True),
        DataLoader(test_part, batch_size = batch_size, shuffle = True, drop_last = True),
    )
    
def denorm(img):
    """Undo a (0.5, 0.5) normalisation of an image tensor: [-1, 1] -> [0, 1].

    Args:
        img: input image tensor.

    Returns:
        ``img / 2 + 0.5`` with every value clamped to [0, 1].
    """
    return (img / 2 + 0.5).clamp(min=0, max=1)

In [65]:
#!g1.1
# Only the first returned loader (the 90% training split) is used as the
# "real" reference set; (10, 12) are the class counts of the two label groups
# and 64 is the batch size.
real_dl, _ = get_anime_dataloader('./data', (10, 12), 64)

36740 36740


In [73]:
#!g1.1
# Collect the classifier's raw outputs for the real images. Batches are
# gathered in lists and concatenated once at the end, instead of re-copying
# the growing result tensor on every iteration.
real_eye_batches = []
real_hair_batches = []

# no_grad: pure inference, so no autograd graph is kept for the forward passes.
with torch.no_grad():
    for i, (batch_X, batch_Y, _) in enumerate(real_dl):
        Y_pred = model(batch_X.to(device)).cpu()
        # The first 10 outputs are the eye scores, the remaining ones the hair scores.
        real_eye_batches.append(Y_pred[:, :10])
        real_hair_batches.append(Y_pred[:, 10:])
        if i % 100 == 0:
            clear_output(True)
            print(i)

classes_eye_real = torch.cat(real_eye_batches, dim=0)
classes_hair_real = torch.cat(real_hair_batches, dim=0)

500


In [75]:
#!g1.1
# Turn the raw classifier outputs on real images into class probabilities.
p_real_eye = torch.softmax(classes_eye_real, dim=-1)
p_real_hair = torch.softmax(classes_hair_real, dim=-1)

# NumPy copies; these arrays are reused later for the Frechet statistics.
classes_eye_real_numpy = p_real_eye.detach().cpu().numpy()
classes_hair_real_numpy = p_real_hair.detach().cpu().numpy()

real_eye_is = calculate_inception_score(classes_eye_real_numpy)
real_hair_is = calculate_inception_score(classes_hair_real_numpy)

print('Inception scores (real data):')
print(f'Inception score (eyes): {real_eye_is}')
print(f'Inception score (hair): {real_hair_is}')

Inception scores (real data):
Inception score (eyes): 9.854829788208008
Inception score (hair): 11.45889949798584


## Fréchet Inception Distance

In [95]:
#!g1.1
# Mean vector and covariance matrix (one column per class) of the classifier's
# softmax outputs on the real images. Note these are class probabilities, not
# intermediate network features.
eye_real_mean = classes_eye_real_numpy.mean(axis=0)
hair_real_mean = classes_hair_real_numpy.mean(axis=0)

eye_real_cov = np.cov(classes_eye_real_numpy, rowvar=False)
hair_real_cov = np.cov(classes_hair_real_numpy, rowvar=False)

In [96]:
#!g1.1
# Mean vector and covariance matrix (one column per class) of the classifier's
# softmax outputs on the R1GAN images.
eye_r1_mean = classes_eye_r1_numpy.mean(axis=0)
hair_r1_mean = classes_hair_r1_numpy.mean(axis=0)

eye_r1_cov = np.cov(classes_eye_r1_numpy, rowvar=False)
hair_r1_cov = np.cov(classes_hair_r1_numpy, rowvar=False)

In [97]:
#!g1.1
# Mean vector and covariance matrix (one column per class) of the classifier's
# softmax outputs on the DCGAN images.
eye_dc_mean = classes_eye_numpy.mean(axis=0)
hair_dc_mean = classes_hair_numpy.mean(axis=0)

eye_dc_cov = np.cov(classes_eye_numpy, rowvar=False)
hair_dc_cov = np.cov(classes_hair_numpy, rowvar=False)

In [98]:
#!g1.1
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Frechet distance between two Gaussians given by mean and covariance.

    For X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2) the returned value is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Params:
    -- mu1   : Mean vector of the first sample set.
    -- sigma1: Covariance matrix of the first sample set.
    -- mu2   : Mean vector of the second sample set.
    -- sigma2: Covariance matrix of the second sample set.
    -- eps   : Value added to the diagonal of both covariances when the matrix
               square root of their product is not finite.
    Returns:
    --   : The Frechet Distance.
    Raises:
    -- AssertionError: if the means or the covariances differ in shape.
    -- ValueError    : if the diagonal of the matrix square root has an
                       imaginary part beyond a tolerance of 1e-3.
    """
    mu1, mu2 = np.atleast_1d(mu1), np.atleast_1d(mu2)
    sigma1, sigma2 = np.atleast_2d(sigma1), np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    mean_gap = mu1 - mu2

    # The covariance product can be close to singular, in which case sqrtm may
    # return non-finite entries; retry with a small diagonal offset.
    covmean, _errest = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.all(np.isfinite(covmean)):
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        jitter = eps * np.eye(sigma1.shape[0])
        covmean = linalg.sqrtm((sigma1 + jitter).dot(sigma2 + jitter))

    # Round-off can leave a small imaginary part; drop it when it is negligible.
    if np.iscomplexobj(covmean):
        diag_imag = np.diagonal(covmean).imag
        if not np.allclose(diag_imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    covmean_trace = np.trace(covmean)

    return (mean_gap.dot(mean_gap) + np.trace(sigma1)
            + np.trace(sigma2) - 2 * covmean_trace)

In [101]:
#!g1.1
# Frechet distance of each generator's statistics to the real-image
# statistics, separately for the eye and the hair outputs.
fid_dc_eye = calculate_frechet_distance(eye_dc_mean, eye_dc_cov, eye_real_mean, eye_real_cov, eps=1e-6)
fid_dc_hair = calculate_frechet_distance(hair_dc_mean, hair_dc_cov, hair_real_mean, hair_real_cov, eps=1e-6)

fid_r1_eye = calculate_frechet_distance(eye_r1_mean, eye_r1_cov, eye_real_mean, eye_real_cov, eps=1e-6)
fid_r1_hair = calculate_frechet_distance(hair_r1_mean, hair_r1_cov, hair_real_mean, hair_real_cov, eps=1e-6)

print('Fréchet Inception Distance (DCGAN)')
print(f'Eyes: {fid_dc_eye}')
print(f'Hair: {fid_dc_hair}')
print()
print('Fréchet Inception Distance (R1GAN)')
print(f'Eyes: {fid_r1_eye}')
print(f'Hair: {fid_r1_hair}')

Fréchet Inception Distance (DCGAN)
Eyes: 0.34578143064731726
Hair: 0.2833714299625498

Fréchet Inception Distance (R1GAN)
Eyes: 0.20036083026053686
Hair: 0.17117296286300943
