# CGAN

# Imports

In [1]:
import os
import numpy as np
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.optim as optim

FloatTensor = torch.FloatTensor
LongTensor = torch.LongTensor

if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

mps


# Parameters

In [2]:
os.makedirs("./images/cgan", exist_ok=True)
os.makedirs("./models", exist_ok=True)

params = {
    "n_epochs" : 20,
    "batch_size" : 64,
    "lr" : 2e-4,
    "b1" : 0.5,
    "b2" : 0.999,
    #"n_cpu" : 8,
    "n_classes" : 10,
    "latent_dim" : 100,
    "img_size" : 32,
    "channels" : 1,
    "sample_interval" : 400,
}

img_shape = (params['channels'], params['img_size'], params['img_size'])

# Dataset - MNIST

In [3]:
# Configure data loader
os.makedirs("./data/mnist", exist_ok=True)

dataset = datasets.MNIST(
            "./data/mnist",
            train=True,
            download=True,
            transform=transforms.Compose(
                [transforms.Resize(params['img_size']), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
            ),
        )
dataloader = DataLoader(
    dataset,
    batch_size=params['batch_size'],
    shuffle=True,
)

testset = datasets.MNIST(
            "./data/mnist",
            train=False,
            download=True,
            transform=transforms.Compose(
                [transforms.Resize(params['img_size']), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
            ),
        )
testloader = DataLoader(
    testset,
    batch_size=params['batch_size'],
    shuffle=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


3.3%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%
6.0%

Extracting ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw






# Generator & Discriminator

In [4]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        self.label_emb = nn.Embedding(params['n_classes'], params['n_classes'])

        def block(in_feat, out_feat, normalize=True):
            layers = [nn.Linear(in_feat, out_feat)]
            if normalize:
                layers.append(nn.BatchNorm1d(out_feat, 0.8))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        self.model = nn.Sequential(
            *block(params['latent_dim'] + params['n_classes'], 128, normalize=False),
            *block(128, 256),
            *block(256, 512),
            *block(512, 1024),
            nn.Linear(1024, int(np.prod(img_shape))),
            nn.Tanh()
        )

    def forward(self, noise, labels):
        # Concatenate label embedding and image to produce input
        gen_input = torch.cat((self.label_emb(labels), noise), -1)
        img = self.model(gen_input)
        img = img.view(img.size(0), *img_shape)
        return img


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        self.label_embedding = nn.Embedding(params['n_classes'], params['n_classes'])

        self.model = nn.Sequential(
            nn.Linear(params['n_classes'] + int(np.prod(img_shape)), 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 1),
        )

    def forward(self, img, labels):
        # Concatenate label embedding and image to produce input
        d_in = torch.cat((img.view(img.size(0), -1), self.label_embedding(labels)), -1)
        validity = self.model(d_in)
        return validity

# Training

In [5]:
# Loss functions
adversarial_loss = torch.nn.MSELoss()

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()
#adversarial_loss.to(device)

generator.to(device)
discriminator.to(device)

# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=params['lr'], betas=(params['b1'], params['b2']))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=params['lr'], betas=(params['b1'], params['b2']))

In [6]:
def sample_image(n_row, batches_done):
    """Saves a grid of generated digits ranging from 0 to n_classes"""
    # Sample noise
    z = Variable(FloatTensor(np.random.normal(0, 1, (n_row ** 2, params['latent_dim'])))).to(device)
    # Get labels ranging from 0 to n_classes for n rows
    labels = np.array([num for _ in range(n_row) for num in range(n_row)])
    labels = Variable(LongTensor(labels)).to(device)
    gen_imgs = generator(z, labels)
    save_image(gen_imgs.data, "./images/cgan/%d.png" % batches_done, nrow=n_row, normalize=True)

# ----------
#  Training
# ----------

for epoch in range(params['n_epochs']):
    for i, (imgs, labels) in enumerate(dataloader):

        batch_size = imgs.shape[0]

        # Adversarial ground truths
        valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False).to(device)
        fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False).to(device)

        # Configure input
        real_imgs = Variable(imgs.type(FloatTensor)).to(device)
        labels = Variable(labels.type(LongTensor)).to(device)

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # Sample noise and labels as generator input
        z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, params['latent_dim'])))).to(device)
        gen_labels = Variable(LongTensor(np.random.randint(0, params['n_classes'], batch_size))).to(device)

        # Generate a batch of images
        gen_imgs = generator(z, gen_labels)

        # Loss measures generator's ability to fool the discriminator
        validity = discriminator(gen_imgs, gen_labels)
        g_loss = adversarial_loss(validity, valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Loss for real images
        validity_real = discriminator(real_imgs, labels)
        d_real_loss = adversarial_loss(validity_real, valid)

        # Loss for fake images
        validity_fake = discriminator(gen_imgs.detach(), gen_labels)
        d_fake_loss = adversarial_loss(validity_fake, fake)

        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        batches_done = epoch * len(dataloader) + i
        if batches_done % params['sample_interval'] == 0:
            sample_image(n_row=10, batches_done=batches_done)

    print(
        "[Epoch %d/%d] [D loss: %f] [G loss: %f]"
        % (epoch+1, params['n_epochs'], d_loss.item(), g_loss.item())
    )

torch.save({
    'gen' : generator.state_dict(),
    'disc' : discriminator.state_dict(),
}, './models/cgan.pth')

[Epoch 1/20] [D loss: 0.057795] [G loss: 0.732554]
[Epoch 2/20] [D loss: 0.096251] [G loss: 0.574727]
[Epoch 3/20] [D loss: 0.104178] [G loss: 0.689314]
[Epoch 4/20] [D loss: 0.127857] [G loss: 0.611116]
[Epoch 5/20] [D loss: 0.105671] [G loss: 0.554928]
[Epoch 6/20] [D loss: 0.201496] [G loss: 0.829118]
[Epoch 7/20] [D loss: 0.164122] [G loss: 0.404854]
[Epoch 8/20] [D loss: 0.160374] [G loss: 0.511463]
[Epoch 9/20] [D loss: 0.207812] [G loss: 0.363338]
[Epoch 10/20] [D loss: 0.178636] [G loss: 0.469659]
[Epoch 11/20] [D loss: 0.213581] [G loss: 0.429160]
[Epoch 12/20] [D loss: 0.229973] [G loss: 0.274362]
[Epoch 13/20] [D loss: 0.196420] [G loss: 0.469212]
[Epoch 14/20] [D loss: 0.227816] [G loss: 0.422976]
[Epoch 15/20] [D loss: 0.224761] [G loss: 0.309915]
[Epoch 16/20] [D loss: 0.205087] [G loss: 0.404041]
[Epoch 17/20] [D loss: 0.208525] [G loss: 0.335747]
[Epoch 18/20] [D loss: 0.215293] [G loss: 0.391546]
[Epoch 19/20] [D loss: 0.244499] [G loss: 0.354013]
[Epoch 20/20] [D loss

# Evaluation

### Inception Score

In [7]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
        self.conv1 = nn.Conv2d(1, 6, (5,5))
        # Layer 2: Convolutional. Output = 10x10x16.
        self.conv2 = nn.Conv2d(6, 16, (5,5))
        # Layer 3: Fully Connected. Input = 400. Output = 120.
        self.fc1   = nn.Linear(400, 120)
        # Layer 4: Fully Connected. Input = 120. Output = 84.
        self.fc2   = nn.Linear(120, 84)
        # Layer 5: Fully Connected. Input = 84. Output = 10.
        self.fc3   = nn.Linear(84, 10)
    def forward(self, x):
        # Activation. # Pooling. Input = 28x28x6. Output = 14x14x6.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
         # Activation. # Pooling. Input = 10x10x16. Output = 5x5x16.
        x = F.max_pool2d(F.relu(self.conv2(x)), (2,2))
        # Flatten. Input = 5x5x16. Output = 400.
        x = x.flatten(start_dim=1)
        # Activation.
        x = F.relu(self.fc1(x))
        # Activation.
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features 

# Train & test part from https://github.com/activatedgeek/LeNet-5
def train(epoch):
    global cur_batch_win
    net.train()
    loss_list, batch_list = [], []
    for i, (images, labels) in enumerate(dataloader):
        optimizer.zero_grad()
        output = net(images.to(device))
        loss = criterion(output, labels.to(device))

        loss_list.append(loss.detach().cpu().item())
        batch_list.append(i+1)

        #if i % 10 == 0:
        #    print('Train - Epoch %d, Batch: %d, Loss: %f' % (epoch, i, loss.detach().cpu().item()))

        loss.backward()
        optimizer.step()

def evaluate(target_loader, target_dataset):
    predictions = []
    net.eval()
    total_correct = 0
    avg_loss = 0.0
    for i, (images, labels) in enumerate(target_loader):
        output = net(images.to(device))
        avg_loss += criterion(output, labels.to(device)).sum()
        pred = output.detach().max(1)[1]
        total_correct += pred.eq(labels.to(device).view_as(pred)).sum()
        predictions.append(pred)

    avg_loss /= len(target_dataset)
    avg_loss = avg_loss.detach().cpu().item()
    accuracy    = float(total_correct) / len(target_dataset)
    print('Test Avg. Loss: %f, Accuracy: %f' % (avg_loss, accuracy))
    #return accuracy, np.array(torch.cat(predictions))
    return accuracy, avg_loss

In [8]:
net = LeNet()
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(net.parameters(), lr=0.001)
optimizer = optim.SGD(net.parameters(), lr=0.001)
net.to(device)

EPOCHS = 20
print("Training...")
val_acc_list = []
val_loss_list = []

for e in range(EPOCHS):
    print("Epoch : {}".format(e+1))
    train(e)
    test_acc, test_loss = evaluate(testloader, testset)

torch.save({
    'cnn' : net.state_dict()
    }, 'models/cgan_cnn.pth')

#test_acc, test_loss = evaluate(testloader, testset)

Training...
Epoch : 1
Test Avg. Loss: 0.035991, Accuracy: 0.142000
Epoch : 2
Test Avg. Loss: 0.035703, Accuracy: 0.247100
Epoch : 3
Test Avg. Loss: 0.035052, Accuracy: 0.434200
Epoch : 4
Test Avg. Loss: 0.032811, Accuracy: 0.628800
Epoch : 5
Test Avg. Loss: 0.021031, Accuracy: 0.680200
Epoch : 6
Test Avg. Loss: 0.010173, Accuracy: 0.826600
Epoch : 7
Test Avg. Loss: 0.007486, Accuracy: 0.859400
Epoch : 8
Test Avg. Loss: 0.006221, Accuracy: 0.884200
Epoch : 9
Test Avg. Loss: 0.005451, Accuracy: 0.901700
Epoch : 10
Test Avg. Loss: 0.004848, Accuracy: 0.912700
Epoch : 11
Test Avg. Loss: 0.004525, Accuracy: 0.919200
Epoch : 12
Test Avg. Loss: 0.004077, Accuracy: 0.925500
Epoch : 13
Test Avg. Loss: 0.003727, Accuracy: 0.930900
Epoch : 14
Test Avg. Loss: 0.003453, Accuracy: 0.936700
Epoch : 15
Test Avg. Loss: 0.003194, Accuracy: 0.938900
Epoch : 16
Test Avg. Loss: 0.002955, Accuracy: 0.942400
Epoch : 17
Test Avg. Loss: 0.002784, Accuracy: 0.948000
Epoch : 18
Test Avg. Loss: 0.002636, Accuracy

In [9]:
#loading models from file
generator = Generator()
discriminator = Discriminator()
net = LeNet()

chk = torch.load('./models/cgan.pth')
chk_cnn = torch.load('./models/cgan_cnn.pth')
generator.load_state_dict(chk['gen'])
discriminator.load_state_dict(chk['disc'])
net.load_state_dict(chk_cnn['cnn'])

generator.to(device)
discriminator.to(device)
net.to(device)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [10]:
def inception_score(r):
    p_y = np.mean(r, axis=0)
    e = r/p_y
    e = r*np.log(e, where= e>0)
    e = np.sum(e, axis=1)
    e = np.mean(e, axis=0)
    return np.exp(e)

# Generated Data for IS evaluation : 1,000 total
test_size = 1000

# Generate a batch of images
z = Variable(FloatTensor(np.random.normal(0, 1, (test_size, params['latent_dim'])))).to(device)
gen_labels = Variable(LongTensor(np.random.randint(0, params['n_classes'], test_size))).to(device)
gen_imgs = generator(z, gen_labels)

# Obtain Predictions (w/ softmax)
preds = F.softmax(net(gen_imgs),dim=1)
r = preds.cpu().detach().numpy()

i_score = inception_score(r)
print("Inception Score : {}".format(i_score))

Inception Score : 7.966749668121338


### TARR
- Use label (i) to generate image w/ CGAN -> (i) becomes ground-truth
- Compare Prediction Accuracy

In [11]:
class TestDataset(Dataset):
    def __init__(self, x_data, y_data):
        self.x_data = x_data
        self.y_data = y_data
        self.len = self.y_data.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [15]:
# Generated Data for IS evaluation : 10,000 total, 1000 of each class
test_size = 1000
criterion = nn.CrossEntropyLoss()
test_labels = torch.arange(0, params['n_classes']).repeat(test_size)

# Generate a batch of images
z = Variable(FloatTensor(np.random.normal(0, 1, (test_size*params['n_classes'], params['latent_dim'])))).to(device)
gen_labels = Variable(LongTensor(test_labels).to(device))
gen_imgs = generator(z, gen_labels)

gen_dataset = TestDataset(gen_imgs, gen_labels)
gen_loader = DataLoader(gen_dataset, batch_size=params['batch_size'], shuffle=True)

acc = evaluate(gen_loader, gen_dataset)

Test Avg. Loss: 0.138386, Accuracy: 0.100400
