This is our notebook for 6.853 project. Our goal is to reduce mode-collapse when training GANs. We aim to do this by using existing literature on multiplayer, zero-sum games to phrase the GAN problem as a polymatrix game. Then, we aim to use the reduction to the high-stakes lawyer game. 

Credit for portions of the code to Github user znxlwm.

In [37]:
import torch
import torchvision
import torchvision.datasets as datasets
from torch import nn, optim
from torch.autograd.variable import Variable
from torchvision import transforms, datasets
import matplotlib.pyplot as plt
import numpy as np
import time
import os
import scipy.misc
import pickle

In [38]:
#IMPORTS FUNCTIONS FROM UTIL
%run Utils.py

We first load the MNIST dataset and normalize

In [51]:
input_size = 28
batch_size = 100

trans = transforms.Compose(
            [transforms.Resize((input_size, input_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean = (0.1307, ), std = (0.3081,))
            ])
                
data_loader = torch.utils.data.DataLoader(
               datasets.MNIST(root = './data', train=True, download = True, transform = trans), 
               batch_size = batch_size, 
               shuffle = True)

In [52]:
d = data_loader.__iter__().__next__()[0]
d.size()

torch.Size([100, 1, 28, 28])

In [31]:
#Discriminator architecture
class DiscriminatorNet(torch.nn.Module):
    #Discriminator class. Outputs whether it thinks the image is original or not.
    def __init__(self, in_dim, out_dim, input_size):
        super(DiscriminatorNet, self).__init__()
        self.in_dim = in_dim 
        self.out_dim = out_dim
        self.input_size = input_size

        self.conv = nn.Sequential(
            nn.Conv2d(self.in_dim, 64, kernel_size = 4, stride = 2, padding = 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, kernel_size = 4, stride = 2, padding = 1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2)
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(128 * (self.input_size//4) * (self.input_size//4), 1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(0.2),
        )
        
        self.out = nn.Sequential(
            nn.Linear(1024, self.out_dim),
            nn.Sigmoid()
        )
        
        initialize_weights(self)
    
    def forward(self, inp):
        x = self.conv(inp)
        x = x.view( -1, 128 * (self.input_size // 4) * (self.input_size // 4))
        x = self.fc_layer(x)
        x = self.out(x)
        return x

In [32]:
#Generator architecture using transposed convolutions to upscale.
class GeneratorNet(torch.nn.Module):
    def __init__(self, in_dim, out_dim, input_size):
        super(GeneratorNet, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.input_size = input_size
        
        self.fc_layer = nn.Sequential(
            nn.Linear(self.in_dim, 1024),
            nn.BatchNorm1d(1024),
#             nn.LeakyReLU(0.2),
            nn.ReLU(),
            nn.Linear(1024, 128 * (self.input_size // 4)**2 ),
            nn.BatchNorm1d(128 * (self.input_size//4)**2),
#             nn.LeakyReLU(0.2)
            nn.ReLU()
        )
        
        self.deconv = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size = 4, stride = 2, padding = 1),
            nn.BatchNorm2d(64),
#             nn.LeakyReLU(0.2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, self.out_dim, kernel_size = 4, stride = 2, padding = 1),
            nn.Tanh()
        )
        
        initialize_weights(self)
    
    def forward(self, x):
        x = self.fc_layer(x)
        x = x.view(-1, 128, (self.input_size//4), (self.input_size//4))
        x = self.deconv(x)
        
        return x


In [33]:
#For the initial design with flattened images. Still learning how to use pytorch
def images_to_vectors(images):
    return images.view(images.size(0), 784)

def vectors_to_images(vectors):
    return vectors.view(vectors.size(0), 1, input_size, input_size)

In [34]:
a, b = data_loader.__iter__().__next__()
# print(x)
print(b)
print(a.size())

x = a.view(-1, 28*28)
x.size()

a.shape[1]
# x = x.view(-1, 28*28)
# z = torch.rand((self.batch_size, self.z_dim))

tensor([1, 6, 2, 2, 4, 4, 3, 3, 6, 0, 6, 0, 1, 0, 6, 6, 7, 3, 1, 3, 2, 8, 9, 7,
        0, 9, 5, 8, 9, 2, 5, 1, 3, 2, 9, 1, 4, 2, 6, 4, 1, 2, 3, 7, 1, 6, 6, 4,
        8, 3, 2, 9, 6, 4, 8, 0, 5, 1, 0, 0, 3, 0, 8, 1, 6, 2, 3, 6, 3, 5, 7, 8,
        5, 4, 6, 2, 9, 8, 9, 1, 0, 2, 8, 9, 4, 5, 8, 4, 9, 7, 2, 9, 7, 0, 0, 5,
        8, 8, 9, 5])
torch.Size([100, 1, 28, 28])


1

In [46]:
class GAN(object):
    def __init__(self):
        self.epoch = 100
        self.sample_num = 100
        self.gpu_mode = True
        self.batch_size = 100
        self.z_dim = 62
        self.input_size = 28
        
        self.dataset = 'MNIST'
        self.save_dir = 'models/'
        self.result_dir = 'results/'
        self.model_name = 'ConvModel'
        
        trans = transforms.Compose([
#             [transforms.Resize((self.input_size, self.input_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean = (0.1307, ), std = (0.3081,))
            ])
                
        self.data_loader = torch.utils.data.DataLoader(
               datasets.MNIST(root = './data', train=True, download = True, transform = trans), 
               batch_size = self.batch_size, 
               shuffle = True)
        data = self.data_loader.__iter__().__next__()[0]
        
        self.num_batches = len(data_loader)
        
        #Change out_dim when I make networks use convolutions
        self.G = GeneratorNet(in_dim = self.z_dim, out_dim = data.shape[1], input_size = self.input_size)
        self.D = DiscriminatorNet(in_dim = data.shape[1], out_dim = 1, input_size = self.input_size)
        self.D_optimizer = optim.Adam(self.D.parameters(), lr=0.0002, betas=(0.5, 0.999))
        self.G_optimizer = optim.Adam(self.G.parameters(), lr=0.0002, betas=(0.5, 0.999))
        
        self.BCE_loss = nn.BCELoss()
        
        self.sample_z = torch.rand((self.batch_size, self.z_dim))
        
        if self.gpu_mode:
            self.G.cuda()
            self.D.cuda()
            self.BCE_loss = nn.BCELoss().cuda()
            self.sample_z = self.sample_z.cuda()
            
    def train(self):
        self.train_hist = {}
        self.train_hist['D_loss'] = []
        self.train_hist['G_loss'] = []
        self.train_hist['per_epoch_time'] = []
        self.train_hist['total_time'] = []
        
        self.y_real = torch.ones(self.batch_size, 1)
        self.y_fake = torch.zeros(self.batch_size, 1)
        
        if self.gpu_mode:
            self.y_real, self.y_fake = Variable(self.y_real.cuda()), Variable(self.y_fake.cuda())
            
        start_time = time.time()
        
        self.D.train()
        for epoch in range(self.epoch):
            
            self.G.train()
            epoch_start_time = time.time()
            
            for iter, (x, _) in enumerate(data_loader):
                
                z = torch.rand((self.batch_size, self.z_dim))
                
                if self.gpu_mode:
                    x, z = Variable(x.cuda()), Variable(z.cuda())
                
                #First train D on real and fake samples
                self.D_optimizer.zero_grad()
                D_real = self.D(x)
                D_real_loss = self.BCE_loss(D_real, self.y_real)
                
                gen_ims = self.G(z)
                D_fake = self.D(gen_ims)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake)
                
                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.item())
                
                D_loss.backward()
                self.D_optimizer.step()
                
                #Now train G
                self.G_optimizer.zero_grad()
                
                z = torch.rand((self.batch_size, self.z_dim))
                if self.gpu_mode:
                    z = Variable(z.cuda())
                
                gen_ims = self.G(z)
                D_fake = self.D(gen_ims)
                G_loss = self.BCE_loss(D_fake, self.y_real)
                
                self.train_hist['G_loss'].append(G_loss.item())
                
                G_loss.backward()
                self.G_optimizer.step()
                
                self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
            
            self.train_hist['total_time'].append(time.time() - start_time)
            with torch.no_grad():
                self.visualize_results((epoch+1))
            print("Completed epoch {}".format(epoch+1))
                
        print("Done Training!")
        self.save()
        
        
        generate_animation('{}/{}/{}/{}'.format(self.result_dir, self.dataset, self.model_name, self.model_name),
                                 self.epoch)
        loss_plot(self.train_hist, os.path.join(self.save_dir, self.dataset, self.model_name), self.model_name)
        
    def visualize_results(self, epoch, fix=True):
        self.G.eval()
        
        if not os.path.exists(self.result_dir + '/' + self.dataset + '/' + self.model_name):
            os.makedirs(self.result_dir + '/' + self.dataset + '/' + self.model_name)
            
        tot_num_samples = min(self.sample_num, self.batch_size)
        image_frame_dim = int(np.floor(np.sqrt(tot_num_samples)))
        
        samples = self.G(self.sample_z)
#         samples = vectors_to_images(samples)
        
        if self.gpu_mode:
            samples = samples.cpu().data.numpy().transpose(0, 2, 3, 1)
        else:
            samples = samples.data.numpy().transpose(0, 2, 3, 1)
            
        samples = (samples + 1)/2
        
        
        save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], 
                    [image_frame_dim, image_frame_dim],
        '{}/{}/{}/{}_epoch{:03}.png'.format(self.result_dir, self.dataset, self.model_name, self.model_name, epoch))
        
    
    def save(self):
        save_dir = os.path.join(self.save_dir, self.dataset, self.model_name)
        
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        
        torch.save(self.G.state_dict(), os.path.join(save_dir, self.model_name + '_G.pkl'))
        torch.save(self.D.state_dict(), os.path.join(save_dir, self.model_name + '_D.pkl'))
        
        with open(os.path.join(save_dir, self.model_name + '_history.pkl'), 'wb') as f:
            pickle.dump(self.train_hist, f)
    
    def load(self):
        save_dir = os.path.join(self.save_dir, self.dataset, self.model_name)
        
        self.G.load_state_dict(torch.load(os.path.join(save_dir, self.model_name + '_G.pkl')))
        self.D.load_state_dict(torch.load(os.path.join(save_dir, self.model_name + '_D.pkl')))    
    

In [47]:
my_GAN = GAN()
my_GAN.train()

RuntimeError: shape '[-1, 6272]' is invalid for input of size 819200

In [14]:
my_GAN.visualize_results(100)

In [15]:
my_GAN.train()

Completed epoch 1
Completed epoch 2
Completed epoch 3
Completed epoch 4
Completed epoch 5
Completed epoch 6
Completed epoch 7
Completed epoch 8
Completed epoch 9
Completed epoch 10
Completed epoch 11
Completed epoch 12
Completed epoch 13
Completed epoch 14
Completed epoch 15
Completed epoch 16
Completed epoch 17
Completed epoch 18
Completed epoch 19
Completed epoch 20
Completed epoch 21
Completed epoch 22
Completed epoch 23
Completed epoch 24
Completed epoch 25
Completed epoch 26
Completed epoch 27
Completed epoch 28
Completed epoch 29
Completed epoch 30
Completed epoch 31
Completed epoch 32


KeyboardInterrupt: 

In [37]:
my_GAN.train_hist['per_epoch_time'][-1]

16.68210436105728

Below stuff is OBSOLETE. Consolidated into GAN class + added the GPU integration.

Keeping for now while ensuring above is working.

In [None]:
def noise(size):
    n = Variable(torch.randn(size, 100))
    return n

In [None]:
loss = nn.BCELoss()

In [None]:
def ones_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    return data

def zeros_target(size):
    '''
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    return data

In [None]:
def train_discriminator(optimizer, real_data, fake_data):
    N = real_data.size(0)
    optimizer.zero_grad()
    
    pred_real = discriminator(real_data)
    err_real = loss(pred_real, ones_target(N))
    err_real.backward()
    
    pred_fake = discriminator(fake_data)
    err_fake = loss(pred_fake, zeros_target(N))
    err_fake.backward()
    
    optimizer.step()
    
    return err_real + err_fake, pred_real, pred_fake

In [None]:
def train_generator(optimizer, fake_data):
    N = fake_data.size(0)
    
    optimizer.zero_grad()
    pred = discriminator(fake_data)
    err = loss(pred, ones_target(N))
    err.backward()
    
    optimizer.step()
    
    return err
    

In [None]:
num_test_samples = 16
test_noise = noise(num_test_samples)

In [None]:
GPU = True

if GPU:
    loss = loss.cuda()
    generator = generator.cuda()
    discriminator = discriminator.cuda()
    test_noise = test_noise.cuda()

In [None]:



num_epochs = 100

start = time.time()

for epoch in range(num_epochs):
    for n_batch, (real_batch, _) in enumerate(data_loader):
        N = real_batch.size(0)
        
        real_data = Variable(images_to_vectors(real_batch))
        fake_data = generator(noise(N).cuda()).detach()
        d_error, d_pred_real, d_pred_fake = train_discriminator(d_optimizer, real_data, fake_data)
        
        fake_data = generator(noise(N))
        g_error = train_generator(g_optimizer, fake_data)
        
        if (n_batch)%100 == 0:
            test_images = vectors_to_images(generator(test_noise))
            test_images = test_images.data
    print("Epoch:{} \t Time: {}".format(epoch, time.time() - start))

end = time.time() - start

In [None]:
dev = torch.device('cuda:0')

torch.cuda.current_device()

torch.cuda.get_device_name(0)

In [None]:
discriminator.to(dev)