In [1]:
import os
import time
import sys
import numpy as np

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data

import torchvision
import torchvision.transforms as transforms
import torchvision.utils as vutils
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline


In [2]:
#fake_2 = fake.detach()

#Usar o fixed_noise para salvar as imagens!

### System properties and libs currently in use
- We have developed using python 3.5.x, pytorch 0.2.1
- No significant attention was given to backwards compatibility

In [3]:
print('__Python VERSION:', sys.version)
print('__pyTorch VERSION:', torch.__version__)
print('__CUDA VERSION')
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
print('__Devices')
print('Active CUDA Device: GPU', torch.cuda.current_device())

__Python VERSION: 3.5.3 |Anaconda 4.4.0 (64-bit)| (default, Mar  6 2017, 11:58:13) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
__pyTorch VERSION: 0.2.0_4
__CUDA VERSION
__CUDNN VERSION: 6021
__Number CUDA Devices: 1
__Devices
Active CUDA Device: GPU 0


### Utilities
- Saving images and models

In [4]:
def save_images(netG, fixed_noise, outputDir,epoch):
    '''
    Generates a batch of images from the given 'noise'.
    Saves 64 of the generated samples to 'outputDir' system path.
    Inputs are the network (netG), a 'noise' input, system path to which images will be saved (outputDir) and current 'epoch'.
    '''
    assert isinstance(fixed_noise,torch.autograd.variable.Variable)
    netG.eval()
    fake = netG(noise)
    netG.train()
    vutils.save_image(fake.data[0:64,:,:,:],'%s/fake_samples_epoch_%03d.png' % (outputDir, epoch), nrow=8)

def save_models(netG, netD, outputDir, epoch):
    '''
    Saves model state dictionary for generator and discriminator networks.
    Inputs are the networks (netG, netD), the system path in which to save(outputDir) and the current 'epoch'.
    '''
    torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (outputDir, epoch))
    torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (outputDir, epoch))

In [5]:
def plot_samples(samples,imageSize):
    fig = plt.figure(figsize=(5, 5))
    gs = gridspec.GridSpec(5, 5)
    gs.update(wspace=0.05, hspace=0.05)
    for i, sample in enumerate(samples[:25]):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(imageSize, imageSize), cmap='Greys_r')
    return fig

In [6]:
def save_images2(g_net,fixed_noise,epoch,imageSize):
    g_net.eval()
    fake_data = g_net(fixed_noise)
    g_net.train()
    fig = plot_samples(fake_data.data.cpu().numpy(), imageSize)
    plt.savefig(outputDir + '/dcgan_img_{:04d}.png'.format(epoch, bbox_inches='tight'))
    plt.close(fig)


In [7]:
cudnn.benchmark = True

use_gpu = torch.cuda.is_available()
if use_gpu:
    print("You are using CUDA. If it is not what you want, manually set this as False!")
print(use_gpu)

You are using CUDA. If it is not what you want, manually set this as False!
True


### Output Directory
This is where images will be saved to.

If directory does not exist, it is created.

In [8]:
outputDir = 'outputdir_train_classifier_bigger'

try:
    os.makedirs(outputDir)
except OSError as err:
    print("OS error: {0}".format(err))

OS error: [Errno 17] File exists: 'outputdir_train_classifier_bigger'


### Dataset definition and hyperparameter setting
- Changing dataset name alters network architecture parameters
- Currently supporting few datasets
- Hyperparameters defined according to Radford et al. (2015)

Valores típicos são

nc = 3,

ngpu = 1,

nz = 100,

ngf = 64,

ndf = 64,

n_extra_d = 0,

n_extra_g = 1,

imageSize = 64

In [9]:
batch_size = 64

chosen_dataset = 'MNIST'

datasets = {
    'MNIST': torchvision.datasets.MNIST,
    'CIFAR10': torchvision.datasets.CIFAR10,
    'ANIME': '/home/gabriel/Redes Neurais/Projeto_Final_GANS/Tutorial_2/dataset/min_anime-faces',
}

dataset = datasets[chosen_dataset]

In [10]:
possible_parameters = {
    'MNIST': {
        'ndf': 64,
        'ngf': 64,
        'nz': 100,
        'nc': 1,
        'imageSize': 64,
        'n_classes' : 10,
        'ngpu': 1,
    },
    'CIFAR10': {
        'ndf': 64,
        'ngf': 64,
        'nz': 100,
        'nc': 3,
        'imageSize' : 64,
        'n_classes' : 10,
        'ngpu' : 1,
    },
    'ANIME': {
        'nc' : 3,
        'ngpu' : 1,
        'nz' : 100,
        'ngf' : 64,
        'ndf' : 64,
        'imageSize' : 64,
        'n_classes' : 1
    }
}

In [11]:
ngf = possible_parameters[chosen_dataset]['ngf']
ndf = possible_parameters[chosen_dataset]['ndf']
nz = possible_parameters[chosen_dataset]['nz']
nc = possible_parameters[chosen_dataset]['nc']
imageSize = possible_parameters[chosen_dataset]['imageSize']
n_classes = possible_parameters[chosen_dataset]['n_classes']
ngpu = possible_parameters[chosen_dataset]['ngpu']

## Creating the Dataset!

In [12]:
!ls ../datasets/

cifar-10-batches-py  cifar-10-python.tar.gz  processed	raw


In [13]:
if dataset == 'ANIME':
    dataset = torchvision.datasets.ImageFolder(
        root='/home/gabriel/Redes Neurais/Projeto_Final_GANS/Tutorial_2/dataset/min_anime-faces',
        transform=transforms.Compose([
                transforms.Scale((imageSize, imageSize)),
                transforms.ToTensor(),
            ])
    )
else:
    transform = transforms.Compose([
                    transforms.Scale((imageSize, imageSize)),
                    transforms.ToTensor(),
                    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)), # bring images to (-1,1)
                ]) 
    dataset_done = dataset('../Gans_teste/datasets', train=True, download=False, transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset_done, batch_size=batch_size, shuffle=True, num_workers=4)
print('Dataloader length:', len(dataloader))
print("Dataset:", dataloader.dataset)


Dataloader length: 938
Dataset: <torchvision.datasets.mnist.MNIST object at 0x7f8387934518>


## Definição dos modelos
- Model is a DCGAN
- Images are sized (nc, 64, 64)

In [14]:
class _netD_DCGAN(nn.Module):
    def __init__(self, ngpu, nz, nc, ndf, n_classes):
        super(_netD_DCGAN, self).__init__()
        self.ngpu = ngpu
        self.conv1 = nn.Conv2d(in_channels = nc, out_channels = ndf, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv2 = nn.Conv2d(in_channels = ndf, out_channels = ndf*2, kernel_size=4, stride=2, padding=1, bias=False)
        self.batch2 = nn.BatchNorm2d(ndf * 2)
        self.conv3 = nn.Conv2d(in_channels = ndf*2, out_channels = ndf*4, kernel_size=4, stride=2, padding=1, bias=False)
        self.batch3 = nn.BatchNorm2d(ndf * 4)
        self.conv4 = nn.Conv2d(in_channels = ndf*4, out_channels = ndf*8, kernel_size=4, stride=2, padding=1, bias=False)
        self.batch4 = nn.BatchNorm2d(ndf * 8)
        
        self.final_conv = nn.Conv2d(in_channels=ndf*8, out_channels=n_classes+1,kernel_size=4,stride=1,padding=0,bias=False)
        
    def forward(self, x):
        x = F.leaky_relu(self.conv1(x), 0.2, inplace=True)
        x = F.leaky_relu(self.batch2(self.conv2(x)), 0.2, inplace=True)
        x = F.leaky_relu(self.batch3(self.conv3(x)), 0.2, inplace=True)
        x = F.leaky_relu(self.batch4(self.conv4(x)), 0.2, inplace=True)
        
        x = self.final_conv(x)
        return(x)

In [32]:
class _netG_DCGAN(nn.Module):
    def __init__(self, ngpu, nz, nc , ngf):
        super(_netG_DCGAN, self).__init__()
        self.ngpu = ngpu
        self.convt1 = nn.ConvTranspose2d(in_channels=nz, out_channels=ngf * 8, kernel_size=4, stride=1, padding=0, bias=False)
        self.batch1 = nn.BatchNorm2d(ngf*8)
        self.convt2 = nn.ConvTranspose2d(in_channels=ngf * 8, out_channels=ngf * 4, kernel_size=4, stride=2, padding=1, bias=False)
        self.batch2 = nn.BatchNorm2d(ngf*4)
        self.convt3 = nn.ConvTranspose2d(in_channels=ngf * 4, out_channels=ngf * 2, kernel_size=4, stride=2, padding=1, bias=False)
        self.batch3 = nn.BatchNorm2d(ngf*2)
        self.convt4 = nn.ConvTranspose2d(in_channels=ngf*2, out_channels=ngf, kernel_size=4, stride=2, padding=1, bias=False)
        self.batch4 = nn.BatchNorm2d(ngf)
        
        self.final_convt = nn.ConvTranspose2d(in_channels=ngf, out_channels=nc, kernel_size=4, stride=2, padding=1, bias=False)
        
    def forward(self, x):
        x = F.leaky_relu(self.batch1(self.convt1(x)), 0.2, inplace=True)
        x = F.leaky_relu(self.batch2(self.convt2(x)), 0.2, inplace=True)
        x = F.leaky_relu(self.batch3(self.convt3(x)), 0.2, inplace=True)
        x = F.leaky_relu(self.batch4(self.convt4(x)), 0.2, inplace=True)
        
        x = self.final_convt(x)
        x = F.tanh(x)
        return (x)

In [33]:
netG = _netG_DCGAN(ngpu, nz, nc, ngf = 28)
netD = _netD_DCGAN(ngpu, nz, nc, ndf, n_classes)


## Inicializador de pesos

In [34]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

In [35]:
netG.apply(weights_init)
netD.apply(weights_init)
print(netG, '\n', netD)

_netG_DCGAN (
  (convt1): ConvTranspose2d(100, 224, kernel_size=(4, 4), stride=(1, 1), bias=False)
  (batch1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True)
  (convt2): ConvTranspose2d(224, 112, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (batch2): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True)
  (convt3): ConvTranspose2d(112, 56, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (batch3): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True)
  (convt4): ConvTranspose2d(56, 28, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (batch4): BatchNorm2d(28, eps=1e-05, momentum=0.1, affine=True)
  (final_convt): ConvTranspose2d(28, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
) 
 _netD_DCGAN (
  (conv1): Conv2d(1, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (conv2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (batch2): BatchNorm2d(128, eps=1e-

## Losses
- Binary Cross-Entropy is used to differentiate real and fake images
- Class loss should be Cross-Entropy

In [36]:
#criterion = nn.CrossEntropyLoss()
criterion = nn.BCELoss()

# Teste se as redes estão funcionando

In [37]:
test_input_G = torch.randn(64,100,1,1)
test_input_G = Variable(test_input_G)
begin = time.time()
test_output_G = netG(test_input_G)
end = time.time()
print('Time elapsed for G creating an image = {0:.6f} seconds.'.format(end-begin))
print('test_output G size', test_output_G.size())
begin = time.time()
test_output_D = netD(test_output_G)
end = time.time()
print('Time elapsed for D analysing fake image = {0:.6f} seconds.'.format(end-begin))


Time elapsed for G creating an image = 0.105659 seconds.
test_output G size torch.Size([64, 1, 64, 64])
Time elapsed for D analysing fake image = 0.378391 seconds.


## Sizes of the tensors

In [38]:
input = torch.FloatTensor(batch_size, 3, imageSize, imageSize)
print('Input images size:', input.size())
noise = torch.FloatTensor(batch_size, nz, 1, 1)
fixed_noise = torch.FloatTensor(batch_size, nz, 1, 1).normal_(0, 1)
print('Code size:', noise.size())

Input images size: torch.Size([64, 3, 64, 64])
Code size: torch.Size([64, 100, 1, 1])


In [46]:
label = torch.FloatTensor(batch_size,n_classes)
print('Label size:', label.size())
fake_label = 10
real_label = 1

Label size: torch.Size([64, 10])


## Broadcast to GPU

In [47]:
if use_gpu:
    netD.cuda()
    netG.cuda()
    criterion = criterion.cuda()
    input,label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

## Turning tensors into Variables

## Optimizer Parameters
- Following the lead of Radford et al., 2015:

    <b>
    1. beta1 = 0.5
    2. beta2 = 0.999
    3. lr = 0.0002
    </b>

In [48]:
beta1, beta2 = 0.5, 0.999
lr = 2.0e-4

optimizerD = optim.Adam(netD.parameters(), lr = lr, betas = (beta1, beta2))
optimizerG = optim.Adam(netG.parameters(), lr = lr, betas = (beta1, beta2))


## Treinamento 

In [51]:
def train_gan(num_epochs, dataloader, netD, netG, outputDir,
              real_labelSmooth=0, epoch_interval=100, D_steps=1, G_steps=1):
    
    # This validation is subjective. WGAN-GP uses 100 steps on the critic (netD).
    assert D_steps < 5, "Keep it low, D_steps is too high."
    assert G_steps < 3, "Keep it low, G_steps is too high."
    #assert batch_size % D_steps == 0, "Use batch_size multiple of D_steps."
    real_label = 1
    size = int(len(dataloader.dataset)/dataloader.batch_size)
    
    print('Lets train!')
    for epoch in range(num_epochs):
        start_iter = time.time()  
        D_x = 0
        D_G_z1 = 0
        D_G_z2 = 0
        errD_acum = 0
        errG_acum = 0
        loss_D = []
        loss_G = []
        print('In epoch = ', epoch, 'real_label_smooth = ',real_labelSmooth )
        for batch, data in enumerate(dataloader, 0):
            real_labelSmooth = np.minimum(real_labelSmooth * (1 - 0.05*epoch), 0)
            #real_labelSmooth = np.maximum(real_labelSmooth * (1 - 0.05*epoch), 0)
            if (epoch == 0 and batch == 0):
                fig = plot_samples(data[0][0:data[0].size(0),:,:,:].cpu().numpy(), imageSize = imageSize)
                plt.savefig(outputDir + '/real_samples.png.png'.format(epoch, bbox_inches='tight'))
                plt.close(fig)
                
            for step in range(D_steps):
                #############################################################
                # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
                # 1A - Train the detective network in the Real Dataset
                #############################################################
                netD.zero_grad()
                start = step*(int(data[0].size()[0]/D_steps))
                end = (step+1)*int(data[0].size()[0]/D_steps)
                
                real_cpu = data[0][start:end]
                real_cpu = real_cpu.cuda()
                batch_size = real_cpu.size(0)
                if np.random.random_sample() > real_labelSmooth:
                    target = data[1][start:end].long().cuda()
                else:
                     target = torch.from_numpy(np.random.randint(0, n_classes, batch_size)).type(torch.LongTensor).cuda()
                
                input, label = Variable(real_cpu), Variable(target)

                output = netD(input)
                print(output.squeeze().size(),label.size())
                errD_real = criterion(output.squeeze(),label)
                errD_real.backward()
                
                D_x += output.data.mean()
                
                #######################################################
                # 1B - Train the detective network in the False Dataset
                #######################################################
                
                noise = Variable(torch.FloatTensor(batch_size, nz, 1, 1).normal_(0,1).cuda())
                fake = netG(noise)
                label = Variable(torch.ones(batch_size).long().fill_(fake_label).cuda())
                output = netD(fake.detach()) # ".detach()" to avoid backprop through G
                errD_fake = criterion(output.squeeze(), label)
                errD_fake.backward() # gradients for fake and real data will be accumulated
                
                D_G_z1 += output.data.mean()
                errD_acum += errD_real.data[0] + errD_fake.data[0]
                optimizerD.step()

            for step in range(G_steps):
                ####################################################################################
                # (2) Update G network: maximize log(D(G(z)))
                # Train the faker with the output from the Detective (but don't train the Detective)
                ####################################################################################
                
                netG.zero_grad()
                label = Variable(torch.from_numpy(np.random.randint(0, n_classes, batch_size)).type(torch.LongTensor).cuda())
                output = netD(fake)
                errG = criterion(output.squeeze(), label)
                errG.backward()
                
                D_G_z2 += output.data.mean()
                errG_acum += errG.data[0]
                optimizerG.step()

        print('epoch = ',epoch)

        end_iter = time.time()        

        print('[%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z1)): %.4f D(G(z2)) %.4f Time Elapsed %.2f s'
            % (epoch, num_epochs, (errD_acum/D_steps)/size, (errG_acum/G_steps)/size, D_x, D_G_z1, D_G_z2, end_iter-start_iter))

        loss_D.append((errD_acum/D_steps)/size)
        loss_G.append((errG_acum/G_steps)/size)
        #Save a grid with the pictures from the dataset, up until 64
        save_images(netG = netG, fixed_noise=  fixed_noise, outputDir = outputDir, epoch = epoch)

        if epoch % epoch_interval == 0:
            # do checkpointing
            save_models(netG = netG, netD = netD, outputDir = outputDir, epoch = epoch)
    return(loss_D,loss_G)   

In [52]:
num_epochs = 25
real_labelSmooth = 0.3

loss_D,loss_G = train_gan(num_epochs = num_epochs, dataloader = dataloader, netD = netD,netG = netG,
                          outputDir = outputDir, real_labelSmooth = real_labelSmooth,)


Lets train!
In epoch =  0 real_label_smooth =  0.3
torch.Size([64, 11]) torch.Size([64])


TypeError: is_same_size received an invalid combination of arguments - got ([31;1mtorch.cuda.FloatTensor[0m), but expected (torch.cuda.LongTensor other)