## This is an simple implement of ACGAN


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import torch.utils.data

In [2]:
# load the data 
# using cifar10 data for DCGAN

dataset = datasets.CIFAR10(root='data/cifar10', download=True,
                           transform=transforms.Compose([
                               transforms.Scale(32),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                           ])
                                      )
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', download=True,
                   transform=transforms.Compose([
                       transforms.Scale(32),
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=50, shuffle=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=64,
                                         shuffle=True, num_workers=int(2))
len(train_loader)

Files already downloaded and verified
Files already downloaded


1200

* 数据加载
1. ToTensor是指把PIL.Image(RGB) 或者numpy.ndarray(H x W x C) 从0到255的值映射到0到1的范围内，并转化成Tensor格式。

### 按照DCGAN网络架构中的设计进行定义

* 首先需要定义的是BN和weight filler

In [3]:
def weight_filler(m):
    classname = m.__class__.__name__  #获取m的类型名
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

In [4]:
#使用一个类构建网络，
#通过super函数继承 nn.Module的构造方法
#使用sequential的方法构建网络模型

class G(nn.Module):
    def __init__(self):
        super(G, self).__init__() 
        self.main = nn.Sequential(
            nn.ConvTranspose2d(110, 64 * 8, 4, 1, 0, bias=False), # 64*8 kernel nums; 4 kernel size; 1 stride; 0 padding
            nn.BatchNorm2d(64*8),
            nn.ReLU(True),
            # n kernel * 4 * 4
            nn.ConvTranspose2d(64*8, 64 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*4),
            nn.ReLU(True),
            # n kernel * 8 * 8
            nn.ConvTranspose2d(64 * 4, 64 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*2),
            nn.ReLU(True),
            # n kernel * 16 * 16
            nn.ConvTranspose2d(64 * 2, 1, 4, 2, 1, bias=False),
            nn.Tanh()
            # nc * 32 * 32
        ) 
        
    def forward(self, x):
        #x = x.view(x.size(0), x.size(1))
        return self.main(x)
g_model = G()
print(g_model)

G (
  (main): Sequential (
    (0): ConvTranspose2d(110, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU (inplace)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU (inplace)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (8): ReLU (inplace)
    (9): ConvTranspose2d(128, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): Tanh ()
  )
)


In [5]:
# 卷积操作的计算
# (W−F+2P)/S+1 : W: input size; F: kernel size; P:padding; S: stride;
class D(nn.Module):
    def __init__(self):
        super(D, self).__init__()
        self.main = nn.Sequential(
        nn.Conv2d(1, 64 * 4, 4, 2, 1, bias=False), # 64 kernel nums; 4 kernel size; 2 stride; 1 padding
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf) x 16 x 16
        nn.Conv2d(64 * 4, 64 * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(64*2),
        nn.LeakyReLU(0.2, inplace=True),
         # state size. (ndf) x 8 x 8
        nn.Conv2d(64 * 2, 64 * 1, 4, 2, 1, bias=False),
        nn.BatchNorm2d(64*1),
        nn.LeakyReLU(0.2, inplace=True),
         # state size. (ndf) x 4 x 4
        )
        self.discrimator = nn.Sequential(
        nn.Conv2d(64 * 1, 1, 4, 1, 0, bias=False),
        nn.Sigmoid()
        )
         # final output)
        self.classify = nn.Sequential(
        nn.Linear(1024, 10),
        nn.Softmax()
        )
    def forward(self, x):
        x = self.main(x)
        x_ac = x.view(-1, 1024)
        output_D = self.discrimator(x)
        output_C = self.classify(x_ac)
        return output_D, output_C
    
d_model = D()
print(d_model)

D (
  (main): Sequential (
    (0): Conv2d(1, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU (0.2, inplace)
    (2): Conv2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (4): LeakyReLU (0.2, inplace)
    (5): Conv2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (7): LeakyReLU (0.2, inplace)
  )
  (discrimator): Sequential (
    (0): Conv2d(64, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): Sigmoid ()
  )
  (classify): Sequential (
    (0): Linear (1024 -> 10)
    (1): Softmax ()
  )
)


In [6]:
input_ = torch.FloatTensor(50, 1, 32, 32)
noise = torch.FloatTensor(50, 100, 1, 1) #(batch size; 100 dimension; 1 * 1)
fixed_noise = torch.FloatTensor(50, 100, 1, 1).normal_(0, 1)
label = torch.FloatTensor(50)
labels = torch.LongTensor(50)
fix_label = torch.FloatTensor(50)
for i in range(0,50):
    fix_label[i] = i % 10;
    
fix = torch.LongTensor(50,1).copy_(fix_label)
fix_onehot = torch.FloatTensor(50, 10)
fix_onehot.zero_()
fix_onehot.scatter_(1, fix, 1)
fix_concat = [fixed_noise, fix_onehot]
fix_concat = torch.cat(fix_concat, 1)
input_ = Variable(input_)

label = Variable(label)
#noise = Variable(noise)
fix_concat = Variable(fix_concat)
#noise.data.normal_(0,1)

In [7]:
#weight 初始化
g_model.apply(weight_filler)
d_model.apply(weight_filler)

# 设置求解器： DCGAN使用Adam进行求解； 学习率设置为0.0002
optimizerD = optim.Adam(d_model.parameters(), lr = 0.0002, betas = (0.5, 0.999)) 
optimizerG = optim.Adam(g_model.parameters(), lr = 0.0002, betas = (0.5, 0.999))

criterion_d = nn.BCELoss()
criterion_ac = nn.NLLLoss()

In [9]:
for epoch in range(100):
    for i, data in enumerate(train_loader, 0):
        #update the D model with real data
        d_model.zero_grad()
        real, labels = data
        
        real_label = torch.LongTensor(50).copy_(labels)
        labels = Variable(labels)
        batch_size = real.size(0)
        input_.data.resize_(real.size()).copy_(real)
        label.data.resize_(batch_size).fill_(1) # real label is 1
        
        output_D, output_C = d_model(input_)
        #print output_C
        loss_D_r = criterion_d(output_D, label)
        loss_C = criterion_ac(output_C, labels)
        loss_r = loss_D_r + loss_C
        loss_r.backward()
        D_real = output_D.data.mean()
        C_real = output_C.data.mean()
        #update D model with fake data
        #noise.data.resize_(batch_size, 100, 1, 1)
        #noise.data.normal_(0, 1)
        label.data.fill_(0) # fake label
        noise = torch.FloatTensor(batch_size, 100, 1, 1).normal_(0,1) 
               
        y = torch.LongTensor(batch_size,1).copy_(real_label)
        y_onehot = torch.FloatTensor(batch_size, 10)
        y_onehot.zero_()
        y_onehot.scatter_(1,y,1)
        
        z_concat = [noise, y_onehot]
        z_concat = torch.cat(z_concat, 1)
        z_concat = Variable(z_concat)
        
        fake_input = g_model(z_concat)
        output_D, output_C = d_model(fake_input.detach())
        loss_D_f = criterion_d(output_D, label)
        loss_C_f = criterion_ac(output_C, labels)
        loss_f = loss_D_f + loss_C_f
        loss_f.backward()
        D_fake = output_D.data.mean()
        
        errD = D_real + D_fake
        
        optimizerD.step()
        
        #update G mdoel
        g_model.zero_grad()
        label.data.fill_(1) # G model want the G samples be 1
        #noise.data.resize_(batch_size, 100, 1, 1)
        #noise.data.normal_(0, 1)
        #fake_input = g_model(noise)
        output_D, output_C = d_model(fake_input)
        
        loss_G = criterion_d(output_D, label)
        loss_C_G = criterion_ac(output_C, labels)
        loss_g = loss_G + loss_C_G
        loss_g.backward()
        loss_D_G = output_D.data.mean()
        
        optimizerG.step()
        
        
        if i%100 == 0:
            print('[%d/%d][%d/%d] Loss_C: %.4f Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
              % (epoch, 1000, i, len(train_loader),
                 C_real, D_real + D_fake, loss_D_G, D_real, D_fake, loss_D_G ))
            
            vutils.save_image(real, 
                             '%s/real_sample.png' % 'logs')
            
            fake = g_model(fix_concat)
            vutils.save_image(fake.data,
                             '%s/fake_sample_epoch_%03d.png' % ('logs', epoch))

[0/1000][0/1200] Loss_C: 0.1000 Loss_D: 1.0705 Loss_G: 0.3250 D(x): 0.5441 D(G(z)): 0.5264 / 0.3250
[0/1000][100/1200] Loss_C: 0.1000 Loss_D: 1.1044 Loss_G: 0.0132 D(x): 0.9220 D(G(z)): 0.1824 / 0.0132
[0/1000][200/1200] Loss_C: 0.1000 Loss_D: 1.5358 Loss_G: 0.0091 D(x): 0.9758 D(G(z)): 0.5600 / 0.0091
[0/1000][300/1200] Loss_C: 0.1000 Loss_D: 0.9169 Loss_G: 0.0186 D(x): 0.8910 D(G(z)): 0.0258 / 0.0186
[0/1000][400/1200] Loss_C: 0.1000 Loss_D: 1.1023 Loss_G: 0.0520 D(x): 0.9286 D(G(z)): 0.1737 / 0.0520
[0/1000][500/1200] Loss_C: 0.1000 Loss_D: 1.0304 Loss_G: 0.0247 D(x): 0.9832 D(G(z)): 0.0473 / 0.0247
[0/1000][600/1200] Loss_C: 0.1000 Loss_D: 0.9996 Loss_G: 0.0109 D(x): 0.9892 D(G(z)): 0.0105 / 0.0109
[0/1000][700/1200] Loss_C: 0.1000 Loss_D: 1.0186 Loss_G: 0.0215 D(x): 0.9741 D(G(z)): 0.0446 / 0.0215
[0/1000][800/1200] Loss_C: 0.1000 Loss_D: 1.0861 Loss_G: 0.0920 D(x): 0.8578 D(G(z)): 0.2283 / 0.0920
[0/1000][900/1200] Loss_C: 0.1000 Loss_D: 0.7942 Loss_G: 0.1316 D(x): 0.7004 D(G(z))

KeyboardInterrupt: 