In [28]:
import torch
import torch.nn as nn
import torch.nn.parallel
from __future__ import print_function
import argparse
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import torch.autograd as autograd
import os

# Make CUDA device index 1 the current device for all subsequent .cuda() calls.
# NOTE(review): hard-coded index; this fails on machines with fewer than two GPUs.
torch.cuda.set_device(1)
# Mini-batch size. Note the all-lowercase spelling: a later cell also defines `batchSize`.
batchsize=64

class DCGAN_D(nn.Module):
    """DCGAN-style convolutional critic built without batch normalisation.

    The network is a stack of 4x4, stride-2 convolutions (each followed by
    LeakyReLU(0.2)) that halve the spatial size while doubling the channel
    count until the feature map is 4x4, followed by a final 4x4 convolution
    that reduces it to a single value per sample.

    Args:
        isize: input height/width. Must be a power of two and at least 16 so
            that repeated halving lands exactly on the 4x4 map the final
            convolution expects.
        nz: unused here; kept so the signature mirrors DCGAN_G.
        nc: number of input channels.
        ndf: number of channels produced by the first convolution.
        ngpu: when > 1 and the input is a CUDA float tensor, the forward pass
            is spread over GPUs ``0 .. ngpu-1``.
        n_extra_layers: number of extra 3x3, stride-1 conv + LeakyReLU pairs
            inserted after the first convolution.

    NOTE(review): sub-module names contain '.', and the state-dict keys built
    from them are relied on elsewhere in this notebook. Newer PyTorch releases
    are believed to reject such names -- confirm before upgrading.
    """

    def __init__(self, isize=32, nz=100, nc=3, ndf=64, ngpu=0, n_extra_layers=0):
        super(DCGAN_D, self).__init__()
        self.ngpu = ngpu
        assert isize % 16 == 0, "isize has to be a multiple of 16"
        # A multiple of 16 is not sufficient: e.g. 48 halves to 24, 12, 6, 3 and
        # never reaches the 4x4 map consumed by the final convolution.
        assert isize >= 16 and (int(isize) & (int(isize) - 1)) == 0, \
            "isize has to be a power of two"

        main = nn.Sequential()
        # input is nc x isize x isize
        main.add_module('initial.conv.{0}-{1}'.format(nc, ndf),
                        nn.Conv2d(nc, ndf, 4, 2, 1, bias=False))
        main.add_module('initial.relu.{0}'.format(ndf),
                        nn.LeakyReLU(0.2, inplace=True))
        # Floor division keeps csize an integer on both Python 2 and Python 3
        # (and matches DCGAN_G, which already uses //).
        csize, cndf = isize // 2, ndf

        # Extra layers
        for t in range(n_extra_layers):
            main.add_module('extra-layers-{0}.{1}.conv'.format(t, cndf),
                            nn.Conv2d(cndf, cndf, 3, 1, 1, bias=False))
            #main.add_module('extra-layers-{0}.{1}.batchnorm'.format(t, cndf),
                            #nn.BatchNorm2d(cndf))
            main.add_module('extra-layers-{0}.{1}.relu'.format(t, cndf),
                            nn.LeakyReLU(0.2, inplace=True))

        # Downsampling pyramid: halve the resolution, double the channels.
        while csize > 4:
            in_feat = cndf
            out_feat = cndf * 2
            main.add_module('pyramid.{0}-{1}.conv'.format(in_feat, out_feat),
                            nn.Conv2d(in_feat, out_feat, 4, 2, 1, bias=False))
            #main.add_module('pyramid.{0}.batchnorm'.format(out_feat),
                            #nn.BatchNorm2d(out_feat))
            main.add_module('pyramid.{0}.relu'.format(out_feat),
                            nn.LeakyReLU(0.2, inplace=True))
            cndf = cndf * 2
            csize = csize // 2

        # state size. K x 4 x 4
        main.add_module('final.{0}-{1}.conv'.format(cndf, 1),
                        nn.Conv2d(cndf, 1, 4, 1, 0, bias=False))
        self.main = main


    def forward(self, input):
        """Run the critic on ``input`` and return the raw (unbounded) scores."""
        if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)

        return output

class DCGAN_G(nn.Module):
    """DCGAN-style transposed-convolution generator built without batch norm.

    A latent vector of shape ``nz x 1 x 1`` is first expanded to a 4x4 map and
    then repeatedly upsampled by 4x4, stride-2 transposed convolutions (each
    followed by ReLU) that halve the channel count, ending with a transposed
    convolution to ``nc`` channels and a Tanh.

    Args:
        isize: output height/width. Must be a power of two and at least 16.
        nz: number of channels of the latent input.
        nc: number of output channels.
        ngf: number of channels entering the final transposed convolution.
        ngpu: when > 1 and the input is a CUDA float tensor, the forward pass
            is spread over GPUs ``0 .. ngpu-1``.
        n_extra_layers: number of extra 3x3, stride-1 conv + ReLU pairs
            inserted before the final transposed convolution.

    NOTE(review): sub-module names contain '.'; newer PyTorch releases are
    believed to reject such names -- confirm before upgrading.
    """

    def __init__(self, isize=32, nz=100, nc=3, ngf=64, ngpu=0, n_extra_layers=0):
        super(DCGAN_G, self).__init__()
        self.ngpu = ngpu
        assert isize % 16 == 0, "isize has to be a multiple of 16"
        # Sizes such as 48 or 0 pass the check above but can never be reached by
        # doubling from 4, which previously made the sizing loop spin forever.
        assert isize >= 16 and (int(isize) & (int(isize) - 1)) == 0, \
            "isize has to be a power of two"

        # Work out the channel width of the first (4x4) feature map: it doubles
        # for every doubling needed to get from 4 up to isize.
        cngf, tisize = ngf//2, 4
        while tisize < isize:
            cngf = cngf * 2
            tisize = tisize * 2

        main = nn.Sequential()
        # input is Z, going into a convolution
        main.add_module('initial.{0}-{1}.convt'.format(nz, cngf),
                        nn.ConvTranspose2d(nz, cngf, 4, 1, 0, bias=False))
        #main.add_module('initial.{0}.batchnorm'.format(cngf),
                        #nn.BatchNorm2d(cngf))
        main.add_module('initial.{0}.relu'.format(cngf),
                        nn.ReLU(True))

        # Upsampling pyramid: double the resolution, halve the channels, and
        # stop one step short so the final layer performs the last doubling.
        csize = 4
        while csize < isize//2:
            main.add_module('pyramid.{0}-{1}.convt'.format(cngf, cngf//2),
                            nn.ConvTranspose2d(cngf, cngf//2, 4, 2, 1, bias=False))
            #main.add_module('pyramid.{0}.batchnorm'.format(cngf//2),
                            #nn.BatchNorm2d(cngf//2))
            main.add_module('pyramid.{0}.relu'.format(cngf//2),
                            nn.ReLU(True))
            cngf = cngf // 2
            csize = csize * 2

        # Extra layers
        for t in range(n_extra_layers):
            main.add_module('extra-layers-{0}.{1}.conv'.format(t, cngf),
                            nn.Conv2d(cngf, cngf, 3, 1, 1, bias=False))
            #main.add_module('extra-layers-{0}.{1}.batchnorm'.format(t, cngf),
                            #nn.BatchNorm2d(cngf))
            main.add_module('extra-layers-{0}.{1}.relu'.format(t, cngf),
                            nn.ReLU(True))

        main.add_module('final.{0}-{1}.convt'.format(cngf, nc),
                        nn.ConvTranspose2d(cngf, nc, 4, 2, 1, bias=False))
        main.add_module('final.{0}.tanh'.format(nc),
                        nn.Tanh())
        self.main = main

    def forward(self, input):
        """Map a batch of latent vectors to images with values in [-1, 1] (Tanh)."""
        if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)
        return output
###############################################################################

In [29]:
import cv2
import numpy as np
import functions
batchSize = 64

# Load the training images through the project-local helper.
# NOTE(review): presumably returns a float array shaped (N, 3, 32, 32) to match
# the networks below -- confirm against functions.get_mnist.
X_train = functions.get_mnist()
# The GAN is unconditional; TensorDataset still needs a second tensor, so use all-zero labels.
X_label = torch.LongTensor(np.zeros((X_train.shape[0]),dtype=int))
X_train = torch.FloatTensor(X_train)
train = torch.utils.data.TensorDataset(X_train,X_label)
# Use the batch size defined in this cell rather than the differently spelled
# `batchsize` from another cell, so the cell does not depend on hidden state.
dataloader = torch.utils.data.DataLoader(train, shuffle=True, batch_size=batchSize)

dataiter = iter(dataloader)
# Model hyper-parameters.
ngpu = 0
nz = 100
ngf = 64
ndf = 64
nc = 3
n_extra_layers = 0
# custom weights initialization called on netG and netD
def weights_init(m):
    """Re-initialise one module in place; intended for use with ``net.apply``.

    Modules whose class name contains 'Conv' get weights drawn from N(0, 0.02).
    Modules whose class name contains 'BatchNorm' get weights drawn from
    N(1, 0.02) and a zero bias. Every other module is left untouched.
    """
    layer_type = type(m).__name__
    if 'Conv' in layer_type:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_type:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

# Build both networks from the hyper-parameters defined above instead of the
# constructors' defaults, so that e.g. changing `nz` keeps the generator and
# the noise buffers below in agreement.
netG = DCGAN_G(nz=nz, nc=nc, ngf=ngf, ngpu=ngpu, n_extra_layers=n_extra_layers)
netG.apply(weights_init)
print(netG)

netD = DCGAN_D(nz=nz, nc=nc, ndf=ndf, ngpu=ngpu, n_extra_layers=n_extra_layers)
netD.apply(weights_init)
print(netD)

# Pre-allocated buffers that the training loop resizes and refills every step.
input = torch.FloatTensor(batchSize, nc, 32, 32)
noise = torch.FloatTensor(batchSize, nz, 1, 1)
# Fixed latent batch, used to render comparable sample grids over time.
fixed_noise = torch.FloatTensor(batchSize, nz, 1, 1).normal_(0, 1)
# +1 / -1 gradient seeds passed to backward() to minimise / maximise a term.
one = torch.FloatTensor([1])
mone = one * -1

netD.cuda()
netG.cuda()
input = input.cuda()
one, mone = one.cuda(), mone.cuda()
noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

optimizerD = optim.Adam(netD.parameters(), lr=1e-4, betas=(0.5, 0.9))
optimizerG = optim.Adam(netG.parameters(), lr=1e-4, betas=(0.5, 0.9))

DCGAN_G (
  (main): Sequential (
    (initial.100-256.convt): ConvTranspose2d(100, 256, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (initial.256.relu): ReLU (inplace)
    (pyramid.256-128.convt): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (pyramid.128.relu): ReLU (inplace)
    (pyramid.128-64.convt): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (pyramid.64.relu): ReLU (inplace)
    (final.64-3.convt): ConvTranspose2d(64, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (final.3.tanh): Tanh ()
  )
)
DCGAN_D (
  (main): Sequential (
    (initial.conv.3-64): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (initial.relu.64): LeakyReLU (0.2, inplace)
    (pyramid.64-128.conv): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (pyramid.128.relu): LeakyReLU (0.2, inplace)
    (pyramid.128-256.conv): Conv2

In [None]:
def calc_gradient_penalty(netD, real_data, fake_data,lamda,batch_size, eps=1e-8):
    """Finite-difference Lipschitz penalty between two random interpolates.

    Two independent random convex combinations of ``real_data`` and
    ``fake_data`` are pushed through ``netD``; the penalty for each sample is

        lamda * (|D(x2) - D(x1)| / ||x1 - x2||_2 - 1) ** 2

    Args:
        netD: callable critic applied to each interpolated batch.
        real_data: tensor of real samples, first dimension ``batch_size``.
        fake_data: tensor of generated samples, same size as ``real_data``.
        lamda: penalty weight.
        batch_size: number of samples in the batch.
        eps: lower bound applied to the input distance. Without it, coincident
            interpolates (e.g. real == fake) produce 0/0 = NaN, which then
            poisons every parameter on the next optimizer step.

    Returns:
        A 1-D Variable/tensor of length ``batch_size`` holding the per-sample
        penalty; the caller reduces it.
    """
    def _random_interpolate():
        # One mixing coefficient per sample, broadcast over the remaining dims.
        alpha = torch.rand(batch_size, 1, 1, 1)
        # Follow the data's device instead of unconditionally calling .cuda().
        if real_data.is_cuda:
            alpha = alpha.cuda()
        alpha = alpha.expand(real_data.size())
        return alpha * real_data + ((1 - alpha) * fake_data)

    # The interpolates are constants with respect to the critic's parameters,
    # so they do not need requires_grad.
    interpolates_1 = Variable(_random_interpolate())
    interpolates_2 = Variable(_random_interpolate())

    disc_interpolates_1 = netD(interpolates_1).view(batch_size, -1)
    disc_interpolates_2 = netD(interpolates_2).view(batch_size, -1)

    output_gap = (disc_interpolates_2 - disc_interpolates_1).norm(1, dim=1)
    input_gap = (interpolates_1.view(batch_size, -1)
                 - interpolates_2.view(batch_size, -1)).norm(2, dim=1)

    slope = output_gap / input_gap.clamp(min=eps)
    gradient_penalty = lamda * (slope - 1) ** 2
    return gradient_penalty

# Disabled alternative kept as an inert string literal: it computes the penalty from
# autograd gradients at a single interpolate instead of a finite difference.
# As the last expression of the cell, the string is echoed as the cell's output.
'''
def calc_gradient_penalty(netD, real_data, fake_data,lamda,batch_size):
    #print real_data.size()
    alpha = torch.rand(batch_size,1,1,1)
    #print (real_data.size())
    alpha = alpha.expand(real_data.size())
    alpha = alpha.cuda()

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = interpolates.cuda()
    interpolates = Variable(interpolates, requires_grad=True)

    disc_interpolates = netD(interpolates)

    gradients, = autograd.grad(outputs=disc_interpolates.sum(), inputs=interpolates,
                              create_graph=True)
    
    #gradients, = autograd.grad(outputs=disc_interpolates.sum(), inputs=interpolates,
                              #grad_outputs=torch.ones(disc_interpolates.size()).cuda(),
                              #create_graph=True, retain_graph=True, only_inputs=True)
    #gradients*gradients
    
    gradient_penalty = ((gradients.view(batch_size,-1).norm(2, dim=1) - 1) ** 2).mean().view(1)* lamda
    return gradient_penalty
'''

'\ndef calc_gradient_penalty(netD, real_data, fake_data,lamda,batch_size):\n    #print real_data.size()\n    alpha = torch.rand(batch_size,1,1,1)\n    #print (real_data.size())\n    alpha = alpha.expand(real_data.size())\n    alpha = alpha.cuda()\n\n    interpolates = alpha * real_data + ((1 - alpha) * fake_data)\n    interpolates = interpolates.cuda()\n    interpolates = Variable(interpolates, requires_grad=True)\n\n    disc_interpolates = netD(interpolates)\n\n    gradients, = autograd.grad(outputs=disc_interpolates.sum(), inputs=interpolates,\n                              create_graph=True)\n    \n    #gradients, = autograd.grad(outputs=disc_interpolates.sum(), inputs=interpolates,\n                              #grad_outputs=torch.ones(disc_interpolates.size()).cuda(),\n                              #create_graph=True, retain_graph=True, only_inputs=True)\n    #gradients*gradients\n    \n    gradient_penalty = ((gradients.view(batch_size,-1).norm(2, dim=1) - 1) ** 2).mean().view(1

In [None]:
gen_iterations = 0
critic_iters = 5   # critic updates per generator update
lamda = 10         # weight of the gradient penalty term

for epoch in range(10000):
    data_iter = iter(dataloader)
    i = 0
    while i < len(dataloader):
        ############################
        # (1) Update D network
        ###########################
        for p in netD.parameters():
            p.requires_grad = True

        for iter_d in range(critic_iters):
            # Data exhausted for this epoch: stop the critic updates early.
            if i >= len(dataloader):
                break
            # next() works on both Python 2 and Python 3 iterators.
            real_cpu, _ = next(data_iter)
            i += 1
            batch_size = real_cpu.size(0)
            real_cpu = real_cpu.cuda()
            input.resize_as_(real_cpu).copy_(real_cpu)
            inputv = Variable(input)
            netD.zero_grad()
            netG.zero_grad()

            # train with real: maximise D(real), hence the -1 gradient seed
            errD_real = netD(inputv).mean().view(1)
            errD_real.backward(mone)

            # train with fake: minimise D(fake)
            noise.resize_(batch_size, nz, 1, 1).normal_(0, 1)
            noisev = Variable(noise, volatile = True) # totally freeze netG
            # Re-wrap the raw data so no graph back into netG is kept.
            fake = Variable(netG(noisev).data)
            #inputv = fake
            errD_fake = netD(fake).mean().view(1)
            errD_fake.backward(one)

            # train with gradient penalty
            gradient_penalty = calc_gradient_penalty(netD, inputv.data, fake.data,lamda,batch_size).mean().view(1)
            gradient_penalty.backward(one)

            D_cost = -errD_real + errD_fake + gradient_penalty
            errD = errD_real - errD_fake
            optimizerD.step()


        ############################
        # (2) Update G network
        ###########################

        for p in netD.parameters():
            p.requires_grad = False # to avoid computation
        netG.zero_grad()
        netD.zero_grad()

        # in case our last batch was the tail batch of the dataloader,
        # make sure we feed a full batch of noise
        noise.resize_(batchSize, nz, 1, 1).normal_(0, 1)
        noisev = Variable(noise)
        fake = netG(noisev)
        # Generator maximises D(fake), hence the -1 gradient seed.
        errG = netD(fake).mean().view(1)
        errG.backward(mone)
        optimizerG.step()
        gen_iterations += 1


        if gen_iterations % 20 == 0:

            print('[%d/%d][%d/%d][%d] Loss_D: %f Loss_G: %f Loss_D_real: %f Loss_D_fake %f'
            % (epoch, 10000, i, len(dataloader), gen_iterations,
            errD.data[0], errG.data[0], errD_real.data[0], errD_fake.data[0]))
            # Undo the [-1, 1] scaling before writing images to disk.
            real_cpu = real_cpu.mul(0.5).add(0.5)
            vutils.save_image(real_cpu, 'WGANGP.png')
            fake = netG(Variable(fixed_noise, volatile=True))
            fake.data = fake.data.mul(0.5).add(0.5)
            vutils.save_image(fake.data, 'WGANGP_fake.png')

[0/10000][100/938][20] Loss_D: 157.201614 Loss_G: 34.641701 Loss_D_real: 192.667374 Loss_D_fake 35.465759
[0/10000][200/938][40] Loss_D: 114.777359 Loss_G: 43.424484 Loss_D_real: 160.440002 Loss_D_fake 45.662647
[0/10000][300/938][60] Loss_D: 125.514709 Loss_G: 23.186188 Loss_D_real: 150.139557 Loss_D_fake 24.624851
[0/10000][400/938][80] Loss_D: 109.868286 Loss_G: 43.990837 Loss_D_real: 147.647278 Loss_D_fake 37.778996
[0/10000][500/938][100] Loss_D: 102.109276 Loss_G: 74.989090 Loss_D_real: 180.435303 Loss_D_fake 78.326027
[0/10000][600/938][120] Loss_D: 105.582039 Loss_G: 95.860771 Loss_D_real: 194.214020 Loss_D_fake 88.631981
[0/10000][700/938][140] Loss_D: 97.907967 Loss_G: 82.020920 Loss_D_real: 202.778503 Loss_D_fake 104.870537
[0/10000][800/938][160] Loss_D: 104.850861 Loss_G: 127.611526 Loss_D_real: 237.260468 Loss_D_fake 132.409607
[0/10000][900/938][180] Loss_D: 101.088303 Loss_G: 121.259399 Loss_D_real: 232.335648 Loss_D_fake 131.247345
[1/10000][60/938][200] Loss_D: 106.98

[8/10000][180/938][1540] Loss_D: 114.969391 Loss_G: -87.296211 Loss_D_real: -14.884798 Loss_D_fake -129.854187
[8/10000][280/938][1560] Loss_D: 107.734879 Loss_G: -151.915863 Loss_D_real: -18.334713 Loss_D_fake -126.069588
[8/10000][380/938][1580] Loss_D: 99.769836 Loss_G: -181.887344 Loss_D_real: -25.105713 Loss_D_fake -124.875549
[8/10000][480/938][1600] Loss_D: 112.684494 Loss_G: -193.305023 Loss_D_real: -51.356533 Loss_D_fake -164.041031
[8/10000][580/938][1620] Loss_D: 118.803925 Loss_G: -150.884781 Loss_D_real: -49.254345 Loss_D_fake -168.058273
[8/10000][680/938][1640] Loss_D: 123.356415 Loss_G: -183.752609 Loss_D_real: -45.049297 Loss_D_fake -168.405716
[8/10000][780/938][1660] Loss_D: 109.669586 Loss_G: -172.763535 Loss_D_real: -42.989582 Loss_D_fake -152.659164
[8/10000][880/938][1680] Loss_D: 112.787804 Loss_G: -191.313461 Loss_D_real: -88.075462 Loss_D_fake -200.863266
[9/10000][40/938][1700] Loss_D: 125.823250 Loss_G: -161.495102 Loss_D_real: -44.815376 Loss_D_fake -170.63

[15/10000][900/938][3000] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][60/938][3020] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][160/938][3040] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][260/938][3060] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][360/938][3080] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][460/938][3100] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][560/938][3120] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][660/938][3140] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][760/938][3160] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[16/10000][860/938][3180] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[17/10000][20/938][3200] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[17/10000][120/938][3220] Loss_D: nan Loss_G: nan Loss_D_real: nan Loss_D_fake nan
[17/10

In [None]:
# Scratch: show the most recent gradient-penalty value left behind by the training loop.
gradient_penalty

In [None]:
# Scratch check of Tensor.norm: take the 2-norm along dim=1 of a small 2x2 float32 tensor.
a = torch.from_numpy(np.array([[1,1],[32,32]],dtype='float32'))
print(a.size())
a.norm(2,dim=1)

In [None]:
# Scratch: display the small test tensor `a`.
a

In [None]:
# Scratch: show the raw tensor of the last real batch the training loop fed to the critic.
inputv.data

In [None]:
# Write the current generator and critic weights to disk, tagging the file names
# with whatever value `epoch` holds after the training loop.
checkpoint_targets = (
    (netG, 'netG_try_kmeans_epoch_%d.pth'),
    (netD, 'netD_try_kmeans_epoch_%d.pth'),
)
for net, name_pattern in checkpoint_targets:
    torch.save(net.state_dict(), name_pattern % (epoch))

In [None]:
class DCGAN_D_feature(nn.Module):
    """Feature extractor with the layer layout of DCGAN_D minus its final conv.

    Layers are created under the same names as in DCGAN_D, so a DCGAN_D
    state dict with the final convolution's weight removed can be loaded
    directly. The forward pass returns the last pyramid activation (a 4x4
    map) instead of a single score.

    Args:
        isize: input height/width. Must be a power of two and at least 16 so
            that repeated halving lands exactly on a 4x4 map.
        nz: unused; kept so the signature mirrors DCGAN_D.
        nc: number of input channels.
        ndf: number of channels produced by the first convolution.
        ngpu: when > 1 and the input is a CUDA float tensor, the forward pass
            is spread over GPUs ``0 .. ngpu-1``.
        n_extra_layers: number of extra 3x3, stride-1 conv + LeakyReLU pairs
            inserted after the first convolution.

    NOTE(review): sub-module names contain '.'; newer PyTorch releases are
    believed to reject such names -- confirm before upgrading.
    """

    def __init__(self, isize=32, nz=100, nc=3, ndf=64, ngpu=0, n_extra_layers=0):
        super(DCGAN_D_feature, self).__init__()
        self.ngpu = ngpu
        assert isize % 16 == 0, "isize has to be a multiple of 16"
        # A multiple of 16 is not sufficient: e.g. 48 halves to 24, 12, 6, 3 and
        # never produces the 4x4 map this extractor is meant to return.
        assert isize >= 16 and (int(isize) & (int(isize) - 1)) == 0, \
            "isize has to be a power of two"

        main = nn.Sequential()
        # input is nc x isize x isize
        main.add_module('initial.conv.{0}-{1}'.format(nc, ndf),
                        nn.Conv2d(nc, ndf, 4, 2, 1, bias=False))
        main.add_module('initial.relu.{0}'.format(ndf),
                        nn.LeakyReLU(0.2, inplace=True))
        # Floor division keeps csize an integer on both Python 2 and Python 3.
        csize, cndf = isize // 2, ndf

        # Extra layers
        for t in range(n_extra_layers):
            main.add_module('extra-layers-{0}.{1}.conv'.format(t, cndf),
                            nn.Conv2d(cndf, cndf, 3, 1, 1, bias=False))
            #main.add_module('extra-layers-{0}.{1}.batchnorm'.format(t, cndf),
                            #nn.BatchNorm2d(cndf))
            main.add_module('extra-layers-{0}.{1}.relu'.format(t, cndf),
                            nn.LeakyReLU(0.2, inplace=True))

        # Downsampling pyramid: halve the resolution, double the channels.
        while csize > 4:
            in_feat = cndf
            out_feat = cndf * 2
            main.add_module('pyramid.{0}-{1}.conv'.format(in_feat, out_feat),
                            nn.Conv2d(in_feat, out_feat, 4, 2, 1, bias=False))
            #main.add_module('pyramid.{0}.batchnorm'.format(out_feat),
                            #nn.BatchNorm2d(out_feat))
            main.add_module('pyramid.{0}.relu'.format(out_feat),
                            nn.LeakyReLU(0.2, inplace=True))
            cndf = cndf * 2
            csize = csize // 2

        # state size. K x 4 x 4
        # The scoring convolution of DCGAN_D is intentionally left out here.
        #main.add_module('final.{0}-{1}.conv'.format(cndf, 1),
                        #nn.Conv2d(cndf, 1, 4, 1, 0, bias=False))
        self.main = main


    def forward(self, input):
        """Return the last pyramid activation for ``input``."""
        if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)

        #output = output.mean(0)
        return output

In [None]:
import cv2
import numpy as np
# One sample per batch so each forward pass yields exactly one feature vector.
batchSize = 1

# Keep only the first 1000 samples, as a NumPy array (X_train) and as a tensor (X_train_).
X_train = functions.get_mnist()[0:1000, :, :, :]
X_label = torch.LongTensor(np.zeros(X_train.shape[0], dtype=int))
X_train_ = torch.FloatTensor(X_train)

# shuffle=False keeps the loader order identical to the row order of X_train.
train = torch.utils.data.TensorDataset(X_train_, X_label)
dataloader = torch.utils.data.DataLoader(train, batch_size=batchSize, shuffle=False)
dataiter = iter(dataloader)

# Model hyper-parameters (same values as used for training).
ngpu, nz, ngf, ndf, nc, n_extra_layers = 0, 100, 64, 64, 3, 0

In [None]:
# Load the checkpoints written at epoch 1910.
dict_G = torch.load('netG_try_kmeans_epoch_%d.pth' % (1910))
dict_D = torch.load('netD_try_kmeans_epoch_%d.pth' % (1910))
# The feature extractor has no final scoring convolution, so drop its weight
# to make the remaining keys match DCGAN_D_feature exactly.
del(dict_D['main.final.256-1.conv.weight'])

feature_dict = {}
netD_feature = DCGAN_D_feature()
netD_feature.load_state_dict(dict_D)
print(netD_feature)
data_iter = iter(dataloader)
# One sample per batch: flatten each 256 x 4 x 4 activation into a 4096-vector.
for i,data in enumerate(data_iter):
    feature_dict[i] = netD_feature(Variable(data[0])).data.numpy().reshape((1,256*4*4))

# Size the matrix by the number of samples. max(keys) is the last index (N - 1),
# which would silently drop the final sample.
feature = np.zeros((len(feature_dict),4096),dtype=np.float32)
for i in range(len(feature_dict)):
    feature[i] = feature_dict[i]

In [None]:
from sklearn.cluster import KMeans

# Cluster the feature rows into 10 groups. Despite its name, `kmeans` holds the
# per-row cluster labels returned by fit_predict, not the fitted estimator.
kmeans_model = KMeans(random_state=0, n_clusters=10)
kmeans = kmeans_model.fit_predict(feature)

In [None]:
# For each of the 10 cluster labels, collect the indices of its members.
arg = [np.argwhere(kmeans == n) for n in range(10)]
# Write one image grid per cluster, made of the images assigned to it.
for i, members in enumerate(arg):
    # argwhere adds an extra axis to the index array; the reshape collapses it
    # back into a plain batch of 3x32x32 images.
    cluster = np.take(X_train, members, axis=0).reshape(-1, 3, 32, 32)
    cluster_tensor = torch.from_numpy(cluster)
    vutils.save_image(cluster_tensor, 'cluster%d_%d.png'% (10,i), normalize=True)

In [None]:
# Scratch: check the dimensions of the image array.
X_train.shape

In [None]:
# Scratch: check the shape np.take returns when given a one-element index list along axis 0.
np.take(X_train, [0],axis=0).shape

In [None]:
# Scratch: dump the raw values of the sample at index 6.
X_train[6]

In [None]:
# For each of the 10 cluster labels, collect the indices of its members.
arg = [np.argwhere(kmeans==n) for n in range(0,10)]
# pic_list[i] holds the images assigned to cluster i.
pic_list = []
for i,index in enumerate(arg):
    # The original body indexed with `n`, the comprehension variable above, which
    # is undefined on Python 3, and never stored anything. Index with this
    # cluster's members instead; ravel() drops the extra axis argwhere adds.
    pic_list.append(X_train[index.ravel(),:,:,:])
