# Initializations

In [None]:
import numpy as np
import pandas as pd

import sys, os, time
import glob

from matplotlib import pyplot as plt
%matplotlib inline

# these magics ensure that external modules that are modified are also automatically reloaded
%load_ext autoreload
%autoreload 2

# widgets and interaction
from ipywidgets import FloatProgress
from IPython.display import display, clear_output

import seaborn as sns
sns.set_style("whitegrid", {'axes.grid' : False})

from skimage.io import imread, imsave

import warnings
warnings.filterwarnings('ignore')

In [None]:
from __future__ import print_function
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable

In [None]:
from collections import namedtuple
import random

# Notebook-wide settings. They are first collected in a plain dict and then
# frozen into a namedtuple so the rest of the notebook reads them as
# attributes (opt.lr, opt.batchSize, ...).
opt = {
    "dataroot": "/home/adalbert/data/world-cities/",
    "workers": 2,                 # passed to the DataLoader as num_workers
    "batchSize": 64,
    "imageSize": 64,
    "nz": 100,                    # channels of the generator's noise input
    "ngf": 64,                    # nr filters for generator
    "ndf": 64,                    # nr filters for discriminator
    "niter": 25,                  # number of passes over the training data
    "lr": 0.0002,                 # Adam learning rate
    "beta1": 0.5,                 # first Adam beta
    "cuda": True,
    "gpu_ids": [0, 1],
    "netG": "",                   # optional generator checkpoint ("" = none)
    "netD": "",                   # optional discriminator checkpoint ("" = none)
    "outf": "/home/adalbert/nbserver/pytorch-workspace/dcgan/",
    "manualSeed": random.randint(1, 10000),
}
opt = namedtuple("opt", opt.keys())(**opt)

opt

In [None]:
# Create the output directory. Only the "directory already exists" case is
# tolerated; any other failure (e.g. missing permissions) is re-raised here
# instead of being swallowed and resurfacing later when samples and
# checkpoints are written.
try:
    os.makedirs(opt.outf)
except OSError:
    if not os.path.isdir(opt.outf):
        raise

# Seed every random number generator this notebook draws from, so that a run
# can be repeated from the printed seed.
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)  # numpy draws the train/test split
torch.manual_seed(opt.manualSeed)

# Let cuDNN benchmark and pick convolution algorithms.
cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    
    

# Set up data sources

In [1]:
# Display the configured data root. `opt` must already have been defined by
# the configuration cell; otherwise this raises NameError.
opt.dataroot

NameError: name 'opt' is not defined

In [None]:
import glob

# Index every PNG found three directory levels below the data root.
# Each path is parsed as
#   .../<scale>-*/<res>-*/<class>/<x>_<country>_<city>_<x>_<pop>_<lat>_<lon>
# where <scale> and <res> are integers.
files = glob.glob(opt.dataroot + "*/*/*/*.png")

rows = []
for path in files:
    scale_dir, res_dir, cls, fname = path.split("/")[-4:]
    res = int(res_dir.split("-")[0])
    scale = int(scale_dir.split("-")[0])
    # The file name must have exactly seven "_"-separated fields, otherwise
    # the unpacking raises ValueError. Only country and pop are kept.
    _, country, city, _, pop, lat, lon = fname.split("_")
    rows.append((path, cls, res, scale, country, pop))

# "population" holds the raw string taken from the file name.
files_df = pd.DataFrame(
    rows,
    columns=["filename", "class", "res-px", "scale-km", "country", "population"],
)

In [None]:
# Number of image files that were indexed.
len(files_df)

In [None]:
# Keep only the images stored under the 224 px resolution / 100 km scale
# directories.
sel_df = files_df[(files_df['res-px']==224) & 
                  (files_df['scale-km']==100)]

# Random, disjoint 95% / 5% train/test split.
# A permutation is used so that every row lands in exactly one of the two
# sets: sampling with np.random.choice draws with replacement (duplicated
# training rows), and `~idx` on an integer index array is a bitwise NOT
# (-idx - 1), not the complement of the selection.
perm = np.random.permutation(len(sel_df))
n_train = int(len(sel_df)*0.95)
idx = perm[:n_train]
train_df = sel_df.iloc[idx]
test_df  = sel_df.iloc[perm[n_train:]]

# Persist both splits next to the data.
train_df.to_csv(opt.dataroot + "/train.csv")
test_df.to_csv(opt.dataroot + "/test.csv")

len(train_df)

In [None]:
import sys
sys.path.append("./pytorch_utils")
from loader_dataframe import ImageDataFrame, grayscale_loader

# Per-image preprocessing: random horizontal flip, rescale and centre-crop to
# opt.imageSize, convert to a tensor, then normalise with mean 0.5 / std 0.5.
preprocess = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.Scale(opt.imageSize),
    transforms.CenterCrop(opt.imageSize),
    transforms.ToTensor(),
    # NOTE(review): three-channel mean/std is used together with
    # grayscale_loader and nc = 1 -- confirm the loader's channel count.
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training images come from the rows of train_df.
dataset = ImageDataFrame(df=train_df, loader=grayscale_loader, transform=preprocess)
assert dataset

# Shuffled mini-batches of opt.batchSize, loaded by opt.workers workers.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=opt.batchSize,
    shuffle=True,
    num_workers=int(opt.workers),
)

# Set up generator and discriminator models

In [None]:
# Module-level shortcuts read by the network definitions.
gpu_ids = opt.gpu_ids
nz, ngf, ndf = int(opt.nz), int(opt.ngf), int(opt.ndf)
nc = 1  # number of image channels

In [None]:
# custom weights initialization called on netG and netD
def weights_init(m):
    """Re-initialise the parameters of module ``m`` in place.

    The module is dispatched on its class name:

    * name contains ``'Conv'``: weights drawn from a normal distribution
      with mean 0.0 and std 0.02;
    * name contains ``'BatchNorm'``: weights drawn from a normal
      distribution with mean 1.0 and std 0.02, bias filled with 0;
    * anything else is left untouched.
    """
    layer_kind = m.__class__.__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.02)
    elif 'BatchNorm' in layer_kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

In [None]:
# Generator network

class _netG(nn.Module):
    """DCGAN generator: upsamples a noise tensor into an image.

    A stack of transposed convolutions turns a ``nz x 1 x 1`` input into an
    ``nc x 64 x 64`` output; the final ``Tanh`` keeps values in [-1, 1].
    The layer widths are read from the module-level names ``nz``, ``ngf``
    and ``nc`` when the network is constructed.

    Args:
        gpu_ids: device ids used for data-parallel execution when more than
            one is given and the input lives on the GPU.
    """
    def __init__(self, gpu_ids):
        super(_netG, self).__init__()
        self.gpu_ids = gpu_ids
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(     nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2,     ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(    ngf,      nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64
        )

    def forward(self, input):
        """Generate images from ``input`` noise.

        Data-parallel execution is used only for a CUDA input with more than
        one configured device. In every other case the layers are called
        directly rather than through ``data_parallel(..., None)``, because
        the meaning of ``device_ids=None`` is not the same across PyTorch
        versions ("run on the current device" vs. "use every device").
        """
        if isinstance(input.data, torch.cuda.FloatTensor) and len(self.gpu_ids) > 1:
            return nn.parallel.data_parallel(self.main, input, self.gpu_ids)
        return self.main(input)

# Build the generator and apply the custom weight initialisation to all of
# its sub-modules.
netG = _netG(gpu_ids)
netG.apply(weights_init)
# Optionally overwrite the fresh weights with a saved checkpoint
# (an empty opt.netG means "start from scratch").
if opt.netG != '':
    netG.load_state_dict(torch.load(opt.netG))
print(netG)

In [None]:
class _netD(nn.Module):
    """DCGAN discriminator: scores an image with a value in (0, 1).

    A stack of strided convolutions reduces an ``nc x 64 x 64`` input to a
    single value per image, squashed by a ``Sigmoid``. The layer widths are
    read from the module-level names ``ndf`` and ``nc`` when the network is
    constructed.

    Args:
        gpu_ids: device ids used for data-parallel execution when more than
            one is given and the input lives on the GPU.
    """
    def __init__(self, gpu_ids):
        super(_netD, self).__init__()
        self.gpu_ids = gpu_ids
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        """Score a batch of images; returns a tensor viewed as ``(-1, 1)``.

        Data-parallel execution is used only for a CUDA input with more than
        one configured device. In every other case the layers are called
        directly rather than through ``data_parallel(..., None)``, because
        the meaning of ``device_ids=None`` is not the same across PyTorch
        versions ("run on the current device" vs. "use every device").
        """
        if isinstance(input.data, torch.cuda.FloatTensor) and len(self.gpu_ids) > 1:
            output = nn.parallel.data_parallel(self.main, input, self.gpu_ids)
        else:
            output = self.main(input)
        return output.view(-1, 1)

# Build the discriminator and apply the custom weight initialisation to all
# of its sub-modules.
netD = _netD(gpu_ids)
netD.apply(weights_init)
# Optionally overwrite the fresh weights with a saved checkpoint
# (an empty opt.netD means "start from scratch").
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

# Train 

In [None]:
# Binary cross-entropy between the discriminator output and the 0/1 labels.
criterion = nn.BCELoss()

# Pre-allocated buffers that the training loop resizes and refills on every
# iteration. NOTE: `input` shadows the Python builtin of the same name; the
# training cell relies on this name.
input = torch.FloatTensor(opt.batchSize, nc, opt.imageSize, opt.imageSize)
noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
# Noise drawn once and reused for the sample grids saved during training.
fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0

# Move the networks, the loss and all buffers to the GPU when requested.
if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

# Wrap the buffers as autograd Variables (done after the optional move to the
# GPU so the Variables wrap the final tensors).
input = Variable(input)
label = Variable(label)
noise = Variable(noise)
fixed_noise = Variable(fixed_noise)

In [None]:
# Adversarial training loop: for every mini-batch the discriminator is updated
# on a real and a generated batch, then the generator is updated to fool the
# discriminator. Progress is printed every iteration, sample grids are written
# every 100 iterations and both networks are checkpointed after every epoch.

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))

for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu, _ = data
        # the last batch of an epoch may be smaller than opt.batchSize, so the
        # shared buffers are resized to the actual batch
        batch_size = real_cpu.size(0)
        input.data.resize_(real_cpu.size()).copy_(real_cpu)
        label.data.resize_(batch_size).fill_(real_label)

        output = netD(input)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()  # mean discriminator score on real images

        # train with fake
        noise.data.resize_(batch_size, nz, 1, 1)
        noise.data.normal_(0, 1)
        fake = netG(noise)
        label.data.fill_(fake_label)
        # detach() keeps this backward pass from propagating into the generator
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.data.mean()  # mean score on fakes, before the D step
        errD = errD_real + errD_fake
        # gradients of the real and fake passes have accumulated; apply them
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.data.fill_(real_label) # fake labels are real for generator cost
        output = netD(fake)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.data.mean()  # mean score on fakes, after the D step
        optimizerG.step()

        # NOTE(review): `.data[0]` on the loss relies on the PyTorch version
        # this notebook was written for -- confirm before upgrading.
        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
              % (epoch, opt.niter, i, len(dataloader),
                 errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2))
        if i % 100 == 0:
            # Both file names are reused: real_samples.png is overwritten on
            # every save, the fake grid is overwritten within the same epoch.
            vutils.save_image(real_cpu,
                    '%s/real_samples.png' % opt.outf)
            fake = netG(fixed_noise)
            vutils.save_image(fake.data,
                    '%s/fake_samples_epoch_%03d.png' % (opt.outf, epoch))

    # do checkpointing
    torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (opt.outf, epoch))
    torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (opt.outf, epoch))

# Plot fake and real samples

In [None]:
# Collect the saved sample grids and discriminator checkpoints from the
# "100km" sub-folder of the output directory.
# NOTE(review): the training cell writes directly into opt.outf, so the files
# were presumably moved into "100km" by hand -- confirm.
files_fake = sorted(glob.glob(opt.outf + "/100km/fake*.png"))
files_real = glob.glob(opt.outf + "/100km/real*.png")
files_ckpt = sorted(glob.glob(opt.outf + "/100km/netD*.pth"))

In [None]:
# Show the first saved grid of real training images.
plt.imshow(imread(files_real[0]))

In [None]:
# Show every saved grid of generated images, one figure per file, titled with
# the file name.
for fake_path in files_fake:
    plt.imshow(imread(fake_path))
    plt.title(os.path.basename(fake_path))
    plt.show()

# Clustering with GAN discriminator features

In [None]:
# Re-key the checkpoint paths by the integer that follows the last "_" in the
# file name (before the first "."), i.e. {number: path}.
files_ckpt = dict(
    (int(os.path.basename(ckpt_path).split(".")[0].split("_")[-1]), ckpt_path)
    for ckpt_path in files_ckpt
)

In [None]:
# Restore the discriminator weights from the checkpoint with the largest key
# in files_ckpt (which must already be the {number: path} dict).
netD.load_state_dict(torch.load(files_ckpt[max(files_ckpt.keys())]))

In [None]:
# NOTE(review): _netD.forward is defined with a required `input` argument, so
# this call raises TypeError as written -- an input batch still has to be
# supplied here.
netD.forward()