In [1]:
from tensorboardX import SummaryWriter

In [2]:
import argparse
import torch
import torch.utils.data
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision
from torchvision import models
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import time
from glob import glob
import numpy as np

# Utils

In [3]:
import pickle as pk
import sys

class NormalizeInverse(torchvision.transforms.Normalize):
    """
    Undoes the normalization and returns the reconstructed images in the input domain.
    """

    def __init__(self, mean, std):
        mean = torch.as_tensor(mean)
        std = torch.as_tensor(std)
        std_inv = 1 / (std + 1e-7)
        mean_inv = -mean * std_inv
        super().__init__(mean=mean_inv, std=std_inv)

    def __call__(self, tensor):
        return super().__call__(tensor.clone())

def disp_to_term(msg):
    sys.stdout.write(msg + '\r')
    sys.stdout.flush()

def load_pickle(filename):
    try:
        p = open(filename, 'r')
    except IOError:
        print ("Pickle file cannot be opened.")
        return None
    try:
        picklelicious = pk.load(p)
    except ValueError:
        print ('load_pickle failed once, trying again')
        p.close()
        p = open(filename, 'r')
        picklelicious = pk.load(p)

    p.close()
    return picklelicious

def save_pickle(data_object, filename):
    pickle_file = open(filename, 'w')
    pk.dump(data_object, pickle_file)
    pickle_file.close()
    
def unnormalize(y, mean, std):
    x = y.new(*y.size())
    x[:, 0, :, :] = y[:, 0, :, :] * std[0] + mean[0]
    x[:, 1, :, :] = y[:, 1, :, :] * std[1] + mean[1]
    x[:, 2, :, :] = y[:, 2, :, :] * std[2] + mean[2]
    return x

def data_mean_std(train_data_gen):
    pop_mean = []
    pop_std = []
    for inputs in train_data_gen:
        # shape (batch_size, 3, height, width)
        data , _ = inputs
        numpy_image = data.numpy()

        # shape (3,)
        batch_mean = np.mean(numpy_image, axis=(0,2,3))
        batch_std = np.std(numpy_image, axis=(0,2,3), ddof=1)

        pop_mean.append(batch_mean)
        pop_std.append(batch_std)

    # shape (num_iterations, 3) -> (mean across 0th axis) -> shape (3,)
    pop_mean = np.array(pop_mean).mean(axis=0)
    pop_std = np.array(pop_std).mean(axis=0)
    return pop_mean, pop_std


# VAE

In [4]:
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

In [5]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

In [201]:
class VAE(nn.Module):
    
    def __init__(self, block, layers, latent_variable_size, nc, ngf, ndf, is_cuda=False):
        super(VAE, self).__init__()
        self.nc = nc # nubmer of channels
        self.ngf = ngf # image size, i.e 200
        self.ndf = ndf # image size, i.e 200
        
        self.is_cuda = is_cuda
        
        #Encoder
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, 512)
        self.fc1 = nn.Linear(512 , latent_variable_size)
        self.fc2 = nn.Linear(512 , latent_variable_size)
        
        
        #Decoder
        self.fc3 = nn.Linear(latent_variable_size, 500)
        self.fc4 = nn.Linear(500, 25*25*10) # 14*14*32
        self.deconv1 = nn.ConvTranspose2d(10,32, kernel_size=3, stride =2, padding=1, output_padding=1)
        self.deconv2 = nn.ConvTranspose2d(32,16, kernel_size=3, stride =2, padding=1, output_padding=1)
        self.deconv3 = nn.ConvTranspose2d(16,3, kernel_size=3, stride =2, padding=1, output_padding=1)
        #self.deconv4 = nn.ConvTranspose2d(16,3, kernel_size=3, stride =2, padding=1, output_padding=1)
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        
        
        
        
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
    
    def encode(self, x):

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.relu(x)
        w_mean = self.fc1(x)
        w_std  = self.fc2(x)
        return w_mean, w_std
    
    def decode(self, z):
        x = self.fc3(z)
        x = self.relu(x)
        
        x = self.fc4(x)
        x = self.relu(x)
        
        x = x.view(-1, 10, 25, 25)
        x = self.deconv1(x)
        x = self.relu(x)
        
        x = self.deconv2(x)
        x = self.relu(x)
        
        x = self.deconv3(x)
        x = self.sigmoid(x)

        return x
    
    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        if self.is_cuda:
            eps = torch.cuda.FloatTensor(std.size()).normal_()
        else:
            eps = torch.FloatTensor(std.size()).normal_()
        eps = Variable(eps)
        return eps.mul(std).add_(mu)
    
    def get_latent_var(self, x):
        mu, logvar = self.encode(x.view(-1, self.nc, self.ndf, self.ngf))
        z = self.reparametrize(mu, logvar)
        return z

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, self.nc, self.ndf, self.ngf))
        z = self.reparametrize(mu, logvar)
        res = self.decode(z)
        return res, mu, logvar

# Preprocessing images

In [155]:
import os

In [19]:
os.getcwd()

'/Users/prophet/Desktop/Deep Learning/VAE_pet_project'

In [60]:
path_to_images = "../datasets/anime_chars/dataset/dataset/"

In [61]:
image_names_list = os.listdir(path_to_images)

In [62]:
len(image_names_list)

58083

17426 - valid

In [63]:
from PIL import Image

for i in image_names_list:
    image  = Image.open(path_to_images + i)
    image = image.convert('RGB')
    width  = image.size[0]
    height = image.size[1]

    aspect = width / float(height)

    ideal_width = 200
    ideal_height = 200

    ideal_aspect = ideal_width / float(ideal_height)

    if aspect > ideal_aspect:
        # Then crop the left and right edges:
        new_width = int(ideal_aspect * height)
        offset = (width - new_width) / 2
        resize = (offset, 0, width - offset, height)
    else:
        # ... crop the top and bottom:
        new_height = int(width / ideal_aspect)
        offset = (height - new_height) / 2
        resize = (0, offset, width, height - offset)

    thumb = image.crop(resize).resize((ideal_width, ideal_height), Image.ANTIALIAS)
    thumb.save(path_to_images + i)



In [58]:
os.remove('../datasets/anime_chars/dataset/dataset/.DS_Store')

In [65]:
path_to_images = "../datasets/anime_chars/dataset/dataset_copy/"

In [66]:
image_names_list = os.listdir(path_to_images)

In [68]:
train_names = image_names_list[:40657]

In [69]:
valid_names = image_names_list[40657:]

In [70]:
len(train_names) + len(valid_names)

58083

In [157]:
path_destin_train = '../datasets/anime_chars/dataset/train_images/train/'

In [158]:
train_names = os.listdir(path_destin_train)

In [170]:
path_destin_val = '../datasets/anime_chars/dataset/valid_images/valid/Abel_Bauer.jpg''

In [174]:
valid_names = os.listdir(path_destin_val)

In [175]:
train_names = train_names[:5000]
valid_names = valid_names[:700]

In [73]:
path_destin_val = '../datasets/anime_chars/dataset/valid/'

In [163]:
path_final_train = '../datasets/anime_chars/dataset/train_1k/train/'

In [165]:
path_final_valid = '../datasets/anime_chars/dataset/valid_1k/valid/'

In [162]:
import shutil

In [None]:
train_1k = tra

# Training

In [7]:
is_cuda = False

In [184]:
BATCH_SIZE = 100
EPOCH = 10
LOG_INTERVAL = 5

In [185]:
path_train = '../datasets/anime_chars/dataset/train_1k/'
path_test  = '../datasets/anime_chars/dataset/valid_1k//'

In [186]:
num_workers = 6

In [187]:
simple_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.48829153, 0.45526633, 0.41688013],[0.25974154, 0.25308523, 0.25552085])])

In [188]:
train = ImageFolder(path_train, simple_transform)
valid = ImageFolder(path_test, simple_transform)

In [189]:
train_data_gen = torch.utils.data.DataLoader(train,shuffle=True,batch_size=BATCH_SIZE,num_workers=num_workers, )
valid_data_gen = torch.utils.data.DataLoader(valid,batch_size=BATCH_SIZE,num_workers=num_workers)

In [190]:
dataset_sizes = {'train':len(train_data_gen.dataset),'valid':len(valid_data_gen.dataset)}
dataloaders = {'train':train_data_gen,'valid':valid_data_gen}

In [191]:
dataset_sizes

{'train': 2000, 'valid': 300}

In [202]:
model = VAE(BasicBlock, [2, 2, 2, 2], latent_variable_size=500, nc=3, ngf=200, ndf=200, is_cuda=is_cuda)

In [203]:
reconstruction_function = nn.MSELoss()
reconstruction_function.size_average = False

In [204]:
def loss_function(recon_x, x, mu, logvar):

    MSE = reconstruction_function(recon_x, x)

    # https://arxiv.org/abs/1312.6114 (Appendix B)
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)

    return MSE + KLD

In [205]:
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [206]:
def train(epoch):

    model.train()
    train_loss = 0
    batch_idx = 1
    for data in dataloaders['train']:
        # get the inputs
        inputs, _ = data

        # wrap them in Variable
        if torch.cuda.is_available():
            inputs = Variable(inputs.cuda())
        else:
            inputs = Variable(inputs)
        
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(inputs)
        inputs.data = unnormalize(inputs.data,[0.48829153, 0.45526633, 0.41688013],[0.25974154, 0.25308523, 0.25552085])

        loss = loss_function(recon_batch, inputs, mu, logvar)
        loss.backward()
        train_loss += loss.data#[0]
        optimizer.step()

        if batch_idx % LOG_INTERVAL == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(inputs), (len(dataloaders['train'])*128),
                100. * batch_idx / len(dataloaders['train']),
                loss.data / len(inputs)))
        batch_idx+=1

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch, train_loss / (len(dataloaders['train'])*BATCH_SIZE)))
    return train_loss / (len(dataloaders['train'])*BATCH_SIZE)

In [207]:
def test(epoch):
    model.eval()
    test_loss = 0
    counter = 1
    for data in dataloaders['valid']:
        
        # get the inputs
        inputs, _ = data

        # wrap them in Variable
        if torch.cuda.is_available():
            inputs = Variable(inputs.cuda())
        else:
            inputs = Variable(inputs)
        recon_batch, mu, logvar = model(inputs)
        inputs.data = unnormalize(inputs.data,[0.48829153, 0.45526633, 0.41688013],[0.25974154, 0.25308523, 0.25552085])
        test_loss += loss_function(recon_batch, inputs, mu, logvar).data
        if((epoch + 1) % 1 == 0):
            torchvision.utils.save_image(inputs.data, './imgs/Epoch_{}_data.jpg'.format(epoch), nrow=8, padding=2)
            torchvision.utils.save_image(recon_batch.data, './imgs/Epoch_{}_recon.jpg'.format(epoch), nrow=8, padding=2)

    test_loss /= (len(dataloaders['valid'])*128)
    print('====> Test set loss: {:.4f}'.format(test_loss))
    return test_loss

In [208]:
writer = SummaryWriter('runs/exp-1')
since = time.time()
for epoch in range(EPOCH):
    train_loss = train(epoch)
    test_loss = test(epoch)
    writer.add_scalar('train_loss', train_loss, epoch)
    writer.add_scalar('test_loss',test_loss, epoch)
    torch.save(model.state_dict(), './models/Epoch_{}_Train_loss_{:.4f}_Test_loss_{:.4f}.pth'.format(epoch, train_loss, test_loss))
time_elapsed = time.time() - since    
print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

====> Epoch: 0 Average loss: 3.3706
====> Test set loss: 0.2312
====> Epoch: 1 Average loss: 0.1891
====> Test set loss: 0.1113
====> Epoch: 2 Average loss: 0.1152
====> Test set loss: 0.0772
====> Epoch: 3 Average loss: 0.0774
====> Test set loss: 0.0518
====> Epoch: 4 Average loss: 0.0541
====> Test set loss: 0.0368
====> Epoch: 5 Average loss: 0.0393
====> Test set loss: 0.0283
====> Epoch: 6 Average loss: 0.0297
====> Test set loss: 0.0217
====> Epoch: 7 Average loss: 0.0236
====> Test set loss: 0.0179
====> Epoch: 8 Average loss: 0.0191
====> Test set loss: 0.0148
====> Epoch: 9 Average loss: 0.0156
====> Test set loss: 0.0124
Training complete in 52m 19s
