In [0]:
# Mount Google Drive at /content/gdrive; the dataset and checkpoint paths used
# later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

# Shell command: print the version of the CUDA compiler found in the runtime.
!nvcc --version

In [0]:
# Open Colab's file-upload dialog and keep its return value in `uploaded`.
# NOTE(review): presumably used to bring the local `util.py` / `dataset.py`
# modules imported in the next cell into the runtime — confirm.
from google.colab import files
uploaded = files.upload()

In [0]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

import util
import dataset as dset
from dataset import pokemonDataset

In [0]:
class generator(nn.Module):
    """DCGAN-style generator built from six transposed convolutions.

    The first transposed convolution (stride 1, no padding) uses a kernel of
    ``int(dim_output_img / 2 ** 5)``; the five that follow use kernel 4,
    stride 2 and padding 1. Every transposed convolution except the last is
    followed by batch normalisation and ReLU; the last is followed by tanh.
    ``dim_output_img`` also sets the channel widths (8x, 4x, 2x, 1x, 1x).

    Args:
        dim_noise: number of input channels of the first layer.
        dim_output_img: size parameter used for the first kernel and for the
            channel widths, as described above.
        n_channel: number of channels produced by the last layer.
    """

    def __init__(self, dim_noise=100, dim_output_img=64, n_channel=3):
        super(generator, self).__init__()

        width = dim_output_img
        first_kernel = int(dim_output_img / (2 ** 5))

        def doubling(c_in, c_out):
            # kernel 4 / stride 2 / padding 1 transposed convolution, no bias
            return nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1, bias=False)

        # Attribute names and creation order are left untouched so that the
        # registered sub-module names (and hence state-dict keys) do not change.
        self.conv_trans_2d1 = nn.ConvTranspose2d(
            dim_noise, width * 8, kernel_size=first_kernel, stride=1, padding=0, bias=False)
        self.batchnorm1 = nn.BatchNorm2d(width * 8)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv_trans_2d2 = doubling(width * 8, width * 4)
        self.batchnorm2 = nn.BatchNorm2d(width * 4)
        self.relu2 = nn.ReLU(inplace=True)

        self.conv_trans_2d3 = doubling(width * 4, width * 2)
        self.batchnorm3 = nn.BatchNorm2d(width * 2)
        self.relu3 = nn.ReLU(inplace=True)

        self.conv_trans_2d4 = doubling(width * 2, width)
        self.batchnorm4 = nn.BatchNorm2d(width)
        self.relu4 = nn.ReLU(inplace=True)

        self.conv_trans_2d5 = doubling(width, width)
        self.batchnorm5 = nn.BatchNorm2d(width)
        self.relu5 = nn.ReLU(inplace=True)

        self.conv_trans_2d6 = doubling(width, n_channel)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Apply the five (deconv, batch norm, ReLU) stages, then deconv + tanh."""
        hidden_stages = (
            (self.conv_trans_2d1, self.batchnorm1, self.relu1),
            (self.conv_trans_2d2, self.batchnorm2, self.relu2),
            (self.conv_trans_2d3, self.batchnorm3, self.relu3),
            (self.conv_trans_2d4, self.batchnorm4, self.relu4),
            (self.conv_trans_2d5, self.batchnorm5, self.relu5),
        )
        for deconv, norm, activation in hidden_stages:
            x = activation(norm(deconv(x)))
        return self.tanh(self.conv_trans_2d6(x))

In [0]:
class discriminator(nn.Module):
    """DCGAN-style discriminator built from five convolutions.

    Four convolutions with kernel 4, stride 2 and padding 1 are each followed
    by batch normalisation and LeakyReLU(0.2). A final convolution (stride 1,
    no padding) with a kernel of ``int(dim_input_img / 2 ** 4)`` maps to a
    single channel, which is passed through a sigmoid. ``dim_input_img`` also
    sets the channel widths (1x, 2x, 4x, 8x).

    Args:
        dim_input_img: size parameter used for the final kernel and for the
            channel widths, as described above.
        n_channel: number of channels of the input.
    """

    def __init__(self, dim_input_img=64, n_channel = 3):
        super(discriminator, self).__init__()

        leak = 0.2
        width = dim_input_img
        last_kernel = int(dim_input_img / (2**4))

        def halving(c_in, c_out):
            # kernel 4 / stride 2 / padding 1 convolution, no bias
            return nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1, bias=False)

        def leaky():
            return nn.LeakyReLU(negative_slope=leak, inplace=True)

        # There is no stage 4: the numbering jumps from 3 to 5 and is kept
        # as-is so the registered sub-module names do not change.
        self.conv1 = halving(n_channel, width)
        self.batchnorm1 = nn.BatchNorm2d(width)
        self.lrelu1 = leaky()

        self.conv2 = halving(width, width * 2)
        self.batchnorm2 = nn.BatchNorm2d(width * 2)
        self.lrelu2 = leaky()

        self.conv3 = halving(width * 2, width * 4)
        self.batchnorm3 = nn.BatchNorm2d(width * 4)
        self.lrelu3 = leaky()

        self.conv5 = halving(width * 4, width * 8)
        self.batchnorm5 = nn.BatchNorm2d(width * 8)
        self.lrelu5 = leaky()

        self.conv6 = nn.Conv2d(width * 8, 1, kernel_size=last_kernel, stride=1, padding=0, bias=False)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Apply the four (conv, batch norm, LeakyReLU) stages, then conv + sigmoid."""
        hidden_stages = (
            (self.conv1, self.batchnorm1, self.lrelu1),
            (self.conv2, self.batchnorm2, self.lrelu2),
            (self.conv3, self.batchnorm3, self.lrelu3),
            (self.conv5, self.batchnorm5, self.lrelu5),
        )
        for conv, norm, activation in hidden_stages:
            x = activation(norm(conv(x)))
        return self.sig(self.conv6(x))

In [0]:
def train_base(epochs, batch_size, dim_noise, device, dataset, generator, discriminator, loss, optimizer_gen, optimizer_dis, filepath=None, num_workers=2):
    """Run the alternating discriminator / generator training loop.

    Args:
        epochs: number of passes over ``dataset``.
        batch_size: batch size handed to the ``DataLoader``.
        dim_noise: channel count of the ``(N, dim_noise, 1, 1)`` noise tensors
            fed to ``generator``.
        device: device on which noise and real batches are placed.
        dataset: dataset whose batches are indexed with ``[0]`` to obtain the
            real images.
        generator: module mapping noise to fake images.
        discriminator: module whose output is flattened with ``view(-1)`` and
            compared against per-sample labels.
        loss: criterion called as ``loss(output, label)``.
        optimizer_gen: optimizer stepping the generator.
        optimizer_dis: optimizer stepping the discriminator.
        filepath: forwarded to ``util.save_checkpoint`` every fifth epoch.
        num_workers: worker count of the ``DataLoader`` (default 2, the value
            that used to be hard-coded).

    Returns:
        ``(generator, discriminator, loss_list, score_list, img_list)`` where
        ``loss_list`` is ``[discriminator losses, generator losses]``,
        ``score_list`` is ``[D(G(z)) during the D step, D(x), D(G(z)) during
        the G step]`` and ``img_list`` holds one numpy array per epoch with
        the generator output for a fixed batch of 64 noise vectors.
    """
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    # histories of losses / scores and the per-epoch sample images
    loss_list, score_list, img_list = [], [], []
    num_fixed_ns_img = 64
    fixed_noise = torch.randn(num_fixed_ns_img, dim_noise, 1, 1, device=device)
    # log (and record) once every `num2print` iterations
    num2print = 20

    for e in range(epochs):
        loss_dis, loss_gen, score_dis_real, score_dis_fake, score_gen = 0, 0, 0, 0, 0

        for i, data in enumerate(dataloader):
            # the last batch of an epoch may hold fewer than batch_size samples
            b_size = len(data[0])

            # ---------------------------
            # 1. Train the discriminator
            # ---------------------------
            batch_noise = torch.randn(b_size, dim_noise, 1, 1, device=device)
            fake_data = generator(batch_noise)

            discriminator.zero_grad()
            # Fake samples are labelled 0. `.detach()` keeps this backward pass
            # from reaching the generator.
            output = discriminator(fake_data.detach()).view(-1)
            # Labels are built from `output` so that their dtype, shape and
            # device always match it; `torch.full((n,), 0)` yields an integer
            # tensor on current PyTorch, which BCELoss rejects.
            loss_d_ns = loss(output, torch.zeros_like(output))
            loss_d_ns.backward()
            score_dis_fake = output.mean().item()

            # Real samples are labelled 1.
            real_data = data[0].to(device)
            output = discriminator(real_data).view(-1)
            loss_d_real = loss(output, torch.ones_like(output))
            loss_d_real.backward()
            score_dis_real = output.mean().item()

            loss_d = loss_d_ns + loss_d_real
            loss_dis = loss_d.item()
            optimizer_dis.step()

            # ---------------------------
            # 2. Train the generator
            # ---------------------------
            # Score the same fake batch again with the freshly updated
            # discriminator. The target is 1 because the generator is rewarded
            # when its samples are classified as real.
            generator.zero_grad()
            output = discriminator(fake_data).view(-1)
            loss_g = loss(output, torch.ones_like(output))
            loss_g.backward()
            score_gen = output.mean().item()
            loss_gen = loss_g.item()
            optimizer_gen.step()

            if (i + 1) % num2print == 0:
                print('epoch: %d, iter: %d, loss_D: %.4f, loss_G: %.4f;\t Scores: train D: D(x): %.4f, D(G(z)): %.4f train G: D(G(z))： %.4f'
                        % (e, (i + 1), loss_dis, loss_gen, score_dis_real, score_dis_fake, score_gen))

                # record the current losses / scores at this logging step
                loss_list.append([loss_dis, loss_gen])
                score_list.append([score_dis_fake, score_dis_real, score_gen])

        # record the values of the last iteration of the epoch
        loss_list.append([loss_dis, loss_gen])
        score_list.append([score_dis_fake, score_dis_real, score_gen])

        # Snapshot of the generator on the fixed noise; no autograd graph is
        # needed for it.
        with torch.no_grad():
            test_img = generator(fixed_noise).detach().cpu()
        img_list.append(test_img.numpy())

        # save the model every fifth epoch
        if (e+1) % 5 == 0:
            util.save_checkpoint(e+1, generator, discriminator, filepath)

    # transpose [[d, g], ...] -> [[d, ...], [g, ...]] (same for the scores);
    # both become [] when nothing was recorded
    loss_list = list(map(list, zip(*loss_list)))
    score_list = list(map(list, zip(*score_list)))

    return generator, discriminator, loss_list, score_list, img_list

In [0]:
def build_gen_dis(config):
    """Create the generator / discriminator pair described by ``config``.

    Both networks are moved to ``config.DEVICE``. When ``config.INIT`` is
    truthy their weights are initialised with ``init_weight``; otherwise they
    are restored through ``util.load_checkpoint`` from the file obtained by
    inserting ``_epoch_<config.IMPORT_IDX_EPOCH>`` before the extension of
    ``config.PATH_MODEL``.

    Args:
        config: object exposing ``DIM_NOISE``, ``DIM_IMG``, ``DEVICE``,
            ``INIT`` and, for the restore path, ``PATH_MODEL``,
            ``IMPORT_IDX_EPOCH`` and ``EPOCHS``.

    Returns:
        ``(net_gen, net_dis)``.
    """
    net_gen = generator(config.DIM_NOISE, config.DIM_IMG).to(config.DEVICE)
    net_dis = discriminator(config.DIM_IMG).to(config.DEVICE)

    if config.INIT:
        net_gen.apply(init_weight)
        net_dis.apply(init_weight)
    else:
        # Split off the real extension (e.g. '.pth') and format the epoch into
        # the whole name. The previous code took a single character with
        # PATH_MODEL[-4] and applied `%` to it alone, which raised TypeError.
        root, ext = os.path.splitext(config.PATH_MODEL)
        path_model = '%s_epoch_%d%s' % (root, config.IMPORT_IDX_EPOCH, ext)
        # NOTE(review): the first argument is config.EPOCHS while the file name
        # uses config.IMPORT_IDX_EPOCH — confirm against util.load_checkpoint
        # which of the two it expects.
        net_gen, net_dis = util.load_checkpoint(config.EPOCHS, net_gen, net_dis, path_model)

    return net_gen, net_dis

def train(dataset, net_gen, net_dis, config):
    """Train ``net_gen`` / ``net_dis`` on ``dataset`` with BCE loss and Adam.

    Both optimizers use ``config.LEARNING_RATE`` and betas
    ``(config.MOMENTUM, 0.99)``. The loop itself is delegated to
    ``train_base``, which also receives ``config.PATH_MODEL`` as its
    checkpoint path.

    Returns:
        ``(net_gen, net_dis, losses, imgs)`` — the score history produced by
        ``train_base`` is dropped.
    """
    adam_betas = (config.MOMENTUM, 0.99)

    criterion = nn.BCELoss()
    optim_gen = optim.Adam(net_gen.parameters(), lr=config.LEARNING_RATE, betas=adam_betas)
    optim_dis = optim.Adam(net_dis.parameters(), lr=config.LEARNING_RATE, betas=adam_betas)

    outcome = train_base(
        config.EPOCHS,
        config.BATCH_SIZE,
        config.DIM_NOISE,
        config.DEVICE,
        dataset,
        net_gen,
        net_dis,
        criterion,
        optim_gen,
        optim_dis,
        config.PATH_MODEL,
    )
    net_gen, net_dis, losses, _scores, imgs = outcome

    return net_gen, net_dis, losses, imgs

In [0]:
def init_weight(layer):
    """Initialise one layer in place; meant to be used with ``Module.apply``.

    Layers whose exact type is ``nn.ConvTranspose2d`` or ``nn.Conv2d`` get
    weights drawn from N(0, 0.02). Layers whose exact type is
    ``nn.BatchNorm2d`` get weights drawn from N(1, 0.02) and a zero bias.
    Every other layer (including subclasses of the above) is left untouched.
    """
    std = 0.02
    kind = type(layer)

    if kind in (nn.ConvTranspose2d, nn.Conv2d):
        nn.init.normal_(layer.weight.data, mean=0, std=std)
        return

    if kind == nn.BatchNorm2d:
        nn.init.normal_(layer.weight.data, mean=1, std=std)
        nn.init.constant_(layer.bias.data, 0)

In [0]:
# Locations on the mounted Google Drive that are handed to pokemonDataset.
# NOTE(review): judging by the names these hold the images and their tag
# files — confirm against dataset.py.
PATH_IMAGE = '/content/gdrive/My Drive/data/image'
PATH_TAG = '/content/gdrive/My Drive/data/tags'
# Names of the entries found directly under PATH_IMAGE (listed at cell run time).
# NOTE(review): presumably one sub-directory per artwork type — confirm.
ARTWORK_TYPE = os.listdir(PATH_IMAGE)
# Passed to pokemonDataset as its fourth argument.
# NOTE(review): looks like a switch for adding extra tags — confirm in dataset.py.
IS_ADD_I2V_TAG = False

class config_dcgan:
    """Settings for the DCGAN run; read as plain class attributes."""

    # Checkpoint path handed to the training loop; build_gen_dis derives the
    # per-epoch file name from it when restoring.
    PATH_MODEL = '/content/gdrive/My Drive/data/dcgan_128.pth'
    BATCH_SIZE = 128
    # Passed to generator / discriminator as their image-size argument.
    DIM_IMG = 128
    # Channel count of the noise tensors fed to the generator.
    DIM_NOISE = 100
    LEARNING_RATE = 0.0002
    # Used as the first Adam beta by train().
    MOMENTUM = 0.5
    EPOCHS = 100
    # True: initialise fresh weights; False: restore a saved checkpoint.
    INIT = True
    # Epoch index of the checkpoint to restore when INIT is False.
    IMPORT_IDX_EPOCH = EPOCHS
    # Use the first GPU when one is available and fall back to the CPU
    # otherwise, so the notebook still runs on a runtime without CUDA.
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [0]:
dataset = pokemonDataset(PATH_IMAGE, PATH_TAG, ARTWORK_TYPE, IS_ADD_I2V_TAG)

# Pre-computed per-channel statistics used for normalisation. They were
# produced once with the (not included) helper call below and hard-coded to
# avoid recomputing them on every run:
#     mean, std = get_channel_mean_std(dataset, DIM_IMG)
# NOTE(review): the magnitudes suggest a 0-255 pixel scale — confirm.
mean = [220.43362509, 217.50907014, 212.78514176]
std = [71.7985852,  73.64374336, 78.23258064]

# NOTE(review): ToDoubleTensor / Normalize were referenced without any import.
# They are assumed to be the custom transforms of the project's `dataset`
# module (imported above as `dset`) — confirm.
transform = transforms.Compose([dset.ToDoubleTensor(), dset.Normalize(mean, std)])
dataset.set_transform(transform)

CONFIG = config_dcgan
# build_gen_dis / train are defined in this notebook; the former `dc.` prefix
# pointed at a module that is never imported here.
net_gen, net_dis = build_gen_dis(CONFIG)
_, _, losses, imgs = train(dataset, net_gen, net_dis, CONFIG)

In [0]:
import torchvision

def make_figure_grid(img, grid_size, vmin=None, vmax=None):
    """Tile a batch of images into one grid image ready for ``plt.imshow``.

    Args:
        img: ``torch.Tensor`` or ``np.ndarray`` holding a batch of images.
            A numpy batch whose smallest non-batch axis is the last one is
            treated as channels-last and transposed to channels-first.
        grid_size: number of images per grid row; the batch may hold at most
            ``grid_size ** 2`` images.
        vmin: accepted for interface compatibility; currently unused.
        vmax: accepted for interface compatibility; currently unused.

    Returns:
        A numpy array in (height, width, channel) layout holding the grid of
        ``img + 1``.

    Raises:
        AssertionError: if ``img`` has an unsupported type or holds more than
            ``grid_size ** 2`` images.
    """
    assert isinstance(img, (torch.Tensor, np.ndarray))
    if isinstance(img, np.ndarray):
        # Position of the smallest non-batch axis within the full shape. The
        # +1 compensates for `shape[1:]` dropping the batch axis; without it
        # the value could never be 3 and channels-last input was not handled.
        nc = int(np.argmin(img.shape[1:])) + 1
        if nc == 3:
            img = np.transpose(img, (0, nc, 1, 2))
        img = torch.from_numpy(img)
    assert img.size(0) <= grid_size ** 2
    # shift by +1 before tiling, then move to (H, W, C) for matplotlib
    grid_img = torchvision.utils.make_grid(img+1, nrow=grid_size).detach().cpu()
    grid_img_np = np.transpose(grid_img.numpy(), (1, 2, 0))
    return grid_img_np

In [0]:
# train_base returns the loss history as [discriminator losses, generator
# losses], so index 0 is the discriminator and index 1 the generator.
plt.figure(figsize=(10, 5))
plt.plot(losses[0], label='discriminator')
plt.plot(losses[1], label='generator')
plt.title('Loss of training the generator and discriminator')
# the x axis counts the recorded logging steps, the y axis is the loss value
plt.xlabel('process')
plt.ylabel('loss')
plt.legend()
plt.show()

# Show the fixed-noise samples of the last ten recorded epochs (identical to
# imgs[90:100] for a 100-epoch run, but also works for shorter runs).
for img in imgs[-10:]:
    grid_img = make_figure_grid(img, 8)
    plt.figure(figsize=(10, 10))
    plt.imshow(grid_img)
plt.show()