# **Отчет**
В ходе данной работы была обучена генеративно-состязательная сеть (*GAN*), состоящая из Генератора и Дискриминатора. Получен результат метрики **FID = 18.939**.

Первоначально была выбрана архитектура из оригинальной статьи DCGAN, где количество карт признаков (feature maps) на слоях генератора уменьшалось от 128 до 64 с фактором 2 (для дискриминатора - наоборот). Данная реализация показывала плохие результаты уже на этапе визуального анализа.

Архитектура была переработана, количество карт признаков увеличено до 512 (512->256->128->64->3). (реализация ниже в коде) Были получены следующие результаты...

Использование LeakyReLU(0.2) в генераторе:

*   FID (50 эпох) - 64.925
*   FID (100 эпох) - 55.096
*   FID (200 эпох) - 28.702
*   FID (300 эпох) - 27.431

Использование ReLU в генераторе:

*   FID (50 эпох) - 41.219
*   FID (100 эпох) - 29.052
*   FID (200 эпох) - 23.548
*   FID (300 эпох) - 28.231

Таким образом, ReLU-активации показывают лучшие результаты по сравнению с LeakyReLU, а обучение после 200 эпох отрицательно сказывается на результате.

Также было решено протестировать использование 1024 карт признаков (1024->512->256->128->64->3). Время обучения при этом увеличилось практически в два раза:

*   FID (50 эпох) - 29.109
*   FID (100 эпох) - 18.939
*   FID (200 эпох) - 24.474
*   FID (300 эпох) - 25.896

Вывод: количество карт признаков играет существенную роль при обучении генеративно-состязательных сетей. Несмотря на то, что зачастую с увеличением количества параметров у модели появляется склонность к переобучению, в случае с GAN это необходимо.
 
Также было отмечено, что примерно после 50 эпох изображения начинают выглядеть малоконтрастными. Линейная коррекция контраста (функция contrasting()) полностью решает эту проблему.

# **Colab stuff**

In [None]:
# Mount Google Drive at /content/drive; the archive, checkpoint and sample paths
# used below are given relative to it as "drive/MyDrive/..."
# (assumes the notebook's working directory is /content — TODO confirm).
from google.colab import drive
drive.mount('/content/drive')

In [15]:
import zipfile
# Unpack every member of the dataset archive into the local "dataset" directory.
with zipfile.ZipFile('drive/MyDrive/cats/archive.zip', mode='r') as archive:
    archive.extractall(path='dataset')

# **Imports**

In [43]:
import os

In [44]:
import numpy as np 
import pandas as pd
from skimage.io import imsave
from skimage.util import img_as_ubyte

In [45]:
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torch
from torch import device
import torch.nn as nn
from tqdm.notebook import tqdm
import torch.nn.functional as F
from torchvision.utils import save_image
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
%matplotlib inline

# **Initialization**

In [46]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [49]:
n_epochs = 100 # number of training epochs
batch_size = 128 # number of images per batch
lr = 0.0002 # Adam: learning rate
b1 = 0.5 # Adam: decay rate of the first-moment (mean) gradient estimate
b2 = 0.999 # Adam: decay rate of the second-moment (squared gradient) estimate
n_cpu = 8 # number of DataLoader worker processes used for batch loading
latent_size = 128 # dimensionality of the latent space
img_size = 64 # size of each image dimension (not referenced elsewhere in the visible code)
channels = 3 # number of image channels

dataset_path = "dataset" # path of the directory with folder containing images
model_path = "drive/MyDrive/gan_models/" # directory where checkpoints are saved
sample_path = "drive/MyDrive/images" # directory where per-epoch sample grids are saved

# **Help functions**

In [50]:
# Inverse of Normalize(mean=0.5, std=0.5): maps values from [-1, 1] to [0, 1]
def denorm(img_tensors):
    """Rescale images from the [-1, 1] range back to [0, 1]

    Parameters
    ----------
    img_tensors: Tensor
        images with values in [-1, 1]

    Returns
    -------
    img_tensors: Tensor
        new tensor holding the same images with values in [0, 1]
    """
    half = 0.5
    rescaled = img_tensors * half + half
    return rescaled

In [51]:
# Contrast correction (linear)
def contrasting(image):
    """Linear contrast correction

        Every image is converted from RGB to YUV, its luma plane (Y) is
        linearly stretched so that the darkest pixel maps to 0 and the
        brightest to 255, and the result is converted back to RGB.
        The chroma planes (U, V) are left unchanged.

        Parameters
        ----------
        image: Tensor
            batch of RGB images, channels last (N x H x W x 3),
            values in [0, 1]

        Returns
        -------
        image: Tensor
            new tensor of the same shape with values clamped to [0, 1];
            the input tensor is left unmodified
    """
    # Work on a scaled copy: an in-place `image *= 255` would also rescale
    # the tensor owned by the caller.
    scaled = image * 255

    R = scaled[:, :, :, 0]
    G = scaled[:, :, :, 1]
    B = scaled[:, :, :, 2]

    # RGB -> YUV
    Y = 0.2126*R + 0.7152*G + 0.0722*B
    U = -0.0999*R - 0.3360*G + 0.4360*B
    V = 0.6150*R - 0.5586*G - 0.0563*B

    # One row of luma values per image, so min/max are taken per image
    flat_y = Y.reshape(Y.shape[0], -1)

    y_min = torch.min(flat_y, dim=1)[0][:, None]
    y_max = torch.max(flat_y, dim=1)[0][:, None]

    # A uniform image has max == min; dividing by that zero range would fill
    # the whole image with NaN. Such images keep their original luma instead.
    y_range = y_max - y_min
    has_range = y_range > 0
    safe_range = torch.where(has_range, y_range, torch.ones_like(y_range))

    stretched = (flat_y - y_min) * 255 / safe_range
    stretched = torch.where(has_range, stretched, flat_y)

    Y = stretched.reshape(Y.shape)

    # YUV -> RGB, restoring the channel axis as the last dimension
    R = (Y + 1.2803*V).unsqueeze(3)
    G = (Y - 0.2148*U - 0.3805*V).unsqueeze(3)
    B = (Y + 2.1279*U).unsqueeze(3)

    output = torch.cat((R, G, B), 3)
    # Stretching can push channels slightly out of range; clamp and rescale
    output = torch.clamp(output, 0, 255) / 255

    return output

# **Data preparation**

In [52]:
# Input pipeline: images are read from dataset_path, converted to tensors and
# normalized per channel with mean 0.5 and std 0.5 (denorm() applies the inverse).
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

train_dataset = ImageFolder(root=dataset_path, transform=train_transforms)

# Shuffled batches, loaded by n_cpu worker processes into pinned memory
dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=n_cpu,
    pin_memory=True,
)

# **Models**

In [53]:
# Generator: turns a latent tensor into an image with a stack of transposed
# convolutions. Conv arguments are (in, out, kernel_size, stride, padding).
generator = nn.Sequential(
    # input: latent_size x 1 x 1

    # stage 1 -> 1024 x 4 x 4
    nn.ConvTranspose2d(latent_size, 1024, 4, 1, 0, bias=False),
    nn.BatchNorm2d(1024),
    nn.ReLU(inplace=True),

    # stage 2 -> 512 x 8 x 8
    nn.ConvTranspose2d(1024, 512, 4, 2, 1, bias=False),
    nn.BatchNorm2d(512),
    nn.ReLU(inplace=True),

    # stage 3 -> 256 x 16 x 16
    nn.ConvTranspose2d(512, 256, 4, 2, 1, bias=False),
    nn.BatchNorm2d(256),
    nn.ReLU(inplace=True),

    # stage 4 -> 128 x 32 x 32
    nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False),
    nn.BatchNorm2d(128),
    nn.ReLU(inplace=True),

    # output stage -> channels x 64 x 64, squashed by tanh
    nn.ConvTranspose2d(128, channels, 4, 2, 1, bias=False),
    nn.Tanh(),
)

In [54]:
# Discriminator: reduces an image to a single probability with a stack of
# strided convolutions. Conv arguments are (in, out, kernel_size, stride, padding).
discriminator = nn.Sequential(
    # input: channels x 64 x 64

    # stage 1 -> 128 x 32 x 32
    nn.Conv2d(channels, 128, 4, 2, 1, bias=False),
    nn.BatchNorm2d(128),
    nn.LeakyReLU(negative_slope=0.2, inplace=True),

    # stage 2 -> 256 x 16 x 16
    nn.Conv2d(128, 256, 4, 2, 1, bias=False),
    nn.BatchNorm2d(256),
    nn.LeakyReLU(negative_slope=0.2, inplace=True),

    # stage 3 -> 512 x 8 x 8
    nn.Conv2d(256, 512, 4, 2, 1, bias=False),
    nn.BatchNorm2d(512),
    nn.LeakyReLU(negative_slope=0.2, inplace=True),

    # stage 4 -> 1024 x 4 x 4
    nn.Conv2d(512, 1024, 4, 2, 1, bias=False),
    nn.BatchNorm2d(1024),
    nn.LeakyReLU(negative_slope=0.2, inplace=True),

    # collapse to one score per image -> 1 x 1 x 1
    nn.Conv2d(1024, 1, 4, 1, 0, bias=False),

    # flatten to (batch, 1) and map the score into (0, 1)
    nn.Flatten(),
    nn.Sigmoid(),
)

In [None]:
# Move both networks to the selected device (GPU if available, else CPU).
# nn.Module.to() moves the parameters in place, so no reassignment is needed.
discriminator.to(device)
generator.to(device)

# **Optimizers**

In [56]:
# One Adam optimizer per network; both share the lr / (b1, b2) settings above
optimizer_D = torch.optim.Adam(
    params=discriminator.parameters(),
    lr=lr,
    betas=(b1, b2),
)
optimizer_G = torch.optim.Adam(
    params=generator.parameters(),
    lr=lr,
    betas=(b1, b2),
)

# **Load models from checkpoint (optional)**

In [None]:
# load checkpoint dict
checkpoint_path = "checkpoint/path/model_777.tar"
# map_location remaps the saved tensors onto the current device, so a checkpoint
# written on a GPU runtime can also be restored on a CPU-only runtime
checkpoint = torch.load(checkpoint_path, map_location=device)

# initialize models and optimizers with trained parameters
generator.load_state_dict(checkpoint['generator_state_dict'])
discriminator.load_state_dict(checkpoint['discriminator_state_dict'])
optimizer_G.load_state_dict(checkpoint['optimizer_G_state_dict'])
optimizer_D.load_state_dict(checkpoint['optimizer_D_state_dict'])

# Move models to the gpu if available else cpu
discriminator.to(device)
generator.to(device)

# **Train**

In [57]:
#To save the samples produced during epochs
def save_samples(sample_path, index, latent_tensors, show=True):
    """Image grid saving

        Runs the generator on the given latent tensors and writes the
        resulting images as one PNG grid (8 images per row) named
        'generated-images-XXXX.png' inside sample_path.

        Parameters
        ----------
        sample_path: string
            path to the sampling output (the directory must already exist)
        index: int
            index of saving, zero-padded to 4 digits in the file name
        latent_tensors: Tensor
            input latent tensors
        show: bool
            whether to show the result
    """

    # Sampling needs no gradients, so skip building the autograd graph
    with torch.no_grad():
        # Bring the tanh output from [-1, 1] to [0, 1] once, so that the saved
        # file and the on-screen preview show the same image
        fake_images = denorm(generator(latent_tensors))

    fake_fname = 'generated-images-{0:0=4d}.png'.format(index)
    save_image(fake_images, os.path.join(sample_path, fake_fname), nrow=8)
    print('Saving', fake_fname)
    if show:
        _, ax = plt.subplots(figsize=(8, 8))
        ax.set_xticks([]); ax.set_yticks([])
        # make_grid returns C x H x W; imshow expects H x W x C
        ax.imshow(make_grid(fake_images.cpu(), nrow=8).permute(1, 2, 0))

In [58]:
def train_discriminator(real_images):
    """Discriminator training step

        Scores one batch of real images and one batch of freshly generated
        images, sums the two binary cross-entropy losses and applies a
        single optimizer step to the discriminator.

        Parameters
        ----------
        real_images: Tensor
            batch of real images, already on `device`

        Returns
        -------
        (loss, real_score, fake_score): Tuple
            discriminator loss, mean discriminator output on real images,
            mean discriminator output on fake images
    """
    # Clear discriminator gradients
    optimizer_D.zero_grad()

    # Pass real images through discriminator; target label is 1
    real_preds = discriminator(real_images)
    real_targets = torch.ones(real_images.size(0), 1, device=device)
    real_loss = F.binary_cross_entropy(real_preds, real_targets)
    real_score = torch.mean(real_preds).item()

    # Generate fake images from random noise. Only the discriminator is
    # updated in this step, so the generator runs without gradient tracking:
    # otherwise loss.backward() would also backpropagate through the
    # generator for gradients that are never used.
    latent = torch.randn(batch_size, latent_size, 1, 1, device=device)
    with torch.no_grad():
        fake_images = generator(latent)

    # Pass fake images through discriminator; target label is 0
    fake_targets = torch.zeros(fake_images.size(0), 1, device=device)
    fake_preds = discriminator(fake_images)
    fake_loss = F.binary_cross_entropy(fake_preds, fake_targets)
    fake_score = torch.mean(fake_preds).item()

    # Update discriminator weights
    loss = real_loss + fake_loss
    loss.backward()
    optimizer_D.step()
    return loss.item(), real_score, fake_score

In [59]:
def train_generator():
    """Generator training step

        Generates one batch of images from random noise and updates the
        generator so that the discriminator rates those images as real.

        Returns
        -------
        (loss, latent): Tuple
            generator loss, batch of latent tensors used in this step
    """
    # Start from clean generator gradients
    optimizer_G.zero_grad()

    # Sample noise and turn it into a batch of fake images
    noise = torch.randn(batch_size, latent_size, 1, 1).to(device)
    generated = generator(noise)

    # The generator is rewarded when the discriminator answers "real" (1)
    verdicts = discriminator(generated)
    wanted = torch.ones(batch_size, 1).to(device)
    g_loss = F.binary_cross_entropy(verdicts, wanted)

    # Backpropagate and apply the generator update
    g_loss.backward()
    optimizer_G.step()

    return g_loss.item(), noise

In [60]:
def fit(epochs, lr, start_idx=1, model_path="models", sample_path="images"):
    """Fit loop

        For every batch runs one discriminator step followed by one
        generator step. After every epoch it logs the last-batch losses and
        scores, saves a grid of generated samples and writes a checkpoint
        with both models and both optimizers.

        Parameters
        ----------
        epochs: int
            number of epoches to train
        lr: float
            learning rate applied to both optimizers
        start_idx: int
            epoch start index used to number saved samples and checkpoints
        model_path: string
            path of the models to save
        sample_path: string
            path to the sampling images output

        Returns
        -------
        (losses_g, losses_d, real_scores, fake_scores): Tuple
            generator losses, discriminator losses,
            discriminator scores on real images,
            discriminator scores on fake images
            (one value per epoch, taken from the last batch of that epoch)
    """
    torch.cuda.empty_cache()

    os.makedirs(model_path, exist_ok=True)
    os.makedirs(sample_path, exist_ok=True)

    # Make the `lr` argument effective: the optimizers are created outside
    # this function, so without this the argument would be silently ignored
    for optimizer in (optimizer_D, optimizer_G):
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    # Restore training mode in case a model was switched to eval() earlier
    # (e.g. for image generation); BatchNorm behaves differently in eval mode
    generator.train()
    discriminator.train()

    # Losses & scores
    losses_g = []
    losses_d = []
    real_scores = []
    fake_scores = []

    for epoch in range(epochs):
        for real_images, _ in tqdm(dataloader):

            # Train discriminator
            real_images = real_images.to(device)
            loss_d, real_score, fake_score = train_discriminator(real_images)

            # Train generator
            loss_g, latent = train_generator()

        # Record losses & scores (values of the last batch of the epoch)
        losses_g.append(loss_g)
        losses_d.append(loss_d)
        real_scores.append(real_score)
        fake_scores.append(fake_score)

        # Log losses & scores (last batch)
        print("Epoch [{}/{}], loss_g: {}, loss_d: {}, real_score: {}, fake_score: {}".format(
            epoch+1, epochs, loss_g, loss_d, real_score, fake_score))

        # Save generated images (from the latent batch of the last generator step)
        save_samples(sample_path, epoch+start_idx, latent, show=False)

        # Checkpoint everything needed to resume training
        torch.save({
            'epoch': epoch+start_idx,
            'generator_state_dict': generator.state_dict(),
            'discriminator_state_dict': discriminator.state_dict(),
            'optimizer_G_state_dict': optimizer_G.state_dict(),
            'optimizer_D_state_dict': optimizer_D.state_dict()
            }, os.path.join(model_path,f"model_{epoch+start_idx}.tar"))

    return losses_g, losses_d, real_scores, fake_scores

In [None]:
# Run the training. Note: despite its name, `model` receives the tuple returned
# by fit() (losses_g, losses_d, real_scores, fake_scores), not a network.
model = fit(epochs=n_epochs, lr=lr, model_path=model_path,sample_path = sample_path)

# **Image generation + FID**

In [39]:
def generate_dataset(output_path,num_of_images, batch_size):
    """Dataset generation

        Samples random latent vectors, runs them through the generator,
        applies denorm() and contrasting() and saves every image as
        '<n>.jpg' (n = 1, 2, ...) inside output_path.

        Parameters
        ----------
        output_path: string
            path of the output directory (created if missing)
        num_of_images: int
            number of images to generate
        batch_size: int
            generator input batch size of latent tensors; when it does not
            divide num_of_images, the last batch is smaller

        Returns
        -------
        None

        Raises
        ------
        ValueError
            if batch_size is not positive
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # check whether the output directory created
    os.makedirs(output_path, exist_ok=True)
    # number of saved image in its output name
    saved_num = 0
    # saving loop; stepping by batch_size with a clipped last batch makes sure
    # exactly num_of_images files are written even if there is a remainder
    for first_index in range(0, num_of_images, batch_size):
        current_batch = min(batch_size, num_of_images - first_index)
        # form random noise vector
        latent_vec = torch.randn(current_batch, latent_size, 1, 1, device=device)
        # generate images from noise (no gradients needed) and move the
        # channel axis last: N x C x H x W -> N x H x W x C
        with torch.no_grad():
            fake_images = generator(latent_vec).cpu().permute(0, 2, 3, 1)
        # image normalization from [-1,1] to [0,1]
        fake_images = denorm(fake_images)
        # image contrasting
        fake_images = contrasting(fake_images)
        # image conversion: float tensor in [0, 1] -> uint8 numpy array
        fake_images = img_as_ubyte(fake_images.numpy())
        # saving
        for image in fake_images:
            saved_num += 1
            imsave(os.path.join(output_path,f"{saved_num}.jpg"), image)


In [41]:
# load checkpoint dict; map_location remaps the saved tensors onto the current device
checkpoint_path = "drive/MyDrive/model_100.tar"
checkpoint = torch.load(checkpoint_path, map_location=device)

# initialize model with trained parameters (only the generator is needed here)
generator.load_state_dict(checkpoint['generator_state_dict'])
generator.to(device)
# switch to evaluation mode for sampling
generator.eval()

# generation: 10000 images in batches of 10, written to the "generated_100" directory
generate_dataset("generated_100",10000,10)

In [None]:
!pip install pytorch_fid

In [None]:
!python -m pytorch_fid generated_100 dataset/cats