# <p style='color:green; font-size:30px'>Generowanie obrazów w stylu Claude'a Moneta za pomocą DCGAN</p>
## Aleksandra Buchowicz, Filip Pazio, Tomasz Markowicz
### <p style='color:green'>Wydział Matematyki i Nauk Informacyjnych, Politechnika Warszawska <br> Warsztaty z Technik Uczenia Maszynowego</p>

# <p style='color:green; font-size:30px'>Wstęp</p>

* GAN (Generative Adversarial Network) - metoda opracowana przez Iana Goodfellowa, zestawiająca dwie sieci neuronowe: generator (artystę) i dyskryminator (krytyka)
* Generator tworzy własne obrazy, dyskryminator ocenia, czy pochodzą ze zbioru danych, czy spoza niego. Pierwszy dąży do 'oszukania' drugiego, zaś drugi stara się coraz dokładniej odróżniać obrazy prawdziwe od wygenerowanych.
* DCGAN (Deep Convolutional Generative Adversarial Networks) jest pewną klasą konwolucyjnych sieci neuronowych (CNN), które potrafią stopniowo filtrować różne części danych uczących i wyostrzać ważne cechy w procesie dyskryminacji wykorzystanym do rozpoznawania lub klasyfikacji wzorców.

# <p style='color:green; font-size:30px'>Importowanie modułów</p>

In [1]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

import numpy as np
import pickle as pkl
import matplotlib.pyplot as plt

# <p style='color:green; font-size:30px'>Dane - wczytanie i konwersja obrazów do tensorów</p>


In [5]:
class Dataset(torch.utils.data.Dataset):
    """Image-folder dataset that yields normalised 64x64 RGB tensors.

    Image files located directly inside ``img_dir`` are opened lazily in
    ``__getitem__``, converted to RGB, resized, centre-cropped to 64x64 and
    scaled to the range [-1, 1].

    The base class is referenced by its fully qualified name on purpose: this
    class reuses the name ``Dataset``, so ``class Dataset(Dataset)`` would
    inherit from its own previous definition whenever the cell is re-run in
    the same kernel.
    """

    # Lower-case file extensions treated as images when scanning ``img_dir``.
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff', '.webp')

    def __init__(self, img_dir):
        """Collect the image paths and build the preprocessing pipeline.

        Args:
            img_dir: Directory that contains the training images.
        """
        self.img_list = self._list_images(img_dir)

        # Preprocessing applied to every image on access.
        self.transform = transforms.Compose([
            transforms.Resize(64),
            transforms.CenterCrop(64),
            transforms.ToTensor(),
            # (x - 0.5) / 0.5 per channel: maps ToTensor's [0, 1] to [-1, 1].
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ])

    @classmethod
    def _list_images(cls, img_dir):
        """Return the sorted absolute paths of the image files in ``img_dir``.

        Sub-directories (e.g. Jupyter's ``.ipynb_checkpoints``) and files
        without a known image extension are skipped so that ``Image.open``
        is never handed something it cannot read. Sorting makes the index ->
        file mapping independent of the order ``os.listdir`` happens to use.
        """
        abspath = os.path.abspath(img_dir)
        paths = []
        for name in sorted(os.listdir(abspath)):
            full_path = os.path.join(abspath, name)
            if os.path.isfile(full_path) and name.lower().endswith(cls.IMG_EXTENSIONS):
                paths.append(full_path)
        return paths

    def __len__(self):
        """Number of images found in the directory."""
        return len(self.img_list)

    def __getitem__(self, index):
        """Load image ``index`` and return it as a transformed tensor."""
        path = self.img_list[index]
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the RGB image.
        # NOTE(review): the original author noted the source images are
        # 256x256 - not verifiable from this notebook.
        with Image.open(path) as img:
            rgb = img.convert('RGB')
        return self.transform(rgb)

In [6]:
# Build the dataset from the images stored in the local 'monet_jpg' directory.
db = Dataset('monet_jpg')

In [7]:
# Sanity check: shape of the first sample after the transform pipeline.
db[0].shape

torch.Size([3, 64, 64])

## Generator

In [8]:
class Generator(nn.Module):
    """DCGAN generator: turns a latent vector into a 3-channel image.

    The latent vector is treated as a ``noise_size``-channel 1x1 feature map
    and upsampled by five transposed convolutions (1 -> 4 -> 8 -> 16 -> 32 ->
    64 pixels per side). The final ``Tanh`` keeps pixel values in [-1, 1].

    Args:
        noise_size: Length of the latent vector.
        img_dim: Base number of feature maps; the hidden layers use
            ``img_dim * 8``, ``* 4``, ``* 2`` and ``* 1`` channels. Despite
            the name it does not change the output resolution.
    """

    def __init__(self, noise_size, img_dim=64):
        super().__init__()

        # Channel widths of the four hidden stages, widest first.
        widths = [img_dim * 8, img_dim * 4, img_dim * 2, img_dim]

        # Stage 1 expands the 1x1 latent "pixel" to a 4x4 map.
        stack = [
            nn.ConvTranspose2d(noise_size, widths[0], kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(widths[0]),
            nn.ReLU(True),
        ]

        # Each further hidden stage doubles the spatial size.
        for c_in, c_out in zip(widths, widths[1:]):
            stack += [
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]

        # Output stage: three channels (RGB), no batch norm.
        stack += [
            nn.ConvTranspose2d(widths[-1], 3, kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        ]

        self.main = nn.Sequential(*stack)

    def forward(self, x):
        """Generate images from a batch of latent vectors.

        ``x`` is reshaped to ``(x.size(0), x.size(1), 1, 1)`` before it is fed
        to the transposed-convolution stack.
        """
        batch, channels = x.size(0), x.size(1)
        latent_map = x.view(batch, channels, 1, 1)
        return self.main(latent_map)

In [9]:
# Instantiate a generator (latent size 100, base width 64) to inspect its layer stack in the cell output.
Generator(100, 64)

Generator(
  (main): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(64, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()
  )
)

In [10]:
def noise(size):
    """Return uniform noise in [-1, 1) as a float32 tensor.

    Args:
        size: Output shape, forwarded to ``np.random.uniform`` (an int or a
            tuple of ints).

    Returns:
        A ``torch.float32`` tensor with the requested shape.
    """
    samples = np.random.uniform(low=-1, high=1, size=size)
    return torch.from_numpy(samples).to(torch.float32)

In [11]:
# Preview: draw a vector of 100 uniform noise values.
noise(100)

tensor([-0.9548, -0.1203,  0.8817,  0.0111, -0.4274, -0.1027, -0.2647, -0.1747,
        -0.4267,  0.6905, -0.3098,  0.4478, -0.4889, -0.3943, -0.8687, -0.6387,
         0.4656, -0.2567,  0.7280,  0.0462, -0.1148,  0.2342,  0.7135, -0.5119,
        -0.1624, -0.1584,  0.9888,  0.7679,  0.5988,  0.1187, -0.4560,  0.8928,
         0.7669, -0.3044, -0.8040, -0.8643, -0.2444,  0.5506, -0.0185,  0.7331,
         0.8735,  0.4659, -0.1745,  0.4278, -0.4738,  0.7248, -0.4953,  0.8143,
         0.4053, -0.9838,  0.9182, -0.3319, -0.6388, -0.4032,  0.1812,  0.4950,
        -0.3103, -0.9802,  0.7568, -0.0794, -0.2338, -0.6118,  0.4085, -0.3002,
        -0.5407,  0.2545, -0.4874, -0.2398, -0.6231,  0.9339, -0.5545,  0.7395,
         0.4936,  0.5044,  0.4663, -0.2130, -0.0225,  0.4537,  0.8109, -0.6881,
         0.7284, -0.5717, -0.1670,  0.9628,  0.6495,  0.6959, -0.9708,  0.9342,
        -0.0550, -0.9821,  0.3412,  0.3833,  0.6039, -0.3914, -0.8125,  0.6423,
        -0.4476, -0.6163, -0.2627,  0.73

## Dyskryminator

In [12]:
class Discriminator(nn.Module):
    """DCGAN discriminator: scores 3-channel images with a value in (0, 1).

    Four stride-2 convolutions halve the spatial size each time
    (64 -> 32 -> 16 -> 8 -> 4 for 64x64 inputs); a final 4x4 convolution
    reduces the map to a single value that is squashed by a sigmoid.

    Args:
        img_dim: Base number of feature maps; the layers use ``img_dim``,
            ``img_dim * 2``, ``* 4`` and ``* 8`` channels.
    """

    def __init__(self, img_dim=64):
        super().__init__()

        # Channel widths of the four downsampling stages, narrowest first.
        widths = (img_dim, img_dim * 2, img_dim * 4, img_dim * 8)

        # Input stage: RGB in, no batch norm.
        modules = [
            nn.Conv2d(3, widths[0], kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Remaining downsampling stages: conv -> batch norm -> leaky ReLU.
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            modules.extend([
                nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            ])

        # Scoring head: collapse the remaining 4x4 map to one value per image.
        modules.extend([
            nn.Conv2d(widths[-1], 1, kernel_size=4, stride=1, padding=0, bias=False),
            nn.Sigmoid(),
        ])

        self.main = nn.Sequential(*modules)

    def forward(self, x):
        """Return the score map for a batch of images (``(N, 1, 1, 1)`` for 64x64 inputs)."""
        scores = self.main(x)
        return scores

## DCGAN

In [13]:
class DCGAN:
    """Training harness that pairs a ``Generator`` with a ``Discriminator``.

    The constructor builds both networks, moves them to CUDA when available
    (CPU otherwise) and initialises their weights. ``train`` alternates one
    discriminator step and one generator step per batch and records generator
    samples from a fixed noise batch after every epoch.
    """

    # Binary-cross-entropy targets. They are softer than hard 1/0 labels
    # (presumably as label smoothing - confirm intent with the authors).
    REAL_LABEL = 0.8
    FAKE_LABEL = 0.1

    def __init__(self, noise_size, img_dim):
        """Create, place and initialise both networks.

        Args:
            noise_size: Length of the generator's latent vector.
            img_dim: Base channel width forwarded to both networks.
        """
        self.noise_size = noise_size

        self.D = Discriminator(img_dim)
        self.G = Generator(noise_size, img_dim)

        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

        self.D.to(self.device)
        self.G.to(self.device)

        self.D.apply(self.weights_init)
        self.G.apply(self.weights_init)

    def weights_init(self, m):
        """Initialise one module in place; meant to be used with ``Module.apply``.

        Modules whose class name contains ``Conv`` get weights from N(0, 0.02);
        modules whose class name contains ``BatchNorm`` get weights from
        N(1, 0.02) and a zero bias. Every other module is left untouched.
        """
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.normal_(m.weight.data, 0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0)

    def describe(self):
        """Print the layer structure of both networks."""
        print('Discriminator')
        print(self.D)

        print('\nGenerator')
        print(self.G)

    def __calculate_loss(self, output, labels):
        """Binary cross-entropy between discriminator scores and target labels.

        The scores are flattened with ``reshape(-1)`` rather than ``squeeze()``:
        ``squeeze()`` would turn a batch of one into a 0-dim tensor whose shape
        no longer matches the ``(1,)`` label tensor.
        """
        criterion = nn.BCELoss()
        return criterion(output.reshape(-1), labels)

    def real_loss(self, D_out):
        """Loss that pulls the scores in ``D_out`` towards ``REAL_LABEL``."""
        batch_size = D_out.size(0)
        labels = torch.full((batch_size,), self.REAL_LABEL, device=self.device)

        return self.__calculate_loss(D_out, labels)

    def fake_loss(self, D_out):
        """Loss that pulls the scores in ``D_out`` towards ``FAKE_LABEL``."""
        batch_size = D_out.size(0)
        labels = torch.full((batch_size,), self.FAKE_LABEL, device=self.device)

        return self.__calculate_loss(D_out, labels)

    def noise(self, size):
        """Uniform noise in [-1, 1) with shape ``size`` as a float32 tensor on ``self.device``."""
        z = np.random.uniform(-1, 1, size=size)
        return torch.from_numpy(z).float().to(self.device)

    def train_generator(self, g_optim, size):
        """Run one generator update and return its loss as a float.

        Args:
            g_optim: Optimizer over the generator's parameters.
            size: Shape of the noise batch, ``(batch, noise_size)``.
        """
        g_optim.zero_grad()

        z = self.noise(size)
        fake_images = self.G(z)

        d_fake = self.D(fake_images)

        # The generator is rewarded when the discriminator rates its images as real.
        g_loss = self.real_loss(d_fake)

        g_loss.backward()
        g_optim.step()

        return g_loss.item()

    def train_discriminator(self, d_optim, real_images, size):
        """Run one discriminator update and return its loss as a float.

        Args:
            d_optim: Optimizer over the discriminator's parameters.
            real_images: Batch of real images (moved to ``self.device`` here).
            size: Shape of the noise batch used to create the fake images.
        """
        d_optim.zero_grad()

        d_real = self.D(real_images.to(self.device)).view(-1)
        d_real_loss = self.real_loss(d_real)

        # Only the discriminator is updated in this step, so the fake batch is
        # produced without building an autograd graph through the generator.
        with torch.no_grad():
            fake_images = self.G(self.noise(size))

        d_fake = self.D(fake_images)
        d_fake_loss = self.fake_loss(d_fake)

        d_loss = d_real_loss + d_fake_loss

        d_loss.backward()
        d_optim.step()

        return d_loss.item()

    def train(self, num_epochs, d_optim, g_optim, data_loader, z_size, sample_size,
              print_every=500, sample_path='DCGAN_Sample_Output.pkl'):
        """Train both networks and collect generator samples after each epoch.

        Args:
            num_epochs: Number of passes over ``data_loader``.
            d_optim: Optimizer for the discriminator.
            g_optim: Optimizer for the generator.
            data_loader: Iterable yielding batches of real image tensors.
            z_size: Length of the latent vector.
            sample_size: Number of fixed noise vectors rendered after every
                epoch to track progress.
            print_every: Losses are printed for every batch whose index within
                the epoch is a multiple of this value.
            sample_path: File the collected samples are pickled to once
                training ends; ``None`` skips saving.

        Returns:
            ``(samples, losses)``: ``samples`` holds one CPU tensor of
            ``sample_size`` generated images per epoch, ``losses`` one
            ``(d_loss, g_loss)`` pair per epoch taken from its last batch.

        Raises:
            ValueError: If ``data_loader`` yields no batches.
        """
        samples, losses = [], []

        # Fixed noise so that samples from different epochs are comparable.
        fixed_z = self.noise((sample_size, z_size))

        self.D.train()
        self.G.train()

        print(f'Running on {self.device}')
        for epoch in range(num_epochs):
            d_loss = g_loss = None

            for i, real_images in enumerate(data_loader):
                # Match the number of fake images to the real batch, which may
                # be smaller for the last batch of an epoch.
                noise_shape = (real_images.size(0), z_size)

                d_loss = self.train_discriminator(d_optim, real_images, noise_shape)
                g_loss = self.train_generator(g_optim, noise_shape)

                if i % print_every == 0:
                    print('Epoch [{:5d}/{:5d}] | d_loss {:6.4f} | g_loss {:6.4f}'.format(
                        epoch+1,
                        num_epochs,
                        d_loss,
                        g_loss
                    ))

            if d_loss is None:
                raise ValueError('data_loader yielded no batches')

            losses.append((d_loss, g_loss))

            # Samples are detached CPU tensors: keeping the autograd graph of
            # every epoch would grow memory, and CPU tensors unpickle on any machine.
            self.G.eval()
            with torch.no_grad():
                samples.append(self.G(fixed_z).cpu())
            self.G.train()

        if sample_path is not None:
            with open(sample_path, 'wb') as f:
                pkl.dump(samples, f)

        return samples, losses

## Trening

In [None]:
# Reproducibility: noise is drawn through NumPy while weight initialisation
# and batch shuffling go through PyTorch, so both generators are seeded.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Dataset
BATCH_SIZE = 16
monet_dataset = Dataset('monet_jpg')  # directory with the training images
data_loader = DataLoader(monet_dataset, batch_size=BATCH_SIZE, shuffle=True)

noise_size = 128  # length of the latent vector fed to the generator
img_size = 64     # passed to DCGAN as img_dim (base channel width of both networks)

# Model
dcgan_model = DCGAN(noise_size, img_size)

# Optimizers
lr = 0.0002
beta1 = 0.5
beta2 = 0.999

d_optimizer = optim.Adam(dcgan_model.D.parameters(), lr=lr, betas=(beta1, beta2))
g_optimizer = optim.Adam(dcgan_model.G.parameters(), lr=lr, betas=(beta1, beta2))

# Train
EPOCHS = 50
# Forwarded to DCGAN.train as sample_size: the size of the fixed noise batch
# that is rendered after every epoch to track progress.
sample_size = 16
sample_result, losses_history = dcgan_model.train(
    EPOCHS, d_optimizer, g_optimizer, data_loader, noise_size, sample_size, print_every=1000
)

Running on cpu
Epoch [    1/   50] | d_loss 1.7593 | g_loss 3.7251
Epoch [    2/   50] | d_loss 1.0935 | g_loss 5.0222
Epoch [    3/   50] | d_loss 1.4951 | g_loss 8.3511
Epoch [    4/   50] | d_loss 1.9128 | g_loss 12.7523
Epoch [    5/   50] | d_loss 1.1358 | g_loss 7.0221
Epoch [    6/   50] | d_loss 1.6708 | g_loss 4.7288
Epoch [    7/   50] | d_loss 1.7643 | g_loss 8.1294
Epoch [    8/   50] | d_loss 0.8737 | g_loss 2.2486
Epoch [    9/   50] | d_loss 0.9156 | g_loss 2.8011
Epoch [   10/   50] | d_loss 0.8941 | g_loss 1.9960
Epoch [   11/   50] | d_loss 1.1292 | g_loss 1.7881
Epoch [   12/   50] | d_loss 0.9936 | g_loss 1.7927
Epoch [   13/   50] | d_loss 1.0587 | g_loss 2.2001
Epoch [   14/   50] | d_loss 1.1102 | g_loss 2.2017
Epoch [   15/   50] | d_loss 1.0742 | g_loss 2.4338
Epoch [   16/   50] | d_loss 1.0034 | g_loss 2.1054
Epoch [   17/   50] | d_loss 1.0880 | g_loss 1.6877
Epoch [   18/   50] | d_loss 1.1160 | g_loss 1.9800
Epoch [   19/   50] | d_loss 0.8953 | g_loss 2.3

In [None]:
# Debug aid: print the size of every batch the loader yields.
for real_images in data_loader:
    print(real_images.size(0))

## Wystawa

In [None]:
# Show the first eight images the generator produced from the fixed noise
# after the last recorded epoch (2 x 4 grid).
fig, axes = plt.subplots(figsize=(15, 10), nrows=2, ncols=4, sharey=True, sharex=True)
# sample_result[-1] is the most recent epoch regardless of the current value of EPOCHS.
for ax, img in zip(axes.flatten(), sample_result[-1]):
    # (C, H, W) tensor -> (H, W, C) array, the layout imshow expects.
    img = img.detach().cpu().numpy()
    img = np.transpose(img, (1, 2, 0))

    # The generator ends in Tanh, so values lie in [-1, 1]; rescale to [0, 255].
    img = ((img + 1) * 255 / 2).astype(np.uint8)

    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    ax.imshow(img)

plt.show()

## TensorFlow

In [None]:
import tensorflow as tf

In [None]:
import glob
import imageio
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
from tensorflow.keras import layers
import time

from IPython import display

In [None]:
def make_generator_model():
    """Build a Keras generator mapping a 100-dim noise vector to a 28x28x1 image.

    A dense layer projects the noise to a 7x7x256 volume, which three
    transposed convolutions turn into 7x7x128, 14x14x64 and finally a
    single-channel 28x28 map with ``tanh`` activation. The expected output
    shape is asserted after the reshape and after every transposed convolution.

    Returns:
        The ``tf.keras.Sequential`` model (not compiled).
    """
    net = tf.keras.Sequential()

    # Project the latent vector and fold it into a 7x7 feature volume.
    net.add(layers.Dense(7 * 7 * 256, use_bias=False, input_shape=(100,)))
    net.add(layers.BatchNormalization())
    net.add(layers.LeakyReLU())
    net.add(layers.Reshape((7, 7, 256)))
    assert net.output_shape == (None, 7, 7, 256)  # None stands for the batch size

    # Hidden upsampling stages: (filters, strides, expected output shape).
    hidden_stages = (
        (128, (1, 1), (None, 7, 7, 128)),
        (64, (2, 2), (None, 14, 14, 64)),
    )
    for filters, strides, expected_shape in hidden_stages:
        net.add(layers.Conv2DTranspose(filters, (5, 5), strides=strides, padding='same', use_bias=False))
        assert net.output_shape == expected_shape
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU())

    # Output stage: one channel, tanh activation.
    net.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert net.output_shape == (None, 28, 28, 1)

    return net

In [None]:
# Build the (untrained) Keras generator.
generator = make_generator_model()

# NOTE(review): this rebinds the global name `noise`, which an earlier cell of
# this notebook defines as a function; re-running that earlier `noise(100)`
# cell after this one will fail. Consider a distinct name for the TF tensor.
noise = tf.random.normal([1, 100])
generated_image = generator(noise, training=False)

# Display the single output channel of the first generated image in grayscale.
plt.imshow(generated_image[0, :, :, 0], cmap='gray')