In [1]:
import torch

print(f"Is CUDA supported by this system? {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")

cuda_id = torch.cuda.current_device()
print(f"ID of current CUDA device: {torch.cuda.current_device()}")

print(f"Name of current CUDA device: {torch.cuda.get_device_name(cuda_id)}")


Is CUDA supported by this system? True
CUDA version: 11.1
ID of current CUDA device: 0
Name of current CUDA device: GeForce RTX 3060 Ti


In [2]:
import sys
import os
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import utils as vutils

In [3]:
from model.discriminator import Discriminator
from model.perceptualloss import LPIPS
from model.vqgan import VQGAN

In [4]:
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

In [1]:
import albumentations

In [5]:
# from utils.utils import load_data, weights_init

class ImagePaths(Dataset):
    def __init__(self, path, size=None):
        self.size = size

        self.images = [os.path.join(path, file) for file in os.listdir(path)]
        self._length = len(self.images)

        self.rescaler = albumentations.SmallestMaxSize(max_size=self.size)
        self.cropper = albumentations.CenterCrop(height=self.size, width=self.size)
        self.preprocessor = albumentations.Compose([self.rescaler, self.cropper])

    def __len__(self):
        return self._length

    # image normalization
    def preprocess_image(self, image_path):
        image = Image.open(image_path)
        if not image.mode == "RGB":
            image = image.convert("RGB")
        image = np.array(image).astype(np.uint8)
        image = self.preprocessor(image=image)["image"]
        image = (image / 127.5 - 1.0).astype(np.float32)
        image = image.transpose(2, 0, 1)
        return image

    def __getitem__(self, i):
        example = self.preprocess_image(self.images[i])
        return example


# load the data
def load_data(dataset_path):
    train_data = ImagePaths(dataset_path, size=256)
    train_loader = DataLoader(train_data, batch_size=2, shuffle=False)
    return train_loader


# Module Utils for Encoder, Decoder etc.
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)


def plot_images(images):
    x = images["input"]
    reconstruction = images["rec"]
    half_sample = images["half_sample"]
    full_sample = images["full_sample"]

    fig, axarr = plt.subplots(1, 4)
    axarr[0].imshow(x.cpu().detach().numpy()[0].transpose(1, 2, 0))
    axarr[1].imshow(reconstruction.cpu().detach().numpy()[0].transpose(1, 2, 0))
    axarr[2].imshow(half_sample.cpu().detach().numpy()[0].transpose(1, 2, 0))
    axarr[3].imshow(full_sample.cpu().detach().numpy()[0].transpose(1, 2, 0))
    plt.show()

import os
import argparse
from tqdm import tqdm
import numpy as np
import torch
import torch.nn.functional as F
from torchvision import utils as vutils
from discriminator import Discriminator
from perceptualloss import LPIPS
from vqgan import VQGAN
from utils import load_data, weights_init

In [6]:
def configure_optimizers():
    lr = 2.25e-05
    
    opt_vq = torch.optim.Adam(
        list(vqgan.encoder.parameters()) +
        list(vqgan.decoder.parameters()) +
        list(vqgan.codebook.parameters()) +
        list(vqgan.quant_conv.parameters()) +
        list(vqgan.post_quant_conv.parameters()),
        lr=lr, eps=1e-08, betas=(0.5, 0.9)
    )
    
    opt_disc = torch.optim.Adam(discriminator.parameters(),
                                lr=lr, eps=1e-08, betas=(0.5, 0.9))

    return opt_vq, opt_disc

In [7]:
vqgan = VQGAN().to(device="cuda:0")
discriminator = Discriminator().to(device="cuda:0")
discriminator.apply(weights_init)
perceptual_loss = LPIPS().eval().to(device="cuda:0")
opt_vq, opt_disc = configure_optimizers()

In [8]:
torch.cuda.empty_cache()

In [9]:
train_dataset = load_data(dataset_path=r"C:\Users\P_CHOUH\Documents\GMProject\coco2017")
steps_per_epoch = len(train_dataset)
for epoch in range(10):
    with tqdm(range(len(train_dataset))) as pbar:
        for i, imgs in zip(pbar, train_dataset):
            imgs = imgs.to(device="cuda:0")
            decoded_images, _, q_loss = vqgan(imgs)

            disc_real = discriminator(imgs)
            disc_fake = discriminator(decoded_images)

            disc_factor = vqgan.adopt_weight(1., epoch * steps_per_epoch + i, threshold=10000)

            _perceptual_loss = perceptual_loss(imgs, decoded_images)
            rec_loss = torch.abs(imgs - decoded_images)
            perceptual_rec_loss = 1. * _perceptual_loss + 1. * rec_loss
            perceptual_rec_loss = perceptual_rec_loss.mean()
            g_loss = -torch.mean(disc_fake)

            λ = vqgan.calculate_lambda(perceptual_rec_loss, g_loss)
            vq_loss = perceptual_rec_loss + q_loss + disc_factor * λ * g_loss

            d_loss_real = torch.mean(F.relu(1. - disc_real))
            d_loss_fake = torch.mean(F.relu(1. + disc_fake))
            gan_loss = disc_factor * 0.5 * (d_loss_real + d_loss_fake)

            opt_vq.zero_grad()
            vq_loss.backward(retain_graph=True)

            opt_disc.zero_grad()
            gan_loss.backward()

            opt_vq.step()
            opt_disc.step()

            if i % 100 == 0:
                with torch.no_grad():
                    real_fake_images = torch.cat((imgs.add(1).mul(0.5)[:4], decoded_images.add(1).mul(0.5)[:4]))
                    vutils.save_image(real_fake_images, os.path.join(r"C:/Users/P_CHOUH/Documents/GMProject/results/", f"{epoch}_{i}.jpg"), nrow=4)

            pbar.set_postfix(
                VQ_Loss=np.round(vq_loss.cpu().detach().numpy().item(), 5),
                GAN_Loss=np.round(gan_loss.cpu().detach().numpy().item(), 3)
            )
            pbar.update(0)

            if (epoch % 2) == 0:
                torch.save(vqgan.state_dict(), os.path.join(r"C:/Users/P_CHOUH/Documents/GMProject/checkpoints/", f"vqgan_epoch_{epoch}.pt"))
                
torch.save(vqgan.state_dict(), os.path.join(r"C:/Users/P_CHOUH/Documents/GMProject/checkpoints", f"vqgan_final_.pt"))

100%|██████████| 4910/4910 [1:19:45<00:00,  1.03it/s, GAN_Loss=0, VQ_Loss=0.254] 
100%|██████████| 4910/4910 [35:38<00:00,  2.30it/s, GAN_Loss=0, VQ_Loss=0.147] 
100%|██████████| 4910/4910 [1:19:43<00:00,  1.03it/s, GAN_Loss=0, VQ_Loss=0.175]     
100%|██████████| 4910/4910 [35:37<00:00,  2.30it/s, GAN_Loss=0, VQ_Loss=0.153]     
100%|██████████| 4910/4910 [1:20:01<00:00,  1.02it/s, GAN_Loss=0, VQ_Loss=0.162]     
100%|██████████| 4910/4910 [35:26<00:00,  2.31it/s, GAN_Loss=0.004, VQ_Loss=0.154] 
100%|██████████| 4910/4910 [1:18:05<00:00,  1.05it/s, GAN_Loss=0, VQ_Loss=0.173]     
100%|██████████| 4910/4910 [35:26<00:00,  2.31it/s, GAN_Loss=0, VQ_Loss=0.132]     
100%|██████████| 4910/4910 [1:20:02<00:00,  1.02it/s, GAN_Loss=0.001, VQ_Loss=0.118] 
100%|██████████| 4910/4910 [35:32<00:00,  2.30it/s, GAN_Loss=0, VQ_Loss=0.143]     
