In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import os
import matplotlib.image as img
import tqdm
from vae import Basic_VAE

#### Create Dataset


In [7]:
# Load every file in the dataset directory into a list of float tensors.
cats = []
directory = "data/cats"
# os.listdir returns entries in arbitrary order; sort them so that the index of
# each image in `cats` is reproducible across runs and machines.
for catpic in sorted(os.listdir(directory)):
    # read from image and convert to tensor
    im = torch.tensor(img.imread(os.path.join(directory, catpic))).float()
    # permute to (channels, height, width) for conv2d layer
    # (assumes imread returned a (height, width, channels) array — TODO confirm
    # that no greyscale/2-D images are present in the directory)
    im = torch.permute(im, (2, 0, 1))
    # normalize to range between -1 and 1
    # NOTE(review): this assumes 0-255 pixel values, which map to [-1, 0.992];
    # if imread yields floats already in [0, 1] the result is ~-1 everywhere — confirm.
    im = im / 128 - 1
    cats.append(im)
print(cats[0].shape)

torch.Size([3, 64, 64])


#### Choose Hyperparameters and Build Model


In [None]:
# Hyperparameters handed to Basic_VAE (imported from the local `vae` module).
# NOTE(review): presumably the channel widths of successive encoder layers —
# confirm against vae.Basic_VAE.
hidden_dims = [16, 32, 64, 128]
# NOTE(review): presumably the dimensionality of the latent vector — confirm.
latent_dim = 64
# NOTE(review): the first positional argument (64) looks like the image side
# length (the loaded images print as 3x64x64) — confirm against Basic_VAE's signature.
model = Basic_VAE(64, hidden_dims, latent_dim)

#### Implement Gradient Descent


In [11]:
def gradient_descent(model, loss_func, x, y, xvalid, yvalid, lr=0.1, steps=5000):
    """Train ``model`` with full-batch SGD, tracking training and validation loss.

    Every step performs one forward/backward pass over all of ``x``, applies
    the SGD update, and then evaluates the updated model on the validation data.

    Args:
        model: module whose ``parameters()`` are optimised; invoked as ``model(x)``.
        loss_func: callable invoked as ``loss_func(model(x), y)``; its result
            must support ``backward()`` (i.e. be a scalar tensor).
        x, y: training inputs and targets.
        xvalid, yvalid: validation inputs and targets.
        lr: learning rate passed to ``optim.SGD``.
        steps: number of update steps to run.

    Returns:
        ``(losses, valid_losses)``: two lists holding one NumPy value per step.
        Both lists are empty when ``steps`` is 0.

    Note:
        When at least one step runs, the model is left in eval mode.
    """
    optimizer = optim.SGD(model.parameters(), lr)
    # Discard gradients left on the parameters by earlier code so they cannot
    # leak into the first update.
    optimizer.zero_grad()

    losses = []
    valid_losses = []
    for _ in tqdm.trange(steps):
        model.train()
        loss = loss_func(model(x), y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        model.eval()
        # Validation is evaluation only: skip building the autograd graph.
        with torch.no_grad():
            valid_loss = loss_func(model(xvalid), yvalid)
        # .cpu() is a no-op for CPU tensors and makes the conversion work when
        # the model/data live on another device.
        losses.append(loss.detach().cpu().numpy())
        valid_losses.append(valid_loss.detach().cpu().numpy())

    # With steps == 0 there is no final loss to report.
    if losses:
        print(f"Final training loss: {losses[-1]}")

    return losses, valid_losses

#### Implement Loss Function


In [None]:
def loss_function(self, reconstructed_img, input_img, mu, log_var, kld_weight=2):
    """Compute the VAE loss: reconstruction MSE plus a weighted KL term.

    The KL term is the closed-form divergence between the diagonal Gaussian
    N(mu, exp(log_var)) and the standard normal N(0, I):

        KL = -0.5 * sum(1 + log_var - mu^2 - exp(log_var))

    summed over the last (latent) dimension and averaged over any leading
    dimensions, so its magnitude does not grow with batch size.

    Args:
        self: unused; kept so existing call sites keep working.
        reconstructed_img: model output to compare against ``input_img``.
        input_img: original input tensor.
        mu: mean of the approximate posterior; last dimension is the latent axis.
        log_var: log of the posterior *variance* (not log of the standard
            deviation), same shape as ``mu``.
        kld_weight: multiplier applied to the KL term.

    Returns:
        Scalar tensor ``mse + kld_weight * kl``.
    """
    img_loss = F.mse_loss(reconstructed_img, input_img)
    # article on calculating kl divergence between 2 gaussians:
    # https://medium.com/@outerrencedl/variational-autoencoder-and-a-bit-kl-divergence-with-pytorch-ce04fd55d0d7
    # With log_var = log(sigma^2): log(sigma) = log_var / 2 and sigma^2 = exp(log_var).
    kld_per_sample = -0.5 * torch.sum(1 + log_var - mu**2 - log_var.exp(), dim=-1)
    kld_loss = kld_weight * torch.mean(kld_per_sample)

    return img_loss + kld_loss