In [2]:
from IPython.display import display, clear_output
import plotly.graph_objs as go
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import time
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

from sklearn.decomposition import PCA

# Testing GPU acceleration

To set the GPU acceleration properly up on Pytorch for a .ipynb file on Axon. <br>
You have to first use nvidia-smi to see an unused GPU,  <br>
then use nvidia-smi -L to see its MIG ID, and set it as an environmental variable.<br>


In [4]:
# Prompt for the CUDA_VISIBLE_DEVICES value
cuda_device = input("Enter the CUDA_VISIBLE_DEVICES value: ")
os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device

In [5]:
# Check if CUDA is available
is_cuda_available = torch.cuda.is_available()
print("Is CUDA available:", is_cuda_available)

# Determine the device to use: GPU (CUDA), Apple Silicon (MPS), or CPU
DEVICE = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print("Using device:", DEVICE)

Is CUDA available: True
Using device: cuda


In [6]:
if torch.cuda.is_available():
    # Test tensor operation on GPU
    test_tensor = torch.tensor([1.0, 2.0, 3.0], device="cuda")
    print("Test tensor on CUDA:", test_tensor)

Test tensor on CUDA: tensor([1., 2., 3.], device='cuda:0')


In [5]:
if is_cuda_available:
    try:
        test_tensor = torch.tensor([1, 2, 3], device=DEVICE)
        print("Successfully moved a tensor to the device:", test_tensor)
    except RuntimeError as e:
        print("Error moving a tensor to the device:", e)

Successfully moved a tensor to the device: tensor([1, 2, 3], device='cuda:0')


# Defining VAE classes

In [19]:
class VAE(nn.Module):
    def __init__(self, latent_dim=20, w_dim=10):
        super(VAE, self).__init__()
        self.latent_dim = latent_dim

        # Encoder layers
        # Input: [bs, 1, 28, 28]
        self.enc_conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)  # Output: [bs, 16, 14, 14]
        self.enc_conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1) # Output: [bs, 32, 7, 7]
        self.enc_fc1 = nn.Linear(32 * 7 * 7, w_dim)  # Output: [bs, 128]
        # Two output layers for the latent space
        self.enc_fc2 = nn.Linear(w_dim, latent_dim)  # For mu, Output: [bs, latent_dim]
        self.enc_fc3 = nn.Linear(w_dim, latent_dim)  # For logvar, Output: [bs, latent_dim]

        # Decoder layers
        self.dec_fc1 = nn.Linear(latent_dim, w_dim)  # Output: [bs, 128]
        self.dec_fc2 = nn.Linear(w_dim, 32 * 7 * 7)  # Output: [bs, 1568]
        self.dec_conv1 = nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1)  # Output: [bs, 16, 14, 14]
        self.dec_conv2 = nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1)  # Output: [bs, 1, 28, 28]

    def encode(self, x):
        h = F.relu(self.enc_conv1(x))
        h = F.relu(self.enc_conv2(h))
        h = torch.flatten(h, start_dim=1)
        h = F.relu(self.enc_fc1(h))
        return self.enc_fc2(h), self.enc_fc3(h)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        # Returns a tensor with the same size as input that is filled with random numbers from a normal distribution with mean 0 and variance 1.
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h = F.relu(self.dec_fc1(z)) 
        h = F.relu(self.dec_fc2(h)).view(-1, 32, 7, 7) # .view reshapes [bs, 1568] to [bs, 32, 7, 7]
        h = F.relu(self.dec_conv1(h))
        return torch.sigmoid(self.dec_conv2(h))

    def forward(self, x):
        # Encoder layers
        # Input: [bs, 1, 28, 28]
        enc_conv1_out = F.relu(self.enc_conv1(x))  # Output: [bs, 16, 14, 14]
        enc_conv2_out = F.relu(self.enc_conv2(enc_conv1_out))  # Output: [bs, 32, 7, 7]
        flattened = torch.flatten(enc_conv2_out, start_dim=1)  # Output: [bs, 1568]
        
        # w = F.relu(self.enc_fc1(flattened))  # Output: [bs, 128] # Eqn (5) in LAKE paper
        w = self.enc_fc1(flattened)  # Not sure if relu is used in the paper or not. Output: [bs, 128] # Eqn (5) in LAKE paper
        
        mu, logvar = self.enc_fc2(w), self.enc_fc3(w)  # Output: [bs, latent_dim], [bs, latent_dim] # Eqn (6) in LAKE paper

        # Reparameterization and Decoding layers
        z = self.reparameterize(mu, logvar)  # Output: [bs, latent_dim] # Eqn (7) in LAKE paper
        dec_fc1_out = F.relu(self.dec_fc1(z))  # Output: [bs, 128]
        dec_fc2_out = F.relu(self.dec_fc2(dec_fc1_out)).view(-1, 32, 7, 7)  # Output: [bs, 1568], then reshaped to [bs, 32, 7, 7]
        dec_conv1_out = F.relu(self.dec_conv1(dec_fc2_out))  # Output: [bs, 16, 14, 14]
        recon_x = torch.sigmoid(self.dec_conv2(dec_conv1_out))  # Output: [bs, 1, 28, 28]

        return recon_x, z, mu, logvar, enc_conv1_out, enc_conv2_out, w, dec_fc1_out, dec_fc2_out, dec_conv1_out


In [20]:
def loss_function(recon_x, z, mu, logvar, x, layer_loss_weight=0, intermediate_layers=None):
    BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    layer_loss = 0
    if layer_loss_weight > 0 and intermediate_layers is not None:
        enc_conv1_out, enc_conv2_out, w, dec_fc1_out, dec_fc2_out, dec_conv1_out = intermediate_layers
        layer_loss = F.mse_loss(enc_conv1_out, dec_conv1_out) + F.mse_loss(enc_conv2_out, dec_fc2_out) + F.mse_loss(w, dec_fc1_out)

    return BCE + KLD + layer_loss_weight * layer_loss

Loading Datasets

In [13]:
transform = transforms.Compose([transforms.ToTensor()])

# Download the MNIST dataset
mnist_trainset = datasets.MNIST(root='~/.pytorch/MNIST_data/', train=True, download=True, transform=transform)

# Splitting the dataset into train and validation sets
train_size = int(0.8 * len(mnist_trainset))
validation_size = len(mnist_trainset) - train_size
train_dataset, validation_dataset = random_split(mnist_trainset, [train_size, validation_size])

# Download and load the test data
test_dataset = datasets.MNIST(root='~/.pytorch/MNIST_data/', train=False, download=True, transform=transform)

In [21]:
# Training parameters
batch_size = 64
learning_rate = 1e-3
epochs = 100

trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
validationloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, drop_last=False)

Standard_VAE = VAE().to(DEVICE)
Lake_VAE = VAE().to(DEVICE)

In [24]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.16.0-py3-none-any.whl.metadata (9.8 kB)
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.40-py3-none-any.whl.metadata (12 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.35.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting appdirs>=1.4.3 (from wandb)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Collecting protobuf!=4.21.0,<5,>=3.15.0 (from wandb)
  Downloading protobuf-4.25.1-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wandb)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from 

In [26]:
import wandb
wandb.login()



Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/msxaj10/.netrc


True

In [27]:
import wandb
import torch
from torch.utils.data import DataLoader
from torchvision.utils import make_grid
import matplotlib.pyplot as plt

# Function to visualize images
def show_images(images, nmax=64):
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(make_grid(images.detach()[:nmax], nrow=8).permute(1, 2, 0))

def visualize_reconstruction(model, device, data_loader):
    model.eval()
    with torch.no_grad():
        for i, (x, _) in enumerate(data_loader):
            x = x.to(device)
            recon_x, _, _, _ = model(x)
            if i == 0:  # Only visualize for the first batch
                return recon_x

# Initialize a W&B run
wandb.init(project='vae_mnist', entity='exiomius', config={
    'learning_rate': 0.001,
    'epochs': 50,
    'batch_size': 64,
    'latent_dim': 20,
    'layer_loss_weight': 0  # Set to >0 for Lake VAE
})

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=wandb.config.batch_size, shuffle=True)
val_loader = DataLoader(validation_dataset, batch_size=wandb.config.batch_size)

# Initialize the VAE model
model = VAE(latent_dim=wandb.config.latent_dim)
model.to(DEVICE)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=wandb.config.learning_rate)

# Training loop
for epoch in range(wandb.config.epochs):
    model.train()
    train_loss, train_bce, train_kld, train_layer_loss = 0, 0, 0, 0
    for x, _ in train_loader:
        x = x.to(DEVICE)
        optimizer.zero_grad()

        # Forward pass
        recon_x, z, mu, logvar, *intermediate_layers = model(x)

        # Compute loss using the loss function
        loss = loss_function(recon_x, z, mu, logvar, x, wandb.config.layer_loss_weight, intermediate_layers)

        # Extract individual loss components
        bce, kld, layer_loss = loss.bce, loss.kld, loss.layer_loss

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_bce += bce.item()
        train_kld += kld.item()
        train_layer_loss += layer_loss

    # Validation
    model.eval()
    val_loss, val_bce, val_kld, val_layer_loss = 0, 0, 0, 0
    with torch.no_grad():
        for x, _ in val_loader:
            x = x.to(DEVICE)
            recon_x, z, mu, logvar, *intermediate_layers = model(x)
            loss = loss_function(recon_x, z, mu, logvar, x, wandb.config.layer_loss_weight, intermediate_layers)
            bce, kld, layer_loss = loss.bce, loss.kld, loss.layer_loss
            val_loss += loss.item()
            val_bce += bce.item()
            val_kld += kld.item()
            val_layer_loss += layer_loss

    # Visualize reconstructed images
    recon_images = visualize_reconstruction(model, DEVICE, val_loader)
    wandb.log({'recon_images': [wandb.Image(recon_images.cpu(), caption='Reconstructed Images')]})

    # Log metrics
    wandb.log({
        'train_loss': train_loss / len(train_loader.dataset),
        'train_bce': train_bce / len(train_loader.dataset),
        'train_kld': train_kld / len(train_loader.dataset),
        'train_layer_loss': train_layer_loss / len(train_loader.dataset),
        'val_loss': val_loss / len(val_loader.dataset),
        'val_bce': val_bce / len(val_loader.dataset),
        'val_kld': val_kld / len(val_loader.dataset),
        'val_layer_loss': val_layer_loss / len(val_loader.dataset),
        'epoch': epoch
    })

    # Optional: Save model checkpoints
    # torch.save(model.state_dict(), 'model_checkpoint.pth')

# Close the W&B run
wandb.finish()


[34m[1mwandb[0m: Currently logged in as: [33mexiomius[0m. Use [1m`wandb login --relogin`[0m to force relogin


AttributeError: 'Tensor' object has no attribute 'bce'