In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset

In [15]:
import torch
import torch.nn as nn
# from torchsummary import summary

"""
Implementation based on original paper NeurIPS 2016
https://papers.nips.cc/paper/6096-learning-a-probabilistic-latent-space-of-object-shapes-via-3d-generative-adversarial-modeling.pdf
"""


class Discriminator(torch.nn.Module):
    def __init__(self, in_channels=1, dim=64, out_conv_channels=512):
        super(Discriminator, self).__init__()
        conv1_channels = int(out_conv_channels / 8)
        conv2_channels = int(out_conv_channels / 4)
        conv3_channels = int(out_conv_channels / 2)
        self.out_conv_channels = out_conv_channels
        self.out_dim = int(dim / 16)

        self.conv1 = nn.Sequential(
            nn.Conv3d(
                in_channels=in_channels, out_channels=conv1_channels, kernel_size=4,
                stride=2, padding=1, bias=False
            ),
            nn.BatchNorm3d(conv1_channels),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv3d(
                in_channels=conv1_channels, out_channels=conv2_channels, kernel_size=4,
                stride=2, padding=1, bias=False
            ),
            nn.BatchNorm3d(conv2_channels),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.conv3 = nn.Sequential(
            nn.Conv3d(
                in_channels=conv2_channels, out_channels=conv3_channels, kernel_size=4,
                stride=2, padding=1, bias=False
            ),
            nn.BatchNorm3d(conv3_channels),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.conv4 = nn.Sequential(
            nn.Conv3d(
                in_channels=conv3_channels, out_channels=out_conv_channels, kernel_size=4,
                stride=2, padding=1, bias=False
            ),
            nn.BatchNorm3d(out_conv_channels),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.out = nn.Sequential(
            nn.Linear(out_conv_channels * self.out_dim * self.out_dim * self.out_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        # Flatten and apply linear + sigmoid
        x = x.view(-1, self.out_conv_channels * self.out_dim * self.out_dim * self.out_dim)
        x = self.out(x)
        return x


class Generator(torch.nn.Module):
    def __init__(self, in_channels=512, out_dim=64, out_channels=1, noise_dim=200, activation="sigmoid"):
        super(Generator, self).__init__()
        self.in_channels = in_channels
        self.out_dim = out_dim
        self.in_dim = int(out_dim / 16)
        conv1_out_channels = int(self.in_channels / 2.0)
        conv2_out_channels = int(conv1_out_channels / 2)
        conv3_out_channels = int(conv2_out_channels / 2)

        self.linear = torch.nn.Linear(noise_dim, in_channels * self.in_dim * self.in_dim * self.in_dim)

        self.conv1 = nn.Sequential(
            nn.ConvTranspose3d(
                in_channels=in_channels, out_channels=conv1_out_channels, kernel_size=(4, 4, 4),
                stride=2, padding=1, bias=False
            ),
            nn.BatchNorm3d(conv1_out_channels),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.ConvTranspose3d(
                in_channels=conv1_out_channels, out_channels=conv2_out_channels, kernel_size=(4, 4, 4),
                stride=2, padding=1, bias=False
            ),
            nn.BatchNorm3d(conv2_out_channels),
            nn.ReLU(inplace=True)
        )
        self.conv3 = nn.Sequential(
            nn.ConvTranspose3d(
                in_channels=conv2_out_channels, out_channels=conv3_out_channels, kernel_size=(4, 4, 4),
                stride=2, padding=1, bias=False
            ),
            nn.BatchNorm3d(conv3_out_channels),
            nn.ReLU(inplace=True)
        )
        self.conv4 = nn.Sequential(
            nn.ConvTranspose3d(
                in_channels=conv3_out_channels, out_channels=out_channels, kernel_size=(4, 4, 4),
                stride=2, padding=1, bias=False
            )
        )
        if activation == "sigmoid":
            self.out = torch.nn.Sigmoid()
        else:
            self.out = torch.nn.Tanh()

    def project(self, x):
        """
        projects and reshapes latent vector to starting volume
        :param x: latent vector
        :return: starting volume
        """
        return x.view(-1, self.in_channels, self.in_dim, self.in_dim, self.in_dim)

    def forward(self, x):
        x = self.linear(x)
        x = self.project(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        return self.out(x)


def test_gan3d():
    noise_dim = 200
    in_channels = 512
    dim = 64  # cube volume
    model_generator = Generator(in_channels=512, out_dim=dim, out_channels=1, noise_dim=noise_dim)
    noise = torch.rand(1, noise_dim)
    generated_volume = model_generator(noise)
    print("Generator output shape", generated_volume.shape)
    model_discriminator = Discriminator(in_channels=1, dim=dim, out_conv_channels=in_channels)
    out = model_discriminator(generated_volume)


test_gan3d()

Generator output shape torch.Size([1, 1, 64, 64, 64])


In [6]:
import torch
from torch.autograd.variable import Variable


def ones_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    return data


def zeros_target(size):
    '''
    FAKE data
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    return data


def train_discriminator(discriminator, optimizer, real_data, fake_data, loss):
    cuda = next(discriminator.parameters()).is_cuda
    N = real_data.size(0)
    # Reset gradients
    optimizer.zero_grad()
    # 1.1 Train on Real Data
    prediction_real = discriminator(real_data)
    # Calculate error and backpropagate
    target_real = ones_target(N)
    if cuda:
        target_real.cuda()

    error_real = loss(prediction_real, target_real)
    error_real.backward()

    # 1.2 Train on Fake Data
    prediction_fake = discriminator(fake_data)
    # Calculate error and backpropagate
    target_fake = zeros_target(N)
    if cuda:
        target_fake.cuda()
    error_fake = loss(prediction_fake, target_fake)
    error_fake.backward()

    # 1.3 Update weights with gradients
    optimizer.step()

    # Return error and predictions for real and fake inputs
    return error_real + error_fake, prediction_real, prediction_fake


def train_generator(discriminator, optimizer, fake_data, loss):
    cuda = next(discriminator.parameters()).is_cuda
    N = fake_data.size(0)  # Reset gradients
    optimizer.zero_grad()  # Sample noise and generate fake data
    prediction = discriminator(fake_data)  # Calculate error and backpropagate
    target = ones_target(N)
    if cuda:
        target.cuda()

    error = loss(prediction, target)
    error.backward()  # Update weights with gradients
    optimizer.step()  # Return error
    return error

In [22]:
def data_loader(path):
    # Code adapted from assignment text
    data = np.load(path, allow_pickle=True)
    train_voxel = torch.from_numpy(data["train_voxel"]).float().unsqueeze(1) # Training 3D voxel samples
    test_voxel = torch.from_numpy(data["test_voxel"]).float().unsqueeze(1) # Test 3D voxel samples
    train_labels = torch.from_numpy(data["train_labels"]).long() # Training labels (integers from 0 to 9)
    test_labels = torch.from_numpy(data["test_labels"]).long() # Test labels (integers from 0 to 9)
    class_map = data["class_map"] # Dictionary mapping the labels to their class names.

    return train_voxel, test_voxel, train_labels, test_labels, class_map

In [23]:
# Loading data
train_v, test_v, train_l, test_l, class_map = data_loader("data/modelnet10.npz")

# Create a TensorDataset
train_dataset = TensorDataset(train_v)
# Define a DataLoader
batch_size = 50 # Somewhere between 10 and 100 as outlined in the assignment
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [24]:
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

# Initialize your models and optimizers here
discriminator = Discriminator()
generator = Generator()

optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0002)
optimizer_g = optim.Adam(generator.parameters(), lr=0.0002)

# Loss function
loss = nn.BCELoss()

# Number of epochs
num_epochs = 3

# Your DataLoader should be defined as train_loader
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(num_epochs):
    for n_batch, (real_batch, _) in enumerate(train_loader):

        N = real_batch.size(0)

        # Generate fake data
        noise = torch.randn(N, 200)
        fake_data = generator(noise).detach()

        # Train Discriminator
        real_data = real_batch
        d_error, d_pred_real, d_pred_fake = train_discriminator(discriminator, optimizer_d, real_data, fake_data, loss)

        # Generate fake data again but don't detach to compute gradients
        noise = torch.randn(N, 200)
        fake_data = generator(noise)

        # Train Generator
        g_error = train_generator(discriminator, optimizer_g, fake_data, loss)

        # Log batch error and other details if needed
        print(f"Epoch {epoch} Batch {n_batch} D Error: {d_error} G Error: {g_error}")

ValueError: not enough values to unpack (expected 2, got 1)

In [None]:
# Loss function
# Hyperparameters
lambda1 = 5.0  # Weight for the KL divergence loss
lambda2 = 1e-4  # Weight for the reconstruction loss

# Initialize the Binary Cross Entropy loss for the GAN loss
bce_loss = nn.BCELoss()

def gan_loss(logits_real, logits_fake):
    """
    Compute the GAN loss.
    """
    # Targets for real and fake data
    label_real = torch.ones_like(logits_real)
    label_fake = torch.zeros_like(logits_fake)

    # Compute the loss for the discriminator
    loss_real = bce_loss(logits_real, label_real)
    loss_fake = bce_loss(logits_fake, label_fake)

    # Compute the total GAN loss
    loss_gan = loss_real + loss_fake

    return loss_gan

def kl_divergence_loss(mu, log_var):
    """
    Compute the KL divergence loss.
    """
    loss_kl = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    return loss_kl

def reconstruction_loss(x, x_recon):
    """
    Compute the reconstruction loss.
    """
    loss_recon = F.mse_loss(x_recon, x)
    return loss_recon

# Example usage
# Assuming you have the following tensors:
# logits_real and logits_fake from the discriminator
# mu and log_var from the encoder
# x and x_recon for the real and reconstructed 3D shapes

# Compute the individual losses
loss_gan = gan_loss(logits_real, logits_fake)
loss_kl = kl_divergence_loss(mu, log_var)
loss_recon = reconstruction_loss(x, x_recon)

# Compute the total loss
total_loss = loss_gan + lambda1 * loss_kl + lambda2 * loss_recon

In [None]:
from tqdm import tqdm

# Initialize models
generator = Generator()
discriminator = Discriminator()

# Initialize optimizers
optimizer_g = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0002)

# Initialize the Binary Cross Entropy loss
bce_loss = nn.BCELoss()

# Hyperparameters for the custom losses
lambda1 = 5.0  # Weight for the KL divergence loss
lambda2 = 1e-4  # Weight for the reconstruction loss

# Training loop
num_epochs = 20  # Number of epochs

# Outer loop with tqdm
for epoch in tqdm(range(num_epochs), desc='Epochs'):

    # Inner loop with tqdm
    for batch_idx, (real_voxels, _) in tqdm(enumerate(train_loader), desc='Batches', leave=False):

        # Discriminator update
        optimizer_d.zero_grad()

        # Generate fake voxels
        z = torch.randn(real_voxels.size(0), 200)
        fake_voxels = generator(z)

        # Compute discriminator logits for real and fake voxels
        logits_real = discriminator(real_voxels)
        logits_fake = discriminator(fake_voxels.detach())

        # Compute the GAN loss for the discriminator
        loss_d = gan_loss(logits_real, logits_fake)

        # Backprop and optimize the discriminator
        loss_d.backward()
        optimizer_d.step()

        # Generator update
        optimizer_g.zero_grad()

        # Compute discriminator logits for fake voxels
        logits_fake = discriminator(fake_voxels)

        # Compute the GAN loss for the generator
        loss_g = -torch.mean(torch.log(logits_fake))

        # Here you would typically compute the KL divergence and reconstruction losses
        # For example:
        # loss_kl = kl_divergence_loss(mu, log_var)
        # loss_recon = reconstruction_loss(x, x_recon)

        # Compute the total loss for the generator
        # total_loss_g = loss_g + lambda1 * loss_kl + lambda2 * loss_recon

        # Backprop and optimize the generator
        loss_g.backward()
        optimizer_g.step()

        # Print losses
        if batch_idx % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}], D Loss: {loss_d.item()}, G Loss: {loss_g.item()}")