# Model Checks

In this notebook, we will import and initialise all models to ensure that they are working correctly.
This notebook serves as a sanity check to ensure that all models are correctly implemented and can be initialised without any errors.
We use the **MNIST Sequential** dataset as a sample dataset to test the models.
The dataset is only a stand-in for this check; the runs below are not intended as a meaningful training or evaluation of the models.

## Initialisation

In [None]:
# Notebook setup: logging, automatic module reloading, version report and RNG seed.
import logging
# Load the autoreload extension in mode 2 so that edited modules are reloaded
# before code is executed (no kernel restart needed after changing project sources)
%load_ext autoreload
%autoreload 2

# Show log records of level INFO and above
logging.basicConfig(level=logging.INFO)
# Print Python Version & PyTorch version
import torch
import sys
import os
print(f"Python version\t=\t{sys.version}\nPyTorch version\t=\t{torch.__version__}")
# Seed PyTorch's random number generator so that runs are repeatable
# (this only fixes the seed; it does not by itself enable deterministic algorithms)
torch.manual_seed(0)

In [None]:
# Make the project importable: on Google Colab clone the repository and change into it,
# otherwise put the notebook's parent directory on the import path.
# Colab is detected from the string representation of the active IPython shell.
RunningInCOLAB = 'google.colab' in str(get_ipython())
if RunningInCOLAB:
    # Move to default colab folder
    %cd /content
    # NOTE(review): `config` is not defined or imported in any cell visible in this notebook,
    # so the `{config.…}` placeholders below cannot be resolved as written — TODO define
    # GITHUB_URL / MODULE_NAME (or import `config`) before this cell runs.
    # NOTE(review): the check below uses the literal "stg" while the clone target is
    # config.MODULE_NAME — confirm that both refer to the same directory.
    # Check if repository is already cloned
    if not os.path.isdir("stg"):
        # Clone repository
        !git clone {config.GITHUB_URL} {config.MODULE_NAME}
    # Change to repository directory
    %cd {config.MODULE_NAME}
    # Only install requirements not already installed by Colab
    # !pip install opacus
    # SLOW: Only execute the following line if you encounter an error regarding a package not being installed
    # !pip install -r requirements.txt
else:
    # `sys` is already imported by the setup cell; re-importing is harmless
    import sys
    # Add parent directory (absolute!) to path
    # Inserted at position 0 so it takes precedence over the other entries of sys.path
    sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

In [None]:
from conv_gan.models.noise_trajgan import Noise_TrajGAN

## Sample Dataset

We use the MNIST Sequential dataset as a sample dataset to test the models.

In [None]:
from conv_gan.datasets.mnist_data import mnist_sequential, show_mnist_samples

# Batch size used for the DataLoader created below
BATCH_SIZE = 64

# Load the sample dataset
# NOTE(review): the meaning of the argument 28 is defined by mnist_sequential
# (presumably the sequence length / image width) — TODO confirm
ds = mnist_sequential(28)
# Print the shape of the first sample and the type of its label
print(f"Sample:\t{ds[0][0].shape}\nLabel:\t{type(ds[0][1])}")

# Collate function for the DataLoader: discards the labels and adds a leading feature axis
def collate_fn(batch) -> torch.Tensor:
    """Stack the samples of a batch into one tensor with a leading axis of size 1.

    Args:
        batch: Sequence of dataset items. Only element 0 of each item (the sample
            tensor) is used; everything else (e.g. the label) is dropped.

    Returns:
        Tensor of shape ``(1, len(batch), *sample_shape)``.
    """
    samples = [item[0] for item in batch]
    stacked = torch.stack(samples)
    # Indexing with None inserts the new leading (feature) dimension
    return stacked[None]
    
from torch.utils.data import DataLoader
# Batches are assembled by collate_fn; no shuffling is requested here
dl = DataLoader(ds, batch_size=BATCH_SIZE, collate_fn=collate_fn)

# Print Shape of one batch
print(f"Batch:\t{next(iter(dl)).shape}")

# Show the first 25 samples of the dataset (indices 0-24)
samples = torch.stack([ds[i][0] for i in range(25)])
# Assign to `_` so the return value is not echoed as cell output
_ = show_mnist_samples(samples)

In [None]:
# Constants
# Feature names; the two dictionaries below are keyed by these names
FEATURES = ['mnist']
vocab_size = {'mnist': 28}
embedding_size = {'mnist': 64}
# Passed to the models as latent_dim / noise_dim
LATENT_DIM = 256
NOISE_DIM = 28
# GPU index (presumably the CUDA device id — TODO confirm)
GPU = 0

# Training Parameters
EPOCHS = 10

# Passed to the models as `wgan`
WGAN = True
LP = True  # Lipschitz Penalty required!
# Learning rates of generator (G) and discriminator (D)
LR_G = 0.0001
LR_D = 0.0005
# Passed to training_loop as `n_critic`
N_CRITIC = 1

## Noise-TrajGAN: Baseline Model

In [None]:
# Import Noise-TrajGAN
from conv_gan.models.noise_trajgan import Noise_TrajGAN

# Run name that encodes the most important hyperparameters
name = f'NTG_MNIST_G{LR_G}_{N_CRITIC}xD{LR_D}_L{LATENT_DIM}_N{NOISE_DIM}_B{BATCH_SIZE}_{"WGAN" if WGAN else "GAN"}'

# Create a Noise-TrajGAN model
ntg = Noise_TrajGAN(
    features=FEATURES,
    vocab_size=vocab_size,
    embedding_size=embedding_size,
    latent_dim=LATENT_DIM,
    noise_dim=NOISE_DIM,
    wgan=WGAN,
    gradient_penalty=LP,
    lipschitz_penalty=LP,
    lr_g=LR_G,
    lr_d=LR_D,
    gpu=GPU,  # use the GPU constant from the constants cell instead of a literal 0
    name=name,
)


def count_params_torch(model) -> int:
    """Return the number of trainable parameters (``requires_grad=True``) of ``model``."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


# Print and compare the number of trainable parameters of generator and discriminator
gen_params = count_params_torch(ntg.gen)
dis_params = count_params_torch(ntg.dis)
print(f"Generator Parameters:\t\t{gen_params}")
print(f"Discriminator Parameters:\t{dis_params}")
print(f"Relationship [G / D]:\t\t{gen_params / dis_params * 100:.2f}%")

In [None]:
# Generate samples with the NTG model before training and display them
# NOTE(review): generate(25, 28) presumably requests 25 samples of length 28 —
# TODO confirm against Noise_TrajGAN.generate
ntg_output = torch.as_tensor(ntg.generate(25, 28))
print(ntg_output.shape)
show_mnist_samples(ntg_output)

In [None]:
# Train the baseline model on the sample DataLoader
ntg.training_loop(
    dl,
    epochs=EPOCHS,
    dataset_name='mnist',
    n_critic=N_CRITIC,
    plot_freq=30,
    save_freq=-1,
    tensorboard=True,
    notebook=True,
)

In [None]:
# Generate samples with the NTG model after training and display them
# (same generate arguments as for the output before training, so both can be compared)
ntg_output = torch.as_tensor(ntg.generate(25, 28))
print(ntg_output.shape)
show_mnist_samples(ntg_output)

## Noise-TrajGAN with Differential Privacy

In [None]:
# DP Parameters
EPSILON = 10.0
ACCOUNTANT = 'prv'  # Default is 'prv', but I found that 'rdp' is more stable in some situation 
MAX_GRAD_NORM = 0.1
# Delta should be 1/n where n is the number of samples according to DPfy-ML
DELTA = 1 / len(ds)
print(f"Epsilon:\t{EPSILON:.1f}\nDelta:\t\t{DELTA:.2e}\nMax Grad Norm:\t{MAX_GRAD_NORM}\nAccountant:\t{ACCOUNTANT}")


DP_IN_DIS = False  # Whether to apply DP-SGD to discriminator or generator
# WGAN Clipping does not work if DP is applied to the discriminator
LP = not DP_IN_DIS
WGAN = True
if not DP_IN_DIS and N_CRITIC > 1:
    print("Warning: Training with DP and N_CRITIC is a bit of a gamble because we might actually be wasting privacy budget on the discriminator which does not even uses DP.")

# Increase learning rate of DP model to make up for the gradient clipping
if DP_IN_DIS:
    LR_D = LR_D / MAX_GRAD_NORM 
else:
    LR_G = LR_G / MAX_GRAD_NORM

print(f"LR_G:\t{LR_G}\nLR_D:\t{LR_D}")

# Create new DataLoader
# The number of steps should be the same as without DP, but DP-SGD works better for large batches
# --> Increase batches and epochs by same factor
FACTOR = 10
DP_BATCH_SIZE = BATCH_SIZE * FACTOR
DP_EPOCHS = EPOCHS * FACTOR
dp_dl = DataLoader(ds, batch_size=DP_BATCH_SIZE, collate_fn=collate_fn)
print(f"Batch Size:\t{DP_BATCH_SIZE}\nEpochs:\t\t{DP_EPOCHS}")

In [None]:
# Initialize DP-Noise-TrajGAN
name = f'DP-NTG_MNIST_G{LR_G}_{N_CRITIC}xD{LR_D}_L{LATENT_DIM}_N{NOISE_DIM}_B{DP_BATCH_SIZE}_C{MAX_GRAD_NORM}'

dp_ntg = Noise_TrajGAN(
    features=FEATURES,
    vocab_size=vocab_size,
    embedding_size=embedding_size,
    latent_dim=LATENT_DIM,
    noise_dim=NOISE_DIM,
    lr_g=LR_G,
    lr_d=LR_D,
    gpu = 0,
    name=name,
    wgan=WGAN,
    gradient_penalty=LP,
    lipschitz_penalty=LP,
    dp=True,
    dp_in_dis=DP_IN_DIS,
    privacy_accountant=ACCOUNTANT
)

In [None]:
# Initialize DP --> Returns DP dataloader
# NOTE(review): `dp_dl` is rebound to the loader returned by init_dp, so running this cell a
# second time passes the already-returned loader back into init_dp — presumably the cell
# should be executed exactly once per freshly created `dp_ntg`; TODO confirm.
dp_dl = dp_ntg.init_dp(
    dataloader=dp_dl,
    epochs=DP_EPOCHS,
    max_grad_norm=MAX_GRAD_NORM,
    target_epsilon=EPSILON,
    delta=DELTA,
)

In [None]:
# Generate samples with the DP-NTG model before training and display them
# NOTE(review): generate(25, 28) presumably requests 25 samples of length 28 —
# TODO confirm against Noise_TrajGAN.generate
dp_ntg_output = torch.as_tensor(dp_ntg.generate(25, 28))
show_mnist_samples(dp_ntg_output)

In [None]:
# Train the DP model with the DataLoader returned by init_dp
dp_ntg.training_loop(
    dp_dl,
    epochs=DP_EPOCHS,
    dataset_name='mnist',
    n_critic=N_CRITIC,
    plot_freq=200,
    save_freq=-1,
    tensorboard=True,
    notebook=True,
)

In [None]:
# Generate samples with the DP-NTG model after training and display them
# (same generate arguments as for the output before training, so both can be compared)
dp_ntg_output = torch.as_tensor(dp_ntg.generate(25, 28))
show_mnist_samples(dp_ntg_output)

In [None]:
# Print resulting privacy loss
# Reads the `delta` and `epsilon` attributes of the DP model after training
print("Final Delta:\t", dp_ntg.delta)
print("Final Epsilon:\t", dp_ntg.epsilon)