In [1]:
import sys
import os
# Put the parent of the working directory on the import path so that the
# project packages imported in the next cell (models, utils) can be resolved.
# NOTE(review): assumes the notebook is launched from a direct sub-folder of
# the repository root -- confirm if the notebook is ever moved.
repo_dir = os.path.dirname(os.getcwd())
# Guard against duplicates: re-running this cell must not keep growing sys.path.
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

In [2]:
from models.cm import ContinuousMixture, GaussianDecoder
from torchvision.datasets import MNIST, FashionMNIST
from utils.bins_samplers import GaussianQMCSampler
from utils.reproducibility import seed_everything
from utils.datasets import UnsupervisedDataset
import torchvision.transforms as transforms
from models.nets import mnist_conv_decoder
from torch.utils.data import DataLoader
import pytorch_lightning as pl
import torch.nn as nn
import torch

# Pick the compute device: use CUDA when it is available, otherwise the CPU.
# `gpus` is the device count handed to the Lightning Trainer further down
# (1 on GPU, None on CPU).
if torch.cuda.is_available():
    device, gpus = 'cuda', 1
else:
    device, gpus = 'cpu', None
print(device)

cuda


## Choose the dataset

In [3]:
# Dataset selection. `dataset` is the torchvision dataset class and
# `dataset_name` its short textual tag. To switch to Fashion-MNIST, use the
# commented pair instead of the two assignments below.
# dataset, dataset_name = FashionMNIST, 'fashion_mnist'
dataset = MNIST
dataset_name = 'mnist'

In [4]:
# Images are only converted to tensors; no further preprocessing is applied.
transf = transforms.Compose([transforms.ToTensor()])

# Download (if needed) the training portion of the selected dataset into
# <repo_dir>/data and wrap it in the project's UnsupervisedDataset.
full_train = UnsupervisedDataset(
    dataset(root=os.path.join(repo_dir, 'data'), train=True, download=True, transform=transf)
)

# Hold out 10,000 examples for validation. The split gets its own seeded
# generator: this cell runs before seed_everything() is called, so with the
# global RNG the train/validation partition would differ on every kernel restart.
split_seed = 0
n_valid = 10_000
train, valid = torch.utils.data.random_split(
    full_train,
    [len(full_train) - n_valid, n_valid],
    generator=torch.Generator().manual_seed(split_seed),
)

batch_size = 256
# Training batches are shuffled and the incomplete final batch is dropped;
# validation keeps dataset order and every sample.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader = DataLoader(valid, batch_size=batch_size)

# Sanity check: print the shape of one training batch.
print(next(iter(train_loader)).shape)

torch.Size([256, 1, 28, 28])


## Hyper-parameters below

In [5]:
# Upper bound on training epochs (passed to the Lightning Trainer).
max_epochs = 500

# Settings passed to mnist_conv_decoder.
latent_dim, n_filters = 16, 16
batch_norm, resblock = True, True
bias = False

# Settings passed to GaussianDecoder (learn_std is also given to the decoder net).
learn_std = True
min_std, max_std = 0.1, 1.0
mu_activation = nn.Sigmoid()

# Passed to GaussianQMCSampler together with latent_dim.
n_bins = 1 << 14  # 16384

## Train

In [7]:
# Fix the seed before the model is constructed.
# NOTE(review): seed_everything is a project helper; presumably it seeds all
# relevant RNGs so that initialisation and training are repeatable -- confirm.
seed_everything(0)

# Convolutional decoder network, built from the hyper-parameters cell.
decoder_net = mnist_conv_decoder(
    latent_dim=latent_dim,
    n_filters=n_filters,
    batch_norm=batch_norm,
    learn_std=learn_std,
    bias=bias,
    resblock=resblock,
)

# Continuous mixture with a Gaussian decoder; the sampler is configured with
# the latent dimensionality and n_bins.
model = ContinuousMixture(
    GaussianDecoder(decoder_net, learn_std, min_std, max_std, mu_activation),
    sampler=GaussianQMCSampler(latent_dim, n_bins),
    k=2,
)
model.n_chunks = 32
model.missing = None

# Keep only the checkpoint with the lowest validation loss, and stop training
# once that loss has not improved for 15 consecutive epochs.
monitored_metric = 'valid_loss_epoch'
cp_best_model_valid = pl.callbacks.ModelCheckpoint(
    save_top_k=1,
    monitor=monitored_metric,
    mode='min',
    filename='best_model_valid-{epoch}',
)
early_stop_callback = pl.callbacks.early_stopping.EarlyStopping(
    monitor=monitored_metric,
    min_delta=0.00,
    patience=15,
    verbose=False,
    mode='min',
)
callbacks = [cp_best_model_valid, early_stop_callback]

# The log directory is derived from the selected dataset so that MNIST and
# Fashion-MNIST runs do not end up in the same folder.
log_dir = repo_dir + f'/logs/{dataset_name}_gaussian/FFM/latent_dim_{latent_dim}/num_bins_{n_bins}'
logger = pl.loggers.TensorBoardLogger(log_dir)

# `gpus` is None on CPU and 1 on GPU; Lightning expects an accelerator name
# plus a positive device count.
use_gpu = bool(gpus) and gpus > 0
trainer = pl.Trainer(
    max_epochs=max_epochs,
    accelerator='gpu' if use_gpu else 'cpu',
    devices=gpus if gpus else 1,
    callbacks=callbacks,
    logger=logger,
    deterministic=True,
)
trainer.fit(model, train_loader, valid_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type            | Params | Mode 
----------------------------------------------------
0 | decoder | GaussianDecoder | 87.2 K | train
----------------------------------------------------
87.2 K    Trainable params
0         Non-trainable params
87.2 K    Total params
0.349     Total estimated model params size (MB)
38        Modules in train mode
0         Modules in eval mode


Epoch 8:  58%|█████▊    | 114/195 [00:46<00:33,  2.44it/s, v_num=1, valid_loss_epoch=-0.945, train_loss=-0.958]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined