In [1]:

import torch
from torch import nn
from torch.func import functional_call, grad
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision.datasets import CIFAR10
from torchvision import transforms
import pytorch_lightning as pl
import os
import numpy as np
from threading import Thread

from gc_module import ContNet

# Report whether PyTorch can see a CUDA device (shown as this cell's output).
print(torch.cuda.is_available())
# Create a one-element tensor and move it to the GPU; the result is discarded.
# NOTE(review): presumably a smoke test / early CUDA initialisation -- confirm intent.
torch.zeros(1).cuda()
# Select the 'high' float32 matmul precision setting globally (see the PyTorch
# documentation for the speed/precision trade-off this implies).
torch.set_float32_matmul_precision('high')

True


In [2]:

class CIFARClass(ContNet):
    """Convolutional autoencoder trained to reconstruct CIFAR-10 images.

    Extends ``ContNet`` (imported from ``gc_module``). The attributes and
    methods ``logcontvar``, ``cont_lr``, ``perturb_params`` and
    ``contvar_grad`` used below are not defined in this class, so they are
    expected to come from ``ContNet``; their implementation is not visible here.

    The CIFAR-10 labels are ignored: the loss is the mean squared error between
    an input image and its reconstruction.

    Args:
        loglambda0: Forwarded unchanged to ``ContNet.__init__``.
        cont_lr: Forwarded to ``ContNet.__init__``; ``self.cont_lr`` is also
            used as the learning rate of the ``logcontvar`` parameter group.
        cont_reg: Forwarded unchanged to ``ContNet.__init__``.
        warmup_epochs: Forwarded unchanged to ``ContNet.__init__``.
    """

    def __init__(self, loglambda0: float, cont_lr: float, cont_reg: float, warmup_epochs: int):
        super().__init__(loglambda0, cont_lr, cont_reg, warmup_epochs)
        self.lossfunc = F.mse_loss

        # Download (if needed) CIFAR-10 into the current working directory and
        # split it 90% / 10% into training and validation subsets.
        self.dataset = CIFAR10(os.getcwd(), download=True, transform=transforms.ToTensor())
        size_train = int(len(self.dataset)*0.9)
        self.data_train, self.data_val = random_split(self.dataset, [size_train, len(self.dataset) - size_train])

        encdim = 512

        activ = nn.ReLU()
        # Encoder: four stride-2 convolutions, then flatten to 512 features
        # (128 channels x 2 x 2, matching the Unflatten below).
        # Bottleneck: two linear layers with no activation in between.
        # Decoder: four stride-2 transposed convolutions, sigmoid on the output.
        self.net = nn.Sequential(
                                nn.Conv2d(3, 16, 3, padding=1, stride=2), activ,
                                nn.Conv2d(16, 32, 3, padding=1, stride=2), activ,
                                nn.Conv2d(32, 64, 3, padding=1, stride=2), activ,
                                nn.Conv2d(64, 128, 3, padding=1, stride=2), activ,
                                nn.Flatten(start_dim=1),
                                nn.Linear(512, encdim),
                                nn.Linear(encdim, 512),
                                nn.Unflatten(1, (128, 2, 2)), activ,
                                nn.ConvTranspose2d(128, 64, 3, padding=1, output_padding=1, stride=2), activ,
                                nn.ConvTranspose2d(64, 32, 3, padding=1, output_padding=1, stride=2), activ,
                                nn.ConvTranspose2d(32, 16, 3, padding=1, output_padding=1, stride=2), activ,
                                nn.ConvTranspose2d(16, 3, 3, padding=1, output_padding=1, stride=2), nn.Sigmoid()
                                )

        # Xavier-initialise weights and zero the biases of Linear and Conv2d layers.
        # NOTE(review): the ConvTranspose2d layers do not match this check and
        # keep their default initialisation -- confirm this is intended.
        for layer in self.net.modules():
            if isinstance(layer, (nn.Linear, nn.Conv2d)):
                nn.init.xavier_uniform_(layer.weight)
                nn.init.zeros_(layer.bias)

        # Snapshot of the parameters at initialisation. ``state_dict()`` returns
        # tensors that share storage with the live parameters, so each entry is
        # cloned; otherwise the "initial" values would silently follow training.
        self.init_params = {name: tensor.detach().clone()
                            for name, tensor in self.state_dict().items()}

    def forward(self, x):
        """Return the reconstruction of the image batch ``x``."""
        xrec = self.net(x)
        return xrec

    def configure_optimizers(self):
        """Build an Adam optimizer with a separate learning rate for ``logcontvar``.

        The network parameters use the default ``lr=1e-4``; ``self.logcontvar``
        is placed in its own parameter group with ``lr=self.cont_lr``.
        """
        # include logcontvar in optimizer
        optimizer = torch.optim.Adam([{'params': self.net.parameters()},
                                      {'params': (self.logcontvar,), 'lr': self.cont_lr}],
                                     lr=1e-4)
        return optimizer

    def training_step(self, train_batch, batch_idx):
        """Run one manually-optimised step on perturbed parameters.

        The step zeroes the gradients, perturbs the parameters through
        ``self.perturb_params()``, back-propagates the reconstruction loss,
        calls ``self.contvar_grad``, restores the reference parameters and only
        then applies the optimizer update.

        NOTE(review): the use of ``self.optimizers()`` / ``manual_backward``
        presumes ``ContNet`` disables automatic optimisation -- confirm.
        """
        # Logged value is the sum of squared parameter entries (no square root).
        self.log('param_norm', sum(p.pow(2.0).sum() for p in self.parameters()))

        opt = self.optimizers()
        opt.zero_grad()

        # add gaussian noise to parameters
        rand_samp, ref_params = self.perturb_params()

        # compute loss (labels are unused: the target is the input itself)
        x, _ = train_batch
        xrec = self.forward(x)
        #param_norm = torch.norm(torch.cat([(p.view(-1) - self.init_params[pname].to(self.device).view(-1)) for pname, p in self.named_parameters()]))
        func_loss = self.lossfunc(xrec, x)
        #reg_loss = torch.exp(self.logcontvar) * self.cont_reg * param_norm
        loss = func_loss# + reg_loss
        self.manual_backward(loss)

        # compute contvar gradient
        self.contvar_grad(rand_samp, loss)

        # reload reference parameters, so the update below is applied to the
        # unperturbed weights using the gradients computed above
        self.load_state_dict(ref_params)

        opt.step()
        self.log('train_loss', func_loss.detach())

    def validation_step(self, val_batch, batch_idx):
        """Log the reconstruction loss of one validation batch as ``val_loss``."""
        x, _ = val_batch
        xrec = self.net(x)

        loss = self.lossfunc(xrec, x)
        self.log('val_loss', loss)

    def train_dataloader(self):
        """Return the shuffled training loader (batches of 250 images)."""
        return DataLoader(self.data_train, batch_size=250, shuffle=True, num_workers=16, persistent_workers=True)

    def val_dataloader(self):
        """Return the validation loader (batches of 250 images, not shuffled)."""
        return DataLoader(self.data_val, batch_size=250, num_workers=16, persistent_workers=True)


In [3]:
def run_case(hyperparams: list, epochs: int = 50) -> None:
    """Train one ``CIFARClass`` model for a single hyperparameter combination.

    Args:
        hyperparams: Positional constructor arguments for ``CIFARClass``; they
            are unpacked with ``*`` and also appended to the log directory name.
        epochs: Maximum number of training epochs. Defaults to 50, the value
            that was previously hard-coded.
    """
    mymodel = CIFARClass(*hyperparams)

    # One TensorBoard run directory per (epochs, hyperparameters) combination;
    # repeated runs of the same combination get successive version numbers.
    run_name = f'cifar10_aenc_{epochs}_' + '_'.join([f'{p}' for p in hyperparams])
    logger = pl.loggers.tensorboard.TensorBoardLogger('.', name=run_name)

    # training
    trainer = pl.Trainer(max_epochs=epochs, accelerator='gpu', logger=logger)
    trainer.fit(mymodel)

def sweep_hyperparams(hyperparam_list: list, n_runs: int, hyperparams: list = None) -> None:
    """Run ``run_case`` ``n_runs`` times for every combination in a grid.

    The grid is walked recursively: ``hyperparams`` holds the values already
    chosen for the first ``len(hyperparams)`` entries of ``hyperparam_list``,
    and each recursion level picks a value for the next entry.

    Args:
        hyperparam_list: One list of candidate values per hyperparameter, in
            the positional order expected by ``run_case``.
        n_runs: Number of repeated runs per complete combination.
        hyperparams: Values chosen so far. Callers normally omit it; ``None``
            means no values have been chosen yet.
    """
    # ``None`` replaces the former mutable ``[]`` default.
    chosen = [] if hyperparams is None else hyperparams

    if len(chosen) == len(hyperparam_list):
        # Complete combination: train it n_runs times.
        for _ in range(n_runs):
            print('-'*80)
            print(f'Running case with hyperparams {chosen}')
            print('-'*80)

            # Each run gets its own copy so a callee that mutates its argument
            # cannot change the hyperparameters seen by later runs.
            run_case(list(chosen))
        return

    # Extend the partial combination with every candidate of the next entry.
    for hyperparam_i in hyperparam_list[len(chosen)]:
        sweep_hyperparams(hyperparam_list, n_runs, chosen + [hyperparam_i])
    

In [4]:
def main() -> None:
    """Define the hyperparameter grid and launch the sweep.

    Every hyperparameter currently has a single candidate value, so the sweep
    amounts to five repeated runs of one configuration.
    """
    # Repetitions per grid point.
    repeats = 5

    # One list of candidates per hyperparameter, in the positional order
    # [loglambda0, cont_lr, cont_reg, warmup_epochs].
    grid = [
        [np.log(1e-4)],  # loglambda0 (float('nan') was a disabled alternative)
        [1e-2],          # cont_lr
        [0.0],           # cont_reg
        [-1],            # warmup_epochs
    ]

    sweep_hyperparams(grid, repeats)


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
Running case with hyperparams [-9.210340371976182, 0.01, 0.0, -1]
--------------------------------------------------------------------------------
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params
------------------------------------
0 | net  | Sequential | 720 K 
------------------------------------
720 K     Trainable params
0         Non-trainable params
720 K     Total params
2.880     Total estimated model params size (MB)


                                                                           

  return F.conv2d(input, weight, bias, self.stride,


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 46.65it/s, v_num=5]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 46.27it/s, v_num=5]
--------------------------------------------------------------------------------
Running case with hyperparams [-9.210340371976182, 0.01, 0.0, -1]
--------------------------------------------------------------------------------
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params
------------------------------------
0 | net  | Sequential | 720 K 
------------------------------------
720 K     Trainable params
0         Non-trainable params
720 K     Total params
2.880     Total estimated model params size (MB)


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 48.90it/s, v_num=6]        

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 48.50it/s, v_num=6]
--------------------------------------------------------------------------------
Running case with hyperparams [-9.210340371976182, 0.01, 0.0, -1]
--------------------------------------------------------------------------------
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params
------------------------------------
0 | net  | Sequential | 720 K 
------------------------------------
720 K     Trainable params
0         Non-trainable params
720 K     Total params
2.880     Total estimated model params size (MB)


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 47.07it/s, v_num=7]        

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 46.71it/s, v_num=7]
--------------------------------------------------------------------------------
Running case with hyperparams [-9.210340371976182, 0.01, 0.0, -1]
--------------------------------------------------------------------------------
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params
------------------------------------
0 | net  | Sequential | 720 K 
------------------------------------
720 K     Trainable params
0         Non-trainable params
720 K     Total params
2.880     Total estimated model params size (MB)


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 47.04it/s, v_num=8]        

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 46.68it/s, v_num=8]
--------------------------------------------------------------------------------
Running case with hyperparams [-9.210340371976182, 0.01, 0.0, -1]
--------------------------------------------------------------------------------
Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params
------------------------------------
0 | net  | Sequential | 720 K 
------------------------------------
720 K     Trainable params
0         Non-trainable params
720 K     Total params
2.880     Total estimated model params size (MB)


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 47.96it/s, v_num=9]        

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 180/180 [00:03<00:00, 47.59it/s, v_num=9]
