In [3]:
import GPUtil

# Query the visible devices once, then report each one's name and total memory.
GPUs = GPUtil.getGPUs()
for device in GPUs:
    print(device.name, device.memoryTotal)

Tesla P4 7680.0


In [1]:

# Project-local pieces: the Lightning module used for evaluation, the dataset
# wrapped by the data loaders, and the model search space.
from eval_sgld import LightningEvalSearchSGLD, SingleImageDataset
from space import DARTS_UNet
# NOTE(review): wildcard import — it is not visible from this notebook which
# names it brings into scope; prefer explicit imports once they are known.
from darts.common_utils import *
from darts.phantom import generate_phantom

from nni import trace

import nni.retiarii.serializer as serializer
import nni.retiarii.strategy as strategy

from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
from nni.retiarii.strategy import DARTS as DartsStrategy
from nni.retiarii.evaluator.pytorch import Lightning, Trainer
from nni.retiarii.evaluator.pytorch.lightning import DataLoader

import torch
# Ask PyTorch to release cached, unused GPU memory before any experiment starts.
torch.cuda.empty_cache()

In [2]:

# Select the float tensor type that matches the available hardware.
if torch.cuda.is_available():
    dtype = torch.cuda.FloatTensor
else:
    dtype = torch.FloatTensor

# Report whether a CUDA device can be used.
print('CUDA available: {}'.format(torch.cuda.is_available()))

CUDA available: True


# Lightning training without architecture search

In [None]:
# Baseline run: train a fixed U-Net with the SGLD Lightning module, with no
# architecture search involved.
model = torch.hub.load(
    'mateuszbuda/brain-segmentation-pytorch', 'unet',
    in_channels=1, out_channels=1, init_features=64, pretrained=False,
)

num_iter = 1
total_iterations = 25000

resolution = 6
max_depth = resolution - 1
phantom = generate_phantom(resolution=resolution)

# Create the lightning module
module = LightningEvalSearchSGLD(
    phantom=phantom,
    num_iter=num_iter,
    # Note: a smaller learning rate affects the SGLD, so overfitting happens
    # FASTER at LOWER learning rates (start with 0.01).
    lr=0.01,
    noise_type='gaussian',
    noise_factor=0.09,
    resolution=resolution,
    burnin_iter=350,
    # NOTE(review): an already-instantiated network is passed under the name
    # `model_cls`; confirm LightningEvalSearchSGLD expects an instance here.
    model_cls=model,
)

# Create a PyTorch Lightning trainer
trainer = Trainer(
    # callbacks=[module.checkpoint_callback],
    # The comma after this argument was missing, which made the whole cell a
    # SyntaxError.
    max_epochs=total_iterations,
    fast_dev_run=False,
    gpus=1,
)

# Make sure the attribute exists on the trainer before it is handed to the
# evaluator.
if not hasattr(trainer, 'optimizer_frequencies'):
    trainer.optimizer_frequencies = []


# Create the lightning object for the evaluator; training and validation both
# iterate over the same single phantom image.
train_loader = DataLoader(SingleImageDataset(phantom, num_iter=1), batch_size=1)
val_loader = DataLoader(SingleImageDataset(phantom, num_iter=1), batch_size=1)

lightning = Lightning(
    lightning_module=module,
    trainer=trainer,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)
lightning.fit(model)

# One-shot architecture search (DARTS)

In [None]:
# One-shot DARTS search over the U-Net search space, evaluated with the SGLD
# Lightning module.
num_iter = 1
total_iterations = 25000

resolution = 6
max_depth = resolution - 1
phantom = generate_phantom(resolution=resolution)

# Create the lightning module
module = LightningEvalSearchSGLD(
    phantom=phantom,
    num_iter=num_iter,
    # Note: a smaller learning rate affects the SGLD, so overfitting happens
    # FASTER at LOWER learning rates (start with 0.01).
    lr=0.01,
    noise_type='gaussian',
    noise_factor=0.09,
    resolution=resolution,
    burnin_iter=1800,
)

# Create a PyTorch Lightning trainer
trainer = Trainer(
    # callbacks=[module.checkpoint_callback],
    # Either this code or Lightning makes the total number of iterations twice
    # the value given to max_epochs, so for 5000 iterations max_epochs must be
    # 2500. Floor division keeps the value an int rather than a float.
    max_epochs=total_iterations // (2 * num_iter),
    fast_dev_run=False,
    gpus=1,
)

# Make sure the attribute exists on the trainer before it is handed to the
# evaluator.
if not hasattr(trainer, 'optimizer_frequencies'):
    trainer.optimizer_frequencies = []


# Create the lightning object for the evaluator; training and validation both
# iterate over the same single phantom image.
train_loader = DataLoader(SingleImageDataset(phantom, num_iter=1), batch_size=1)
val_loader = DataLoader(SingleImageDataset(phantom, num_iter=1), batch_size=1)

lightning = Lightning(
    lightning_module=module,
    trainer=trainer,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)


# Create a Search Space
model_space = DARTS_UNet(depth=3)

# Select the Search Strategy. Bound to `search_strategy` so the imported
# `nni.retiarii.strategy` module alias (`strategy`) is not overwritten.
search_strategy = DartsStrategy()

# fast_dev_run=False

config = RetiariiExeConfig(execution_engine='oneshot')
experiment = RetiariiExperiment(model_space, evaluator=lightning, strategy=search_strategy)
experiment.run(config)

In [None]:
# stop experiment and clear cache
experiment.stop()
torch.cuda.empty_cache()

In [None]:

# Fetch the top model(s) found by the experiment.
# NOTE(review): in notebook order this runs after the cell that calls
# experiment.stop(), whereas the multi-search section exports before stopping;
# confirm exporting from a stopped experiment works.
exported_arch = experiment.export_top_models()

# Bare expression so the notebook displays the exported result.
exported_arch


# Multi-trial architecture search

In [None]:
# Multi-trial search: configure a local NNI experiment that runs several trials.
# NOTE(review): `phantom` and `resolution` are not defined in this cell; it
# relies on an earlier cell having been run.
num_iter = 1
total_iterations = 1200

# Create the lightning module
# NOTE(review): `LightningEvalSearch` is not among the explicit imports of this
# notebook (only `LightningEvalSearchSGLD` is); confirm it is provided by the
# wildcard import, otherwise this raises NameError.
module = LightningEvalSearch(
    phantom=phantom,
    buffer_size=100,
    num_iter=num_iter,
    lr=0.01,
    noise_type='gaussian',
    noise_factor=0.075,
    resolution=resolution,
    buffer_no_lr_schuler=600,
    patience=100,
)

# Create a PyTorch Lightning trainer
trainer = Trainer(
    # callbacks=[early_stop_callback],
    # Either this code or Lightning makes the total number of iterations twice
    # the value given to max_epochs, so for 5000 iterations max_epochs must be
    # 2500. Floor division keeps the value an int rather than a float.
    max_epochs=total_iterations // (2 * num_iter),
    fast_dev_run=False,
    gpus=1,
)

# Make sure the attribute exists on the trainer before it is handed to the
# evaluator.
if not hasattr(trainer, 'optimizer_frequencies'):
    trainer.optimizer_frequencies = []


# Create the lightning object for the evaluator. The loaders are wrapped with
# nni.trace here, unlike the other cells of this notebook.
train_loader = trace(DataLoader)(SingleImageDataset(phantom, num_iter=1), batch_size=1)
val_loader = trace(DataLoader)(SingleImageDataset(phantom, num_iter=1), batch_size=1)
lightning = Lightning(
    lightning_module=module,
    trainer=trainer,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

# Create a Search Space
model_space = DARTS_UNet()

# Select a Search Strategy
# NOTE(review): this is the same DartsStrategy the one-shot cell uses; confirm
# it is appropriate for a multi-trial 'local' experiment.
search_strategy = DartsStrategy()

# Configure and run the experiment for multi-strategy
experiment = RetiariiExperiment(model_space, lightning, [], search_strategy)
exp_config = RetiariiExeConfig('local')
exp_config.experiment_name = 'mnist_search'

serializer.pickle_size_limit = 1024 * 1024 * 100  # 100MB

exp_config.max_trial_number = 4   # spawn 4 trials at most
exp_config.trial_concurrency = 1  # run one trial at a time

exp_config.trial_gpu_number = 1
exp_config.training_service.use_active_gpu = True

# Second argument is the port passed to experiment.run.
experiment.run(exp_config, 8081)

In [None]:

# Fetch the top model(s) found by the experiment.
exported_arch = experiment.export_top_models()

# Bare expression so the notebook displays the exported result.
exported_arch

In [None]:
# Stop the experiment started by the search cell.
experiment.stop()

# clear the cuda cache
torch.cuda.empty_cache()