In [None]:
import GPUtil

# Sanity check of the hardware: print one line per GPU that GPUtil reports,
# showing the device name followed by its total memory.
GPUs = GPUtil.getGPUs()
for device in GPUs:
    summary = (device.name, device.memoryTotal)
    print(*summary)

In [5]:

from eval_sgld import LightningEvalSearchSGLD, SingleImageDataset
from space import DARTS_UNet
from darts.common_utils import *
from darts.phantom import generate_phantom

from nni import trace

import nni.retiarii.serializer as serializer
import nni.retiarii.strategy as strategy

from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
from nni.retiarii.strategy import DARTS as DartsStrategy
from nni.retiarii.evaluator.pytorch import Lightning, Trainer
from nni.retiarii.evaluator.pytorch.lightning import DataLoader

In [6]:
# Release cached CUDA memory before the search cells allocate the model.
# (The phantom input image itself is generated in the one-shot cell below.)
import torch
torch.cuda.empty_cache()

# One-shot search (DARTS)

In [7]:
# One-shot DARTS search over the DARTS_UNet space, evaluated on a generated phantom.
num_iter = 1
total_iterations = 15000

resolution = 6
max_depth = resolution - 1  # not used in this cell
phantom = generate_phantom(resolution=resolution)

# Lightning module that is trained while the architecture is searched.
# NOTE(review): the saved output of this cell ends with
#   "RuntimeError: Trying to backward through the graph a second time ..."
# right after the first closure / add-noise step. Nothing in this cell calls
# backward(), so the cause is presumably inside LightningEvalSearchSGLD's
# training step -- verify there.
module = LightningEvalSearchSGLD(
    phantom=phantom,
    num_iter=num_iter,
    lr=0.01,
    noise_type='gaussian',
    noise_factor=0.05,
    resolution=resolution,
)

# PyTorch Lightning trainer.
# The observed number of iterations is twice the value passed as max_epochs
# (origin unclear: either this project's code or Lightning), so the requested
# total is halved: for 5000 iterations, max_epochs must be 2500.
# Floor division keeps max_epochs an integer, as Lightning expects.
trainer = Trainer(
    max_epochs=total_iterations // (2 * num_iter),
    fast_dev_run=False,
    gpus=1,
)

# Make sure the trainer exposes `optimizer_frequencies`.
# NOTE(review): presumably read by the one-shot machinery on this
# Lightning version -- confirm it is still required.
if not hasattr(trainer, 'optimizer_frequencies'):
    trainer.optimizer_frequencies = []

# Data loaders and the Lightning evaluator handed to the experiment.
train_loader = DataLoader(SingleImageDataset(phantom, num_iter=1), batch_size=1)
val_loader = DataLoader(SingleImageDataset(phantom, num_iter=1), batch_size=1)

lightning = Lightning(
    lightning_module=module,
    trainer=trainer,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

# Search space.
model_space = DARTS_UNet(depth=3)

# Search strategy. Named `search_strategy` so it does not shadow the
# `nni.retiarii.strategy` module that the import cell binds to `strategy`.
search_strategy = DartsStrategy()

# Run the experiment with the one-shot execution engine.
config = RetiariiExeConfig(execution_engine='oneshot')
experiment = RetiariiExperiment(model_space, evaluator=lightning, strategy=search_strategy)
experiment.run(config)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                    | Params
--------------------------------------------------
0 | model | LightningEvalSearchSGLD | 15.5 M
--------------------------------------------------
15.5 M    Trainable params
0         Non-trainable params
15.5 M    Total params
61.980    Total estimated model params size (MB)


Starting optimization with SGLD


Training: 0it [00:00, ?it/s]

entering forward pass: 1
completed forward pass: 1
entering closure: 1
[2023-08-28 21:32:18] [32mIntermediate result: {"loss": 0.0722808837890625}  (Index 1)[0m
completed closure: 1
entering add noise: 1
completed add noise: 1


  File "/home/joe/.pyenv/versions/3.8.10/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/joe/.pyenv/versions/3.8.10/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/joe/.cache/pypoetry/virtualenvs/nas-test-OHy8kATa-py3.8/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/joe/.cache/pypoetry/virtualenvs/nas-test-OHy8kATa-py3.8/lib/python3.8/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    app.start()
  File "/home/joe/.cache/pypoetry/virtualenvs/nas-test-OHy8kATa-py3.8/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 736, in start
    self.io_loop.start()
  File "/home/joe/.cache/pypoetry/virtualenvs/nas-test-OHy8kATa-py3.8/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 195, in start
    self.asyncio_loop.run_forever()
  File "/home/joe/.pyenv/versions/3.8

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [None]:

# Ask the experiment for its top model(s) and keep the result for inspection.
# Requires `experiment` from the search cell above to exist in the kernel.
exported_arch = experiment.export_top_models()

# Bare expression so the notebook renders the exported value as the cell output.
exported_arch


In [4]:
# Shut down the running experiment; `experiment` must already exist in the
# kernel (it is created by the search cell above).
experiment.stop()

# Release cached CUDA memory now that the experiment has been stopped
# (`torch` is imported in an earlier cell).
torch.cuda.empty_cache()

[2023-08-28 21:32:14] [32mStopping experiment, please wait...[0m
[2023-08-28 21:32:14] [32mExperiment stopped[0m


# Multi-trial search

In [None]:
# Multi-trial search over the default DARTS_UNet space.
# This cell reuses `phantom` and `resolution` defined in the one-shot cell above,
# so that cell (or at least its setup lines) must have been run first.
num_iter = 1
total_iterations = 1200

# Lightning module that is trained for each trial.
# NOTE(review): `LightningEvalSearch` is not among the names imported explicitly
# in the import cell (only `LightningEvalSearchSGLD` is). Unless the wildcard
# import from `darts.common_utils` provides it, this line raises NameError --
# confirm where the class lives and import it explicitly.
module = LightningEvalSearch(
    phantom=phantom,
    buffer_size=100,
    num_iter=num_iter,
    lr=0.01,
    noise_type='gaussian',
    noise_factor=0.075,
    resolution=resolution,
    buffer_no_lr_schuler=600,
    patience=100,
)

# PyTorch Lightning trainer.
# The observed number of iterations is twice the value passed as max_epochs
# (origin unclear: either this project's code or Lightning), so the requested
# total is halved: for 5000 iterations, max_epochs must be 2500.
# Floor division keeps max_epochs an integer, as Lightning expects.
trainer = Trainer(
    max_epochs=total_iterations // (2 * num_iter),
    fast_dev_run=False,
    gpus=1,
)

# Make sure the trainer exposes `optimizer_frequencies`.
# NOTE(review): presumably only needed for the one-shot path -- confirm it is
# required here as well.
if not hasattr(trainer, 'optimizer_frequencies'):
    trainer.optimizer_frequencies = []

# Data loaders (wrapped with nni.trace) and the Lightning evaluator.
train_loader = trace(DataLoader)(SingleImageDataset(phantom, num_iter=1), batch_size=1)
val_loader = trace(DataLoader)(SingleImageDataset(phantom, num_iter=1), batch_size=1)
lightning = Lightning(
    lightning_module=module,
    trainer=trainer,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

# Search space.
model_space = DARTS_UNet()

# Search strategy.
# NOTE(review): the cell above runs DartsStrategy with execution_engine='oneshot',
# while this cell pairs it with the 'local' config -- confirm that this
# combination is intended for a multi-trial run.
search_strategy = DartsStrategy()

# Configure the experiment; the empty list is the third positional argument
# (no extra mutators are supplied).
experiment = RetiariiExperiment(model_space, lightning, [], search_strategy)
exp_config = RetiariiExeConfig('local')
# NOTE(review): the name looks inherited from an MNIST example; rename if desired.
exp_config.experiment_name = 'mnist_search'

# Raise NNI's pickle size limit so larger objects can be serialized.
serializer.pickle_size_limit = 1024 * 1024 * 100  # 100MB

exp_config.max_trial_number = 4   # spawn 4 trials at most
exp_config.trial_concurrency = 1  # run one trial at a time

exp_config.trial_gpu_number = 1
exp_config.training_service.use_active_gpu = True

# Start the experiment on port 8081.
experiment.run(exp_config, 8081)

In [None]:

# Ask the multi-trial experiment for its top model(s); this rebinds
# `exported_arch`, replacing any value left by the one-shot section.
exported_arch = experiment.export_top_models()

# Bare expression so the notebook renders the exported value as the cell output.
exported_arch

In [None]:
# Shut down the multi-trial experiment; `experiment` must already exist in the
# kernel (it is created by the multi-trial search cell above).
experiment.stop()

# Release cached CUDA memory now that the experiment has been stopped
# (`torch` is imported in an earlier cell).
torch.cuda.empty_cache()