In [1]:
!pip install nni

Collecting nni
  Downloading nni-3.0-py3-none-manylinux1_x86_64.whl (61.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/61.4 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting astor (from nni)
  Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Collecting colorama (from nni)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting filelock<3.12 (from nni)
  Downloading filelock-3.11.0-py3-none-any.whl (10.0 kB)
Collecting json-tricks>=3.15.5 (from nni)
  Downloading json_tricks-3.17.3-py2.py3-none-any.whl (27 kB)
Collecting nvidia-ml-py (from nni)
  Downloading nvidia_ml_py-12.535.133-py3-none-any.whl (37 kB)
Collecting PythonWebHDFS (from nni)
  Downloading PythonWebHDFS-0.2.3-py3-none-any.whl (10 kB)
Collecting responses (from nni)
  Downloading responses-0.25.0-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.2/55.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting schema (

In [2]:
!pip install pytorch-lightning

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.2.2-py3-none-any.whl (801 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m801.9/801.9 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.3.2-py3-none-any.whl (841 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m841.5/841.5 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->pytorch-lightning)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.0->pytorch-lightning)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.0->pytorc

# Neural Architecture Search

![Abstract illustration of neural architecture search](https://nni.readthedocs.io/en/stable/_images/nas_abstract_illustration.png)

#### Example of a NN model


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import nni
from nni.nas.nn.pytorch import LayerChoice, ModelSpace, MutableDropout, MutableLinear

In [11]:
class Net(ModelSpace):  # derives from ModelSpace instead of plain nn.Module
    """Fixed (non-searchable) CNN classifier with 10 output classes.

    Layout: two 3x3 convolutions, a 2x2 max-pool, dropout, and two linear
    layers. ``fc1`` expects 9216 flattened features, i.e. 64 channels of
    12x12, which is what a 1x28x28 input produces after the conv/pool stack.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        # Convolutional feature extractor.
        features = F.relu(self.conv1(x))
        features = self.conv2(features)
        features = F.max_pool2d(features, 2)

        # Regularise, then flatten everything except the batch dimension.
        features = self.dropout1(features)
        flat = torch.flatten(features, 1)

        # Classifier head.
        hidden = F.relu(self.fc1(flat))
        hidden = self.dropout2(hidden)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

### Model variations

In [12]:
class DepthwiseSeparableConv(nn.Module):
    """A 3x3 depthwise convolution followed by a 1x1 pointwise convolution.

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels produced by the pointwise step.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        # groups == in_ch gives every input channel its own 3x3 filter.
        self.depthwise = nn.Conv2d(
            in_channels=in_ch, out_channels=in_ch, kernel_size=3, groups=in_ch
        )
        # The 1x1 convolution mixes channels and sets the output width.
        self.pointwise = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=1)

    def forward(self, x):
        """Apply the depthwise step, then the pointwise step."""
        per_channel = self.depthwise(x)
        mixed = self.pointwise(per_channel)
        return mixed

class MyModelSpace(ModelSpace):
    """Search space over a small CNN classifier with 10 output classes.

    Three decisions are left open, each identified by its label:
      * ``conv2``   - regular 3x3 convolution vs. ``DepthwiseSeparableConv``;
      * ``dropout`` - rate of the first dropout layer (0.25, 0.5 or 0.75);
      * ``feature`` - width of the hidden linear layer (64, 128 or 256).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)

        # LayerChoice selects exactly one of the candidate layers below.
        conv2_candidates = [
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1),
            DepthwiseSeparableConv(32, 64),
        ]
        self.conv2 = LayerChoice(conv2_candidates, label='conv2')

        # nni.choice yields a searchable value that can be passed as a
        # parameter to the `MutableXXX` layers.
        dropout_rate = nni.choice('dropout', [0.25, 0.5, 0.75])
        self.dropout1 = MutableDropout(dropout_rate)
        self.dropout2 = nn.Dropout(p=0.5)

        # One shared choice keeps fc1's output and fc2's input widths in sync.
        hidden_width = nni.choice('feature', [64, 128, 256])
        self.fc1 = MutableLinear(9216, hidden_width)
        self.fc2 = MutableLinear(hidden_width, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        # Convolutional feature extractor.
        features = F.relu(self.conv1(x))
        features = self.conv2(features)
        features = F.max_pool2d(features, 2)

        # Regularise, then flatten everything except the batch dimension.
        features = self.dropout1(features)
        flat = torch.flatten(features, 1)

        # Classifier head.
        hidden = F.relu(self.fc1(flat))
        hidden = self.dropout2(hidden)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)



In [13]:
# Instantiate the search space; the bare name on the last line displays its module tree.
model_space = MyModelSpace()
model_space

MyModelSpace(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): LayerChoice(
    label='conv2'
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): DepthwiseSeparableConv(
      (depthwise): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), groups=32)
      (pointwise): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
    )
  )
  (dropout1): MutableDropout(p=Categorical([0.25, 0.5, 0.75], label='dropout'))
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): MutableLinear(in_features=9216, out_features=Categorical([64, 128, 256], label='feature'))
  (fc2): MutableLinear(in_features=Categorical([64, 128, 256], label='feature'), out_features=10)
)

Details on model space: https://nni.readthedocs.io/en/stable/nas/construct_space.html

There are basically two exploration approaches:
(1) searching by evaluating each sampled model independently, which is the approach used in multi-trial NAS; and
(2) one-shot, weight-sharing-based search, which is the approach used in one-shot NAS.
We demonstrate the first approach in this tutorial.

### Pick an exploration strategy

See all exploration strategies: https://nni.readthedocs.io/en/stable/nas/exploration_strategy.html

In [14]:
import nni.nas.strategy as strategy
# Random exploration strategy; it is passed to NasExperiment when the experiment is created.
search_strategy = strategy.Random()

### Pick or customize a model evaluator

In the exploration process, the exploration strategy repeatedly generates new models. A model evaluator is for training and validating each generated model to obtain the model’s performance. The performance is sent to the exploration strategy for the strategy to generate better models.

NNI NAS provides built-in model evaluators, but to start with, it is recommended to use `FunctionalEvaluator`, that is, to wrap your own training and evaluation code in a single function. This function should receive a single model (as of NNI v3.0 the model is already instantiated by default) and use `nni.report_final_result()` to report the final score of this model.



Model evaluators: https://nni.readthedocs.io/en/stable/nas/evaluator.html

In [19]:
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

def train_epoch(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: module being optimised; it is switched to training mode.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches.
        optimizer: optimizer that is zeroed and stepped once per batch.
        epoch: epoch number, used only in the progress message.

    The loss is ``torch.nn.CrossEntropyLoss``; a progress line is printed
    for every 10th batch.
    """
    criterion = torch.nn.CrossEntropyLoss()
    model.train()

    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        # Only every 10th batch is logged.
        if step % 10 != 0:
            continue
        samples_seen = step * len(inputs)
        samples_total = len(train_loader.dataset)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, samples_total, percent_done, batch_loss.item()))


def test_epoch(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)

    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
          correct, len(test_loader.dataset), accuracy))

    return accuracy

In [21]:
def evaluate_model(model):
    """Train ``model`` on MNIST for 3 epochs and report its test accuracy to NNI.

    After every epoch the test accuracy is sent with
    ``nni.report_intermediate_result``; the accuracy of the last epoch is
    sent with ``nni.report_final_result``. Nothing is returned.
    """
    # By v3.0, the model will be instantiated by default.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Shared preprocessing: tensor conversion followed by normalisation.
    normalize = transforms.Normalize((0.1307,), (0.3081,))
    preprocess = transforms.Compose([transforms.ToTensor(), normalize])

    train_set = MNIST('data/mnist', download=True, transform=preprocess)
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    test_set = MNIST('data/mnist', download=True, train=False, transform=preprocess)
    test_loader = DataLoader(test_set, batch_size=64)

    for epoch in range(3):
        # One training pass, then one evaluation pass.
        train_epoch(model, device, train_loader, optimizer, epoch)
        test_accuracy = test_epoch(model, device, test_loader)
        # Intermediate results may be a float or a dict.
        nni.report_intermediate_result(test_accuracy)

    # The last epoch's accuracy is the final score of this model.
    nni.report_final_result(test_accuracy)

Create the evaluator

In [23]:
from nni.nas.evaluator import FunctionalEvaluator
# Wrap the training/evaluation function so it can be passed to the experiment as its evaluator.
evaluator = FunctionalEvaluator(evaluate_model)

Launch an experiment

In [24]:
from nni.nas.experiment import NasExperiment
# Tie together the search space, the evaluator and the exploration strategy.
exp = NasExperiment(model_space, evaluator, search_strategy)

[2024-04-16 12:38:22] [32mConfig is not provided. Will try to infer.[0m


INFO:nni.nas.experiment.config.experiment:Config is not provided. Will try to infer.


[2024-04-16 12:38:22] [32mUsing execution engine based on training service. Trial concurrency is set to 1.[0m


INFO:nni.nas.experiment.config.experiment:Using execution engine based on training service. Trial concurrency is set to 1.


[2024-04-16 12:38:22] [32mUsing simplified model format.[0m


INFO:nni.nas.experiment.config.experiment:Using simplified model format.


[2024-04-16 12:38:22] [32mUsing local training service.[0m


INFO:nni.nas.experiment.config.experiment:Using local training service.


In [27]:
# Trial budget: cap the search so it is bounded instead of running until interrupted by hand.
exp.config.max_trial_number = 3   # spawn 3 trials at most

# optional
#exp.config.trial_concurrency = 1  # will run 1 trial concurrently
#exp.config.trial_gpu_number = 0   # will not use GPU

# uncomment to use GPU
# exp.config.trial_gpu_number = 1
# exp.config.training_service.use_active_gpu = True

In [28]:
# Run the experiment; its web portal is served on port 8081 (see the URLs in the log output).
exp.run(port=8081)

[2024-04-16 12:40:11] [32mCreating experiment, Experiment ID: [36myw35cm94[0m


INFO:nni.experiment:Creating experiment, Experiment ID: ${CYAN}yw35cm94
  self.pid = _posixsubprocess.fork_exec(


[2024-04-16 12:40:11] [32mStarting web server...[0m


INFO:nni.experiment:Starting web server...






[2024-04-16 12:40:13] [32mSetting up...[0m


INFO:nni.experiment:Setting up...


[2024-04-16 12:40:13] [32mWeb portal URLs: [36mhttp://127.0.0.1:8081 http://172.28.0.12:8081[0m


INFO:nni.experiment:Web portal URLs: ${CYAN}http://127.0.0.1:8081 http://172.28.0.12:8081


[2024-04-16 12:40:14] [32mSuccessfully update searchSpace.[0m


INFO:nni.experiment:Successfully update searchSpace.


[2024-04-16 12:40:14] [32mCheckpoint saved to /root/nni-experiments/yw35cm94/checkpoint.[0m


INFO:nni.nas.experiment.experiment:Checkpoint saved to /root/nni-experiments/yw35cm94/checkpoint.


[2024-04-16 12:40:14] [32mExperiment initialized successfully. Starting exploration strategy...[0m


INFO:nni.nas.experiment.experiment:Experiment initialized successfully. Starting exploration strategy...










[2024-04-16 13:00:01] [32mStopping experiment, please wait...[0m


INFO:nni.nas.experiment.experiment:Stopping experiment, please wait...


[2024-04-16 13:00:01] [32mCheckpoint saved to /root/nni-experiments/yw35cm94/checkpoint.[0m


INFO:nni.nas.experiment.experiment:Checkpoint saved to /root/nni-experiments/yw35cm94/checkpoint.


KeyboardInterrupt: 

Export top models

In [29]:
# Print every top model exported by the experiment, in dict format.
top_models = exp.export_top_models(formatter='dict')
for exported in top_models:
    print(exported)

ConnectionError: HTTPConnectionPool(host='localhost', port=8081): Max retries exceeded with url: /api/v1/nni/trial-jobs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7a8174cdd8a0>: Failed to establish a new connection: [Errno 111] Connection refused'))