Importing the necessary libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import nni
from nni.nas.nn.pytorch import LayerChoice, ModelSpace, MutableDropout, MutableLinear
import nni.nas.strategy as strategy
from nni.nas.strategy.middleware import Filter, Chain
from nni.nas.profiler.pytorch.flops import NumParamsProfiler
from torchvision import datasets,transforms
from torch.utils.data import DataLoader
from nni.nas.evaluator import FunctionalEvaluator
from nni.nas.experiment import NasExperiment
import time
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  interpolation: int = Image.BILINEAR,
  interpolation: int = Image.NEAREST,
  interpolation: int = Image.BICUBIC,


Define Model Space with Model

In [2]:
class MyModelSpace(ModelSpace):
    """Searchable two-convolution CNN classifier with 10 output classes.

    Search dimensions (by label):
      * ``conv1`` / ``conv2`` -- 3x3 (no padding) vs. 5x5 (padding 1) kernel.
      * ``dropout``           -- rate of the first dropout layer.
      * ``feature``           -- width of the hidden fully connected layer.

    Both kernel options shrink each spatial dimension by 2, so the flattened
    size fed to ``fc1`` is the same for every sampled architecture.
    """

    def __init__(self):
        super().__init__()

        # First convolution: 1 input channel -> 32 feature maps.
        self.conv1 = LayerChoice(
            [
                nn.Conv2d(1, 32, kernel_size=3, stride=1),
                nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=1),
            ],
            label="conv1",
        )

        # Second convolution: 32 -> 64 feature maps.
        self.conv2 = LayerChoice(
            [
                nn.Conv2d(32, 64, kernel_size=3, stride=1),
                nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=1),
            ],
            label='conv2',
        )

        self.pool = nn.MaxPool2d(kernel_size=2)

        # Searchable dropout rate after pooling; the second dropout is fixed.
        dropout_rate = nni.choice('dropout', [0.25, 0.5, 0.75])
        self.dropout1 = MutableDropout(dropout_rate)
        self.dropout2 = nn.Dropout(0.5)

        # 9216 = 64 channels * 12 * 12, which matches 28x28 single-channel
        # inputs (28 -> 26 -> 24 after the convolutions, 12 after pooling).
        hidden_units = nni.choice('feature', [64, 128, 256])
        self.fc1 = MutableLinear(9216, hidden_units)
        self.fc2 = MutableLinear(hidden_units, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        features = F.relu(self.conv1(x))
        features = self.pool(self.conv2(features))
        features = self.dropout1(features)

        flat = torch.flatten(features, 1)
        hidden = self.dropout2(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)


# Single model-space instance; it is passed to every NasExperiment created later in the notebook.
model_space = MyModelSpace()


Function to estimate the memory footprint of a model (in MiB)

In [3]:
def get_model_size(model):
    """Return the memory taken by ``model``'s parameters and buffers, in MiB.

    Each tensor is weighted by its own element size, so non-float32 tensors
    (e.g. float64 weights or int64 buffers such as BatchNorm's
    ``num_batches_tracked``) are accounted for correctly instead of being
    assumed to take 4 bytes per element. Frozen parameters
    (``requires_grad=False``) are included as well, since they still occupy
    memory. For an all-trainable float32 model the result is unchanged.

    Args:
        model: A ``torch.nn.Module``.

    Returns:
        float: Total size in mebibytes (bytes / 1024**2).
    """
    param_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    buffer_bytes = sum(b.numel() * b.element_size() for b in model.buffers())
    return (param_bytes + buffer_bytes) / (1024 ** 2)

Function to evaluate a model and report performance metric to NNI

In [4]:
def evaluate_model(model):
    """Train ``model`` on FashionMNIST and report a composite score to NNI.

    After every training epoch the model is evaluated on the test split and a
    metrics dict is sent via ``nni.report_intermediate_result``; the metrics of
    the last epoch are also sent via ``nni.report_final_result``.

    Reported keys:
        default:  composite score (the value NNI optimises), computed as
                  ``(accuracy/100 - norm_latency - norm_memory) * 100``.
        accuracy: test accuracy in percent.
        latency:  seconds spent on one full pass over the test set.
        memory:   result of ``get_model_size(model)``.

    Args:
        model: The sampled ``torch.nn.Module`` to train and score.
    """
    num_epochs = 3
    batch_size = 64
    # Reference ranges used to min-max normalise latency (seconds) and memory
    # (same unit as get_model_size) inside the composite score. Values outside
    # these ranges are not clipped, so the score can become negative.
    latency_low, latency_high = 1, 2
    memory_low, memory_high = 2, 10

    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # The loss module is stateless, so it is built once instead of every epoch.
    loss_fn = torch.nn.CrossEntropyLoss()

    transf = transforms.Compose([
        transforms.ToTensor(),  # Convert image to tensor
        transforms.Normalize((0.5,), (0.5,)),  # Normalize the pixel values
    ])
    train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transf, download=True)
    test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transf, download=True)

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    # Named `metrics` rather than `dict` so the builtin is not shadowed.
    metrics = {"default": 0.0, "accuracy": 0.0, "latency": 0.0, "memory": 0.0}

    for _ in range(num_epochs):
        # ---- training pass ----
        model.train()
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()

        # ---- timed evaluation pass ----
        # perf_counter is monotonic, so the interval is immune to clock changes.
        eval_start = time.perf_counter()

        model.eval()
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                pred = output.argmax(dim=1, keepdim=True)
                # .item() also forces the device to finish before timing ends.
                correct += pred.eq(target.view_as(pred)).sum().item()

        accuracy = 100. * correct / len(test_loader.dataset)
        latency = time.perf_counter() - eval_start
        memory = get_model_size(model)

        # Reward accuracy, penalise normalised latency and memory.
        latency_penalty = (latency - latency_low) / (latency_high - latency_low)
        memory_penalty = (memory - memory_low) / (memory_high - memory_low)
        score = ((accuracy / 100) - latency_penalty - memory_penalty) * 100

        metrics = {
            "default": float(score),
            "accuracy": accuracy,
            "latency": latency,
            "memory": memory,
        }
        nni.report_intermediate_result(metrics)

    nni.report_final_result(metrics)

Create the evaluator of NNI

In [5]:
# Wrap the training/evaluation function so NNI can call it for each sampled model.
evaluator = FunctionalEvaluator(evaluate_model)

Defining different strategies to evaluate

In [6]:
# Three exploration strategies to compare, all constructed with their default settings.
# Random search.
search_strategy1 = strategy.Random() 
# Regularized evolution (its log below shows an initial population of 100 individuals).
search_strategy2 = strategy.RegularizedEvolution()  
# Tree-structured Parzen Estimator.
search_strategy3 = strategy.TPE()  

[2024-05-05 18:06:25] [32mUsing random seed 2111768997[0m


Defining an experiment for each strategy

In [7]:
# One experiment per strategy; all three share the same model space and evaluator.
# No config is passed, so NNI infers one (see the "Config is not provided" log lines).
experiment1 = NasExperiment(model_space, evaluator, search_strategy1)
experiment2 = NasExperiment(model_space, evaluator, search_strategy2)
experiment3 = NasExperiment(model_space, evaluator, search_strategy3)

[2024-05-05 18:06:25] [32mConfig is not provided. Will try to infer.[0m
[2024-05-05 18:06:25] [32mUsing execution engine based on training service. Trial concurrency is set to 1.[0m
[2024-05-05 18:06:25] [32mUsing simplified model format.[0m
[2024-05-05 18:06:25] [32mUsing local training service.[0m
[2024-05-05 18:06:25] [32mConfig is not provided. Will try to infer.[0m
[2024-05-05 18:06:25] [32mUsing execution engine based on training service. Trial concurrency is set to 1.[0m
[2024-05-05 18:06:25] [32mUsing simplified model format.[0m
[2024-05-05 18:06:25] [32mUsing local training service.[0m
[2024-05-05 18:06:25] [32mConfig is not provided. Will try to infer.[0m
[2024-05-05 18:06:25] [32mUsing execution engine based on training service. Trial concurrency is set to 1.[0m
[2024-05-05 18:06:25] [32mUsing simplified model format.[0m
[2024-05-05 18:06:25] [32mUsing local training service.[0m


Running the experiments on different ports

In [8]:
# Run the Random-strategy experiment; its web portal is served on port 8085.
experiment1.run(port=8085)

[2024-05-05 18:06:26] [32mCreating experiment, Experiment ID: [36mr9iv4tkb[0m
[2024-05-05 18:06:26] [32mStarting web server...[0m
[2024-05-05 18:06:28] [32mSetting up...[0m
[2024-05-05 18:06:29] [32mWeb portal URLs: [36mhttp://169.254.55.27:8085 http://169.254.97.166:8085 http://192.168.56.1:8085 http://169.254.83.81:8085 http://169.254.82.251:8085 http://169.254.92.2:8085 http://172.31.25.119:8085 http://169.254.253.36:8085 http://169.254.191.241:8085 http://127.0.0.1:8085[0m
[2024-05-05 18:06:29] [32mSuccessfully update searchSpace.[0m
[2024-05-05 18:06:29] [32mCheckpoint saved to C:\Users\azhar\nni-experiments\r9iv4tkb\checkpoint.[0m
[2024-05-05 18:06:29] [32mExperiment initialized successfully. Starting exploration strategy...[0m
[2024-05-05 18:16:13] [32mWaiting for models submitted to engine to finish...[0m
[2024-05-05 18:17:16] [32mExperiment is completed.[0m
[2024-05-05 18:17:16] [32mSearch process is done. You can put an `time.sleep(FOREVER)` here to block

True

In [9]:
# Run the RegularizedEvolution experiment; its web portal is served on port 8086.
experiment2.run(port=8086)

[2024-05-05 18:17:18] [32mCreating experiment, Experiment ID: [36mk4gef6vw[0m
[2024-05-05 18:17:18] [32mStarting web server...[0m
[2024-05-05 18:17:19] [32mSetting up...[0m
[2024-05-05 18:17:20] [32mWeb portal URLs: [36mhttp://169.254.55.27:8086 http://169.254.97.166:8086 http://192.168.56.1:8086 http://169.254.83.81:8086 http://169.254.82.251:8086 http://169.254.92.2:8086 http://172.31.25.119:8086 http://169.254.253.36:8086 http://169.254.191.241:8086 http://127.0.0.1:8086[0m
[2024-05-05 18:17:20] [32mSuccessfully update searchSpace.[0m
[2024-05-05 18:17:20] [32mCheckpoint saved to C:\Users\azhar\nni-experiments\k4gef6vw\checkpoint.[0m
[2024-05-05 18:17:20] [32mExperiment initialized successfully. Starting exploration strategy...[0m
[2024-05-05 18:17:20] [32mSpawning the initial population. 100 individuals to go.[0m
[2024-05-05 18:17:20] [32m[Individual    1] Random: {'conv1': 0, 'conv2': 1, 'dropout': 0.5, 'feature': 128}[0m
[2024-05-05 18:18:25] [32m[Metric] -65

In [None]:
# Run the TPE experiment; its web portal is served on port 8087.
experiment3.run(port=8087)

[2024-05-05 17:26:17] [32mCreating experiment, Experiment ID: [36mm1xv6sn9[0m
[2024-05-05 17:26:17] [32mStarting web server...[0m
[2024-05-05 17:26:18] [32mSetting up...[0m
[2024-05-05 17:26:19] [32mWeb portal URLs: [36mhttp://169.254.55.27:8087 http://169.254.97.166:8087 http://192.168.56.1:8087 http://169.254.83.81:8087 http://169.254.82.251:8087 http://169.254.92.2:8087 http://172.31.25.119:8087 http://169.254.253.36:8087 http://169.254.191.241:8087 http://127.0.0.1:8087[0m
[2024-05-05 17:26:19] [32mSuccessfully update searchSpace.[0m
[2024-05-05 17:26:19] [32mCheckpoint saved to C:\Users\azhar\nni-experiments\m1xv6sn9\checkpoint.[0m
[2024-05-05 17:26:19] [32mExperiment initialized successfully. Starting exploration strategy...[0m
[2024-05-05 17:38:48] [32mTuning algorithm generated duplicate parameter: {('conv1',): 0, ('conv2',): 1, ('dropout',): 2, ('feature',): 1}[0m
[2024-05-05 17:38:48] [32mUse grid search for deduplication.[0m
[2024-05-05 17:38:48] [32mGri