In [27]:
import os
import time
import numpy as np
# para incluir os gráficos no nb
%matplotlib inline
import matplotlib.pyplot as plt

from PIL import Image
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import confusion_matrix, classification_report

import torch
from torch.utils.data import Dataset, DataLoader

from monai.transforms import Compose, LoadImage, AddChannel, ScaleIntensity, ToTensor, RandRotate, RandFlip, RandZoom
from monai.networks.nets import densenet121
from monai.metrics import compute_roc_auc
from monai.utils import set_determinism
from monai.config import print_config

from torchinfo import summary

import nni
np.random.seed(0)
set_determinism(seed=0)
print_config()

MONAI version: 1.1.0
Numpy version: 1.23.5
Pytorch version: 2.0.1
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: a2ec3752f54bfc3b40e7952234fbeb5452ed63e3
MONAI __file__: c:\Users\Utilizador\anaconda3\envs\AP2\lib\site-packages\monai\__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: NOT INSTALLED or UNKNOWN VERSION.
scikit-image version: NOT INSTALLED or UNKNOWN VERSION.
Pillow version: 9.4.0
Tensorboard version: 2.12.3
gdown version: NOT INSTALLED or UNKNOWN VERSION.
TorchVision version: 0.15.2+cpu
tqdm version: 4.65.0
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 5.9.0
pandas version: 2.0.2
einops version: NOT INSTALLED or UNKNOWN VERSION.
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: NOT INSTALLED or UNKNOWN VERSION.
pynrrd version: NOT INSTALLED or UNKNOWN VERSION.

For details about installing the optional dependencies, please visit:
    

In [28]:
import torch
import torch.nn.functional as F
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper

class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.depthwise = nn.Conv2d(in_ch, in_ch, kernel_size=3, groups=in_ch)
        self.pointwise = nn.Conv2d(in_ch, out_ch, kernel_size=1)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))


@model_wrapper
class ModelSpace(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        # LayerChoice is used to select a layer between Conv2d and DwConv.
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
            
        # ValueChoice is used to select a dropout rate.
        # ValueChoice can be used as parameter of modules wrapped in `nni.retiarii.nn.pytorch`
        # or customized modules wrapped with `@basic_unit`.
        self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))  # choose dropout rate from 0.25, 0.5 and 0.75
        self.dropout2 = nn.Dropout(0.5)
        feature = nn.ValueChoice([64, 128, 256])
        self.fc1 = nn.Linear(9216, feature)
        self.fc2 = nn.Linear(feature, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(self.conv2(x), 2)
        x = torch.flatten(self.dropout1(x), 1)
        x = self.fc2(self.dropout2(F.relu(self.fc1(x))))
        output = F.log_softmax(x, dim=1)
        return output


model_space = ModelSpace()
model_space

ModelSpace(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [29]:
import nni.retiarii.strategy as strategy
search_strategy = strategy.Random(dedup=True)  # dedup=False if deduplication is not wanted

In [30]:
DATA_DIR = 'archive/images/'

#conta os ficheiros do dataset por label
def file_list(dir_path):
    class_names = sorted([x for x in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, x))])
    num_class = len(class_names)
    image_files = [[os.path.join(dir_path, class_name, x) 
                    for x in os.listdir(os.path.join(dir_path, class_name))] 
                   for class_name in class_names] #constrói uma lista de listas de ficheiros por diretoria de classe
    image_file_list = list()
    image_label_list = list()
    for i, class_name in enumerate(class_names): #para juntar as listas e construir a lista com os labels em numérico
        image_file_list.extend(image_files[i])
        image_label_list.extend([i] * len(image_files[i]))
    return image_file_list, image_label_list, class_names

image_file_list, image_label_list, class_names = file_list(DATA_DIR)
print('Total image count:', len(image_label_list))
image_width, image_height = Image.open(image_file_list[0]).size
print("Image dimensions:", image_width, "x", image_height)
labels_cout= [image_label_list.count(x) for x in set(image_label_list)]
for i,label in enumerate(class_names):
    print(f"Label: {label:11}  {labels_cout[i]:5d} cases")

Total image count: 10500
Image dimensions: 256 x 256
Label: agricultural    500 cases
Label: airplane       500 cases
Label: baseballdiamond    500 cases
Label: beach          500 cases
Label: buildings      500 cases
Label: chaparral      500 cases
Label: denseresidential    500 cases
Label: forest         500 cases
Label: freeway        500 cases
Label: golfcourse     500 cases
Label: harbor         500 cases
Label: intersection    500 cases
Label: mediumresidential    500 cases
Label: mobilehomepark    500 cases
Label: overpass       500 cases
Label: parkinglot     500 cases
Label: river          500 cases
Label: runway         500 cases
Label: sparseresidential    500 cases
Label: storagetanks    500 cases
Label: tenniscourt    500 cases


In [31]:
from PIL import Image

def holdout_dataset(image_file_list, image_label_list):
    valid_frac, test_frac = 0.1, 0.1
    trainX, trainY = list(), list()
    valX, valY = list(), list()
    testX, testY = list(), list()
    for i in range(len(image_label_list)):
        image_path = image_file_list[i]
        image = Image.open(image_path)
        image = image.resize((32, 32))  # Resize image to 32x32
        image_array = np.array(image)
        if image_array.shape == (32, 32, 3):  # Check if the image has the desired size
            rann = np.random.random()
            if rann < valid_frac:
                valX.append(image_array)
                valY.append(image_label_list[i])
            elif rann < test_frac + valid_frac:
                testX.append(image_array)
                testY.append(image_label_list[i])
            else:
                trainX.append(image_array)
                trainY.append(image_label_list[i])
    return trainX, trainY, valX, valY, testX, testY


trainX, trainY, valX, valY, testX, testY = holdout_dataset(image_file_list,image_label_list)

print("Training count =",len(trainX))
print("Validation count =", len(valX))
print("Test count =",len(testX))

Training count = 8333
Validation count = 1074
Test count = 1093


In [32]:
from torchvision.transforms import Resize

train_transforms = Compose([
    LoadImage(image_only=True),
    AddChannel(),
    ScaleIntensity(),
    RandRotate(range_x=15, prob=0.5, keep_size=True),
    RandFlip(spatial_axis=0, prob=0.5),
    RandZoom(min_zoom=0.9, max_zoom=1.1, prob=0.5, keep_size=True),
    Resize((32, 32)),  # Resize images to 32x32
    ToTensor()
])

val_transforms = Compose([
    LoadImage(image_only=True),
    AddChannel(),
    ScaleIntensity(),
    Resize((32, 32)),  # Resize images to 32x32
    ToTensor()
])



In [33]:
BATCH_SIZE = 1024


class MedNISTDataset(torch.utils.data.Dataset):
    def __init__(self, image_files, labels, transforms):
        self.image_files = image_files
        self.labels = labels
        self.transforms = transforms
    def __len__(self):
        return len(self.image_files)
    def __getitem__(self, index):
        return self.transforms(self.image_files[index]), self.labels[index]


# test_ds = MedNISTDataset(testX, testY, val_transforms)
# test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

In [34]:
import nni

from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

@nni.trace
def train_epoch(model, device, train_loader, optimizer, epoch):
    loss_fn = torch.nn.CrossEntropyLoss()
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

@nni.trace
def test_epoch(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)

    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
          correct, len(test_loader.dataset), accuracy))

    return accuracy

@nni.trace
def evaluate_model(model_cls):
    # "model_cls" is a class, need to instantiate
    model = model_cls()

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to('cpu')

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    
    train_ds = MedNISTDataset(trainX, trainY, train_transforms)
    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    
    
    val_ds = MedNISTDataset(valX, valY, val_transforms)
    val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE*2, num_workers=2)
    
    train_loader = train_dl
    test_loader = val_dl

    for epoch in range(3):
        # train the model for one epoch
        train_epoch(model, device, train_loader, optimizer, epoch)
        # test the model for one epoch
        accuracy = test_epoch(model, device, test_loader)
        # call report intermediate result. Result can be float or dict
        nni.report_intermediate_result(accuracy)

    # report final test result
    nni.report_final_result(accuracy)

In [35]:
import nni.retiarii.evaluator.pytorch.lightning as pl
from nni.retiarii import serialize
from torchvision import transforms
from torchvision.datasets import MNIST

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = serialize(MedNISTDataset,trainX, trainY, train_transforms)
test_dataset = serialize(MedNISTDataset, testX, testY, val_transforms)

evaluator = pl.Classification(
    train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
    val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
    max_epochs=10
)


2023-06-02 23:50:46,207 - GPU available: True (cuda), used: True
2023-06-02 23:50:46,207 - TPU available: False, using: 0 TPU cores
2023-06-02 23:50:46,207 - IPU available: False, using: 0 IPUs
2023-06-02 23:50:46,208 - HPU available: False, using: 0 HPUs


In [36]:
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
exp = RetiariiExperiment(model_space, evaluator, [], search_strategy)
exp_config = RetiariiExeConfig('local')
exp_config.experiment_name = 'mnist_search'

In [37]:
exp_config.max_trial_number = 4   # spawn 4 trials at most
exp_config.trial_concurrency = 1  # will run two trials concurrently

In [38]:
exp_config.trial_gpu_number = 1
exp_config.training_service.use_active_gpu = True

In [39]:
exp.run(exp_config, 8081)

[2023-06-02 23:50:48] [32mCreating experiment, Experiment ID: [36mkzveobsn[0m
2023-06-02 23:50:48,401 - Creating experiment, Experiment ID: ${CYAN}kzveobsn
[2023-06-02 23:50:48] [32mStarting web server...[0m
2023-06-02 23:50:48,407 - Starting web server...
[2023-06-02 23:50:48] [32mSetting up...[0m
2023-06-02 23:50:48,973 - Setting up...
[2023-06-02 23:50:49] [32mWeb portal URLs: [36mhttp://192.168.0.218:8081 http://169.254.129.193:8081 http://192.168.33.1:8081 http://127.0.0.1:8081[0m
2023-06-02 23:50:49,057 - Web portal URLs: ${CYAN}http://192.168.0.218:8081 http://169.254.129.193:8081 http://192.168.33.1:8081 http://127.0.0.1:8081




[2023-06-02 23:50:49] [32mDispatcher started[0m




2023-06-02 23:50:49,062 - Dispatcher started
[2023-06-02 23:50:56] [32mStart strategy...[0m
2023-06-02 23:50:56,233 - Start strategy...
[2023-06-02 23:50:56] [32mSuccessfully update searchSpace.[0m
2023-06-02 23:50:56,266 - Successfully update searchSpace.
[2023-06-02 23:50:56] [32mRandom search running in fixed size mode. Dedup: on.[0m
2023-06-02 23:50:56,267 - Random search running in fixed size mode. Dedup: on.
2023-06-02 23:57:58,575 - KeyboardInterrupt detected
[2023-06-02 23:57:58] [32mStopping experiment, please wait...[0m
2023-06-02 23:57:58,576 - Stopping experiment, please wait...
[2023-06-02 23:57:58] [32mDispatcher exiting...[0m
2023-06-02 23:57:58,596 - Dispatcher exiting...
[2023-06-02 23:58:00] [32mDispatcher terminiated[0m
2023-06-02 23:58:00,512 - Dispatcher terminiated
[2023-06-02 23:58:00] [32mExperiment stopped[0m
2023-06-02 23:58:00,514 - Experiment stopped
[2023-06-02 23:58:00] [32mSearch process is done, the experiment is still alive, `stop()` can 

In [None]:
import os
from pathlib import Path


def evaluate_model_with_visualization(model_cls):
    model = model_cls()
    # dump the model into an onnx
    if 'NNI_OUTPUT_DIR' in os.environ:
        dummy_input = torch.zeros(1, 3, 32, 32)
        torch.onnx.export(model, (dummy_input, ),
                          Path(os.environ['NNI_OUTPUT_DIR']) / 'model.onnx')
    evaluate_model(model_cls)

In [None]:
for model_dict in exp.export_top_models(formatter='dict'):
    print(model_dict)