# Install AIHWKIT
- This installation is for a GPU-enabled Google Colab environment.
If you're using it on your local machine, please refer to the [documentation](https://aihwkit.readthedocs.io/en/latest/install.html).


In [None]:
# Fetch the prebuilt CUDA 11.7 / CPython 3.10 wheel; -nc skips the download when the file is already present.
!wget -nc https://aihwkit-gpu-demo.s3.us-east.cloud-object-storage.appdomain.cloud/aihwkit-0.8.0+cuda117-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
# %pip installs into the environment of the running kernel.
%pip install aihwkit-0.8.0+cuda117-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

# Install monai package
- For advanced settings, please refer to the [documentation](https://docs.monai.io/en/latest/installation.html#installation-guide)

In [None]:
# Install the MONAI weekly build (with the listed extras) only when `import monai` fails.
!python -c "import monai" || pip install -q "monai-weekly[gdown, nibabel, tqdm, ignite, einops]"
# Same import-or-install guard for matplotlib.
!python -c "import matplotlib" || pip install -q matplotlib
# Render figures inline in the notebook output.
%matplotlib inline

2023-10-30 11:35:56.492771: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-30 11:35:56.492827: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-30 11:35:56.492864: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-30 11:35:56.500947: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Dataset download
- We use the same transformations and dependencies as [nested-unet](https://github.com/4uiiurz1/pytorch-nested-unet) (UNet++), so we clone the repo and reuse its directory structure.

In [None]:
# Clone only when the checkout is missing, so re-running the cell does not fail with "destination path already exists".
!test -d pytorch-nested-unet || git clone https://github.com/4uiiurz1/pytorch-nested-unet

fatal: destination path 'pytorch-nested-unet' already exists and is not an empty directory.


In [None]:
# Absolute path so the cell also works when re-run from inside the checkout.
%cd /content/pytorch-nested-unet

/content/pytorch-nested-unet


In [None]:
# %pip targets the environment of the running kernel.
%pip install -r requirements.txt

# Download the dataset from Kaggle
- Upload your kaggle.json file and execute the following cells. For more details, please refer to this [blog](https://www.analyticsvidhya.com/blog/2021/06/how-to-load-kaggle-datasets-directly-into-google-colab/).

In [None]:
# -p makes directory creation a no-op when ~/.kaggle already exists.
!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle
# Restrict the credentials file to owner read/write.
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download data-science-bowl-2018

mkdir: cannot create directory ‘/root/.kaggle’: File exists
Downloading data-science-bowl-2018.zip to /content/pytorch-nested-unet
 97% 349M/358M [00:03<00:00, 137MB/s]
100% 358M/358M [00:03<00:00, 119MB/s]


In [None]:
# -n never overwrites existing files, so a re-run does not stop at an interactive overwrite prompt;
# -q keeps the per-file listing out of the cell output.
!unzip -q -n /content/pytorch-nested-unet/data-science-bowl-2018.zip -d /content/pytorch-nested-unet/inputs
!unzip -q -n /content/pytorch-nested-unet/inputs/stage1_train.zip -d /content/pytorch-nested-unet/inputs/stage1_train/
!unzip -q -n /content/pytorch-nested-unet/inputs/stage1_test.zip -d /content/pytorch-nested-unet/inputs/stage1_test/
!unzip -q -n /content/pytorch-nested-unet/inputs/stage1_train_labels.csv.zip -d /content/pytorch-nested-unet/inputs/stage1_train_labels/

In [None]:
# Work from the repository root: later cells use relative paths such as inputs/ and models/.
%cd /content/pytorch-nested-unet/
!pwd

/content/pytorch-nested-unet
/content/pytorch-nested-unet


# Data loading in Colab:

In [None]:
import os
from glob import glob

import cv2
import numpy as np
from tqdm import tqdm


def main(img_size=96):
    """Build the resized DSB2018 training set used by the rest of the notebook.

    For every sample directory under ``inputs/stage1_train`` the image is read,
    all of the sample's mask files are merged into one binary mask, and both
    are resized to ``img_size`` x ``img_size`` and written below
    ``inputs/dsb2018_<img_size>/`` (relative to the current working directory).

    Args:
        img_size (int): Side length in pixels of the square outputs.

    Raises:
        FileNotFoundError: If a sample's image cannot be read by OpenCV.
    """
    paths = glob('/content/pytorch-nested-unet/inputs/stage1_train/*')

    image_dir = 'inputs/dsb2018_%d/images' % img_size
    mask_dir = 'inputs/dsb2018_%d/masks/0' % img_size
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(mask_dir, exist_ok=True)

    # tqdm already reports progress; extra per-item prints would break the bar.
    for path in tqdm(paths):
        sample_id = os.path.basename(path)
        img_path = os.path.join(path, 'images', sample_id + '.png')
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: %s' % img_path)

        # Union of all mask files of this sample: a pixel is foreground if any mask marks it.
        mask = np.zeros((img.shape[0], img.shape[1]))
        for mask_path in glob(os.path.join(path, 'masks', '*')):
            mask_ = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) > 127
            mask[mask_] = 1

        # Normalise the image to exactly three channels.
        if len(img.shape) == 2:
            img = np.tile(img[..., None], (1, 1, 3))
        if img.shape[2] == 4:
            img = img[..., :3]

        img = cv2.resize(img, (img_size, img_size))
        mask = cv2.resize(mask, (img_size, img_size))
        cv2.imwrite(os.path.join(image_dir, sample_id + '.png'), img)
        cv2.imwrite(os.path.join(mask_dir, sample_id + '.png'),
                    (mask * 255).astype('uint8'))


if __name__ == '__main__':
    main()


# Helper functions:

In [None]:
import os
import cv2
import numpy as np
import torch
import torch.utils.data


class Dataset(torch.utils.data.Dataset):
    """Segmentation dataset that loads an image and one mask per class with OpenCV.

    Expected directory layout::

        <dataset name>
        ├── images
        │   ├── 0a7e06.jpg
        │   ├── 0aab0a.jpg
        │   └── ...
        └── masks
            ├── 0
            │   ├── 0a7e06.png
            │   ├── 0aab0a.png
            │   └── ...
            ├── 1
            │   ├── 0a7e06.png
            │   └── ...
            ...

    Each sub-directory of ``masks`` is named after a class index and holds one
    mask per image id.
    """

    def __init__(self, img_ids, img_dir, mask_dir, img_ext, mask_ext, num_classes, transform=None):
        """Store the dataset description; no file is touched here.

        Args:
            img_ids (list): Image ids (file names without extension).
            img_dir: Directory holding the image files.
            mask_dir: Directory holding one sub-directory per class index.
            img_ext (str): Image file extension, including the dot.
            mask_ext (str): Mask file extension, including the dot.
            num_classes (int): Number of mask classes to load per image.
            transform (Compose, optional): albumentations pipeline called as
                ``transform(image=..., mask=...)``. Defaults to None.
        """
        self.img_ids = img_ids
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.img_ext = img_ext
        self.mask_ext = mask_ext
        self.num_classes = num_classes
        self.transform = transform

    def __len__(self):
        """Number of image ids in the dataset."""
        return len(self.img_ids)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            tuple: ``(img, mask, meta)`` where ``img`` and ``mask`` are float32
            arrays divided by 255 and transposed to channel-first order, and
            ``meta`` is ``{'img_id': <id>}``.
        """
        img_id = self.img_ids[idx]
        img = cv2.imread(os.path.join(self.img_dir, img_id + self.img_ext))

        # One grayscale mask per class, each given a trailing channel axis before stacking.
        class_masks = [
            cv2.imread(
                os.path.join(self.mask_dir, str(class_idx), img_id + self.mask_ext),
                cv2.IMREAD_GRAYSCALE,
            )[..., None]
            for class_idx in range(self.num_classes)
        ]
        mask = np.dstack(class_masks)

        if self.transform is not None:
            augmented = self.transform(image=img, mask=mask)
            img, mask = augmented['image'], augmented['mask']

        img = (img.astype('float32') / 255).transpose(2, 0, 1)
        mask = (mask.astype('float32') / 255).transpose(2, 0, 1)

        return img, mask, {'img_id': img_id}

# Metric functions


In [None]:
import numpy as np
import torch
import torch.nn.functional as F


def iou_score(output, target):
    """Intersection-over-union of two masks binarised at 0.5.

    Tensor ``output`` is passed through a sigmoid first (it is treated as
    logits); non-tensor ``output`` is thresholded as given. Tensor inputs are
    converted to NumPy arrays on the CPU.

    Args:
        output: Predicted logits (tensor) or scores (array).
        target: Ground-truth mask (tensor or array).

    Returns:
        Smoothed ratio ``(intersection + 1e-5) / (union + 1e-5)``.
    """
    eps = 1e-5

    if torch.is_tensor(output):
        output = torch.sigmoid(output).detach().cpu().numpy()
    if torch.is_tensor(target):
        target = target.detach().cpu().numpy()

    pred_mask = output > 0.5
    true_mask = target > 0.5
    overlap = np.logical_and(pred_mask, true_mask).sum()
    combined = np.logical_or(pred_mask, true_mask).sum()

    return (overlap + eps) / (combined + eps)


def dice_coef(output, target):
    """Soft Dice coefficient between sigmoid(output) and target.

    Both arguments must be tensors; they are flattened with ``view(-1)`` and
    moved to NumPy on the CPU. No thresholding is applied.

    Args:
        output (torch.Tensor): Predicted logits.
        target (torch.Tensor): Ground-truth mask.

    Returns:
        ``(2 * sum(p * t) + 1e-5) / (sum(p) + sum(t) + 1e-5)``.
    """
    eps = 1e-5

    probs = torch.sigmoid(output).view(-1).detach().cpu().numpy()
    truth = target.view(-1).detach().cpu().numpy()
    overlap = (probs * truth).sum()
    denominator = probs.sum() + truth.sum() + eps

    return (2. * overlap + eps) / denominator

In [None]:
import argparse


def str2bool(v):
    """Parse a command-line style truth value.

    Accepts ``'true'``/``'false'`` in any letter case as well as ``'1'``/``'0'``.
    Booleans are returned unchanged and the integers 1 and 0 are accepted too.

    Args:
        v (str | bool | int): Value to interpret.

    Returns:
        bool: The parsed value.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is not one of the accepted spellings.
    """
    if isinstance(v, bool):
        return v
    # Compare text with text: a string never equals the ints 1 or 0,
    # so '1' and '0' must be listed as strings to be recognised.
    token = str(v).lower()
    if token in ('true', '1'):
        return True
    if token in ('false', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')


def count_params(model):
    """Return the total element count of the parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


class AverageMeter(object):
    """Keeps the most recent value together with a running weighted average.

    Attributes are ``val`` (last value), ``sum`` (weighted total), ``count``
    (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.sum += n * val
        self.count += n
        self.avg = self.sum / self.count

# Loss functions

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

try:
    from LovaszSoftmax.pytorch.lovasz_losses import lovasz_hinge
except ImportError:
    pass




class BCEDiceLoss(nn.Module):
    """Half the binary cross-entropy on logits plus a soft Dice loss.

    The Dice term is computed per sample on ``sigmoid(input)`` and averaged
    over the batch.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input, target):
        """Return ``0.5 * BCE(input, target) + (1 - mean_dice)``.

        Args:
            input (torch.Tensor): Predicted logits, batch on the first axis.
            target (torch.Tensor): Ground truth with the same shape as ``input``.
        """
        eps = 1e-5
        batch = target.size(0)

        bce_term = F.binary_cross_entropy_with_logits(input, target)

        # Flatten everything but the batch axis so sums run per sample.
        probs = torch.sigmoid(input).view(batch, -1)
        truth = target.view(batch, -1)
        overlap = (probs * truth).sum(1)
        per_sample_dice = (2. * overlap + eps) / (probs.sum(1) + truth.sum(1) + eps)
        dice_term = 1 - per_sample_dice.sum() / batch

        return 0.5 * bce_term + dice_term


class LovaszHingeLoss(nn.Module):
    """Per-image Lovasz hinge loss, delegating to ``lovasz_hinge``.

    ``lovasz_hinge`` comes from the optional ``LovaszSoftmax`` package, whose
    import failure is silently ignored at the top of this cell. If it was not
    imported, ``forward`` raises ImportError.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input, target):
        """Compute the loss after dropping the channel axis (dim 1) of both tensors.

        Raises:
            ImportError: If ``lovasz_hinge`` was not imported.
        """
        try:
            hinge = lovasz_hinge
        except NameError as exc:
            # Turn the otherwise opaque NameError into an actionable message.
            raise ImportError(
                'LovaszHingeLoss needs LovaszSoftmax.pytorch.lovasz_losses.lovasz_hinge, '
                'which could not be imported.') from exc

        input = input.squeeze(1)
        target = target.squeeze(1)
        loss = hinge(input, target, per_image=True)

        return loss

# Nested UNet
Implementation taken from the [pytorch-nested-unet repository](https://github.com/4uiiurz1/pytorch-nested-unet).

# Configuration of parameters:

In [None]:
# Single source of settings for both the digital and the analog training cells.
config = {
    # Run identity: when 'name' is None the training cell derives one.
    'name': None,
    'epochs': 200,
    'batch_size': 64,

    # Model
    'arch': 'NestedUNet',
    'deep_supervision': False,
    'input_channels': 3,
    'num_classes': 1,
    'input_w': 96,
    'input_h': 96,

    # Loss
    'loss': 'BCEDiceLoss',

    # Dataset (directory below inputs/ and file extensions)
    'dataset': 'dsb2018_96',
    'img_ext': '.png',
    'mask_ext': '.png',

    # Optimizer ('momentum' and 'nesterov' are only read for SGD)
    'optimizer': 'SGD',
    'lr': 1e-3,
    'momentum': 0.9,
    'weight_decay': 1e-4,
    'nesterov': False,

    # Scheduler: 'min_lr' is read for CosineAnnealingLR / ReduceLROnPlateau,
    # 'factor' and 'patience' for ReduceLROnPlateau, 'milestones' and 'gamma' for MultiStepLR.
    'scheduler': 'CosineAnnealingLR',
    'min_lr': 1e-5,
    'factor': 0.1,
    'patience': 2,
    'milestones': '1,2',
    'gamma': 2 / 3,

    # A negative value disables early stopping.
    'early_stopping': -1,
    'num_workers': 4,
}

In [None]:
import torch
from torch import nn



class VGGBlock(nn.Module):
    """Two consecutive 3x3 convolution -> batch norm -> ReLU stages.

    Padding of 1 keeps the spatial size unchanged.

    Args:
        in_channels (int): Channels of the input tensor.
        middle_channels (int): Channels produced by the first convolution.
        out_channels (int): Channels produced by the second convolution.
    """

    def __init__(self, in_channels, middle_channels, out_channels):
        super().__init__()
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels, middle_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(middle_channels)
        self.conv2 = nn.Conv2d(middle_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Apply both stages to ``x`` and return the result."""
        hidden = self.relu(self.bn1(self.conv1(x)))
        return self.relu(self.bn2(self.conv2(hidden)))


class UNet(nn.Module):
    """Five-level U-Net built from ``VGGBlock`` stages.

    The encoder halves the resolution four times with 2x2 max pooling; the
    decoder upsamples bilinearly by 2 and concatenates the encoder feature map
    of the same level before each block. A 1x1 convolution yields
    ``num_classes`` output channels.

    Args:
        num_classes (int): Number of output channels.
        input_channels (int): Number of input channels.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, num_classes, input_channels=3, **kwargs):
        super().__init__()

        # Channel widths of the five resolution levels.
        f0, f1, f2, f3, f4 = 32, 64, 128, 256, 512

        self.pool = nn.MaxPool2d(2, 2)
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Encoder path.
        self.conv0_0 = VGGBlock(input_channels, f0, f0)
        self.conv1_0 = VGGBlock(f0, f1, f1)
        self.conv2_0 = VGGBlock(f1, f2, f2)
        self.conv3_0 = VGGBlock(f2, f3, f3)
        self.conv4_0 = VGGBlock(f3, f4, f4)

        # Decoder path: input is the skip connection plus the upsampled deeper level.
        self.conv3_1 = VGGBlock(f3 + f4, f3, f3)
        self.conv2_2 = VGGBlock(f2 + f3, f2, f2)
        self.conv1_3 = VGGBlock(f1 + f2, f1, f1)
        self.conv0_4 = VGGBlock(f0 + f1, f0, f0)

        self.final = nn.Conv2d(f0, num_classes, kernel_size=1)

    def forward(self, input):
        """Return the ``num_classes``-channel output for ``input``."""
        down, up = self.pool, self.up

        x0_0 = self.conv0_0(input)
        x1_0 = self.conv1_0(down(x0_0))
        x2_0 = self.conv2_0(down(x1_0))
        x3_0 = self.conv3_0(down(x2_0))
        x4_0 = self.conv4_0(down(x3_0))

        x3_1 = self.conv3_1(torch.cat((x3_0, up(x4_0)), 1))
        x2_2 = self.conv2_2(torch.cat((x2_0, up(x3_1)), 1))
        x1_3 = self.conv1_3(torch.cat((x1_0, up(x2_2)), 1))
        x0_4 = self.conv0_4(torch.cat((x0_0, up(x1_3)), 1))

        return self.final(x0_4)


class NestedUNet(nn.Module):
    """UNet++-style network with densely nested skip connections.

    Node ``x{i}_{j}`` lives at resolution level ``i`` and receives every earlier
    node of that level plus the upsampled node ``x{i+1}_{j-1}``.

    Args:
        num_classes (int): Number of output channels.
        input_channels (int): Number of input channels.
        deep_supervision (bool): If True, ``forward`` returns one output per
            top-level node ``x0_1`` .. ``x0_4`` instead of only the last.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, num_classes, input_channels=3, deep_supervision=False, **kwargs):
        super().__init__()

        # Channel widths of the five resolution levels.
        f0, f1, f2, f3, f4 = 32, 64, 128, 256, 512

        self.deep_supervision = deep_supervision

        self.pool = nn.MaxPool2d(2, 2)
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Backbone (column 0).
        self.conv0_0 = VGGBlock(input_channels, f0, f0)
        self.conv1_0 = VGGBlock(f0, f1, f1)
        self.conv2_0 = VGGBlock(f1, f2, f2)
        self.conv3_0 = VGGBlock(f2, f3, f3)
        self.conv4_0 = VGGBlock(f3, f4, f4)

        # Column 1: one same-level input plus the upsampled deeper node.
        self.conv0_1 = VGGBlock(f0 + f1, f0, f0)
        self.conv1_1 = VGGBlock(f1 + f2, f1, f1)
        self.conv2_1 = VGGBlock(f2 + f3, f2, f2)
        self.conv3_1 = VGGBlock(f3 + f4, f3, f3)

        # Column 2: two same-level inputs.
        self.conv0_2 = VGGBlock(f0 * 2 + f1, f0, f0)
        self.conv1_2 = VGGBlock(f1 * 2 + f2, f1, f1)
        self.conv2_2 = VGGBlock(f2 * 2 + f3, f2, f2)

        # Column 3: three same-level inputs.
        self.conv0_3 = VGGBlock(f0 * 3 + f1, f0, f0)
        self.conv1_3 = VGGBlock(f1 * 3 + f2, f1, f1)

        # Column 4: four same-level inputs.
        self.conv0_4 = VGGBlock(f0 * 4 + f1, f0, f0)

        if self.deep_supervision:
            self.final1 = nn.Conv2d(f0, num_classes, kernel_size=1)
            self.final2 = nn.Conv2d(f0, num_classes, kernel_size=1)
            self.final3 = nn.Conv2d(f0, num_classes, kernel_size=1)
            self.final4 = nn.Conv2d(f0, num_classes, kernel_size=1)
        else:
            self.final = nn.Conv2d(f0, num_classes, kernel_size=1)

    def forward(self, input):
        """Return a single output tensor, or a list of four with deep supervision."""
        down, up = self.pool, self.up

        def merge(*features):
            # Concatenate along the channel axis.
            return torch.cat(features, 1)

        x0_0 = self.conv0_0(input)
        x1_0 = self.conv1_0(down(x0_0))
        x0_1 = self.conv0_1(merge(x0_0, up(x1_0)))

        x2_0 = self.conv2_0(down(x1_0))
        x1_1 = self.conv1_1(merge(x1_0, up(x2_0)))
        x0_2 = self.conv0_2(merge(x0_0, x0_1, up(x1_1)))

        x3_0 = self.conv3_0(down(x2_0))
        x2_1 = self.conv2_1(merge(x2_0, up(x3_0)))
        x1_2 = self.conv1_2(merge(x1_0, x1_1, up(x2_1)))
        x0_3 = self.conv0_3(merge(x0_0, x0_1, x0_2, up(x1_2)))

        x4_0 = self.conv4_0(down(x3_0))
        x3_1 = self.conv3_1(merge(x3_0, up(x4_0)))
        x2_2 = self.conv2_2(merge(x2_0, x2_1, up(x3_1)))
        x1_3 = self.conv1_3(merge(x1_0, x1_1, x1_2, up(x2_2)))
        x0_4 = self.conv0_4(merge(x0_0, x0_1, x0_2, x0_3, up(x1_3)))

        if not self.deep_supervision:
            return self.final(x0_4)

        return [
            self.final1(x0_1),
            self.final2(x0_2),
            self.final3(x0_3),
            self.final4(x0_4),
        ]

# Training of digital model

In [None]:
import argparse
import os
from collections import OrderedDict
from glob import glob

import pandas as pd
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import yaml
from albumentations.augmentations import transforms
from albumentations.core.composition import Compose, OneOf
from sklearn.model_selection import train_test_split
from torch.optim import lr_scheduler
from tqdm import tqdm
from albumentations.augmentations.geometric.rotate import RandomRotate90
from albumentations.augmentations.geometric import Flip, Resize



# Architecture and loss names known to this notebook.
ARCH_NAMES = ['UNet', 'NestedUNet']
LOSS_NAMES = ['BCEDiceLoss', 'LovaszHingeLoss', 'BCEWithLogitsLoss']



def train(config, train_loader, model, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Inputs and targets are moved to the GPU with ``.cuda()``. With
    ``config['deep_supervision']`` the model is expected to return several
    outputs: the loss is averaged over them and the IoU is taken on the last.

    Args:
        config (dict): Only ``'deep_supervision'`` is read here.
        train_loader: Iterable yielding ``(input, target, meta)`` batches.
        model (nn.Module): Model to optimise; switched to training mode.
        criterion: Callable ``criterion(output, target)`` returning the loss.
        optimizer: Optimizer stepped once per batch.

    Returns:
        OrderedDict: Sample-weighted averages under ``'loss'`` and ``'iou'``.
    """
    avg_meters = {'loss': AverageMeter(),
                  'iou': AverageMeter()}

    model.train()

    # The context manager closes the progress bar even if a batch raises.
    with tqdm(total=len(train_loader)) as pbar:
        for input, target, _ in train_loader:
            input = input.cuda()
            target = target.cuda()

            # compute output
            if config['deep_supervision']:
                outputs = model(input)
                loss = 0
                for output in outputs:
                    loss += criterion(output, target)
                loss /= len(outputs)
                iou = iou_score(outputs[-1], target)
            else:
                output = model(input)
                loss = criterion(output, target)
                iou = iou_score(output, target)

            # compute gradient and do optimizing step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight each batch by its size so the averages are per sample.
            avg_meters['loss'].update(loss.item(), input.size(0))
            avg_meters['iou'].update(iou, input.size(0))

            pbar.set_postfix(OrderedDict([
                ('loss', avg_meters['loss'].avg),
                ('iou', avg_meters['iou'].avg),
            ]))
            pbar.update(1)

    return OrderedDict([('loss', avg_meters['loss'].avg),
                        ('iou', avg_meters['iou'].avg)])


def validate(config, val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Inputs and targets are moved to the GPU with ``.cuda()``. With
    ``config['deep_supervision']`` the loss is averaged over all model outputs
    and the IoU is taken on the last one.

    Args:
        config (dict): Only ``'deep_supervision'`` is read here.
        val_loader: Iterable yielding ``(input, target, meta)`` batches.
        model (nn.Module): Model to evaluate; switched to evaluation mode.
        criterion: Callable ``criterion(output, target)`` returning the loss.

    Returns:
        OrderedDict: Sample-weighted averages under ``'loss'`` and ``'iou'``.
    """
    avg_meters = {'loss': AverageMeter(),
                  'iou': AverageMeter()}

    # switch to evaluate mode
    model.eval()

    # The context manager closes the progress bar even if a batch raises.
    with torch.no_grad(), tqdm(total=len(val_loader)) as pbar:
        for input, target, _ in val_loader:
            input = input.cuda()
            target = target.cuda()

            # compute output
            if config['deep_supervision']:
                outputs = model(input)
                loss = 0
                for output in outputs:
                    loss += criterion(output, target)
                loss /= len(outputs)
                iou = iou_score(outputs[-1], target)
            else:
                output = model(input)
                loss = criterion(output, target)
                iou = iou_score(output, target)

            # Weight each batch by its size so the averages are per sample.
            avg_meters['loss'].update(loss.item(), input.size(0))
            avg_meters['iou'].update(iou, input.size(0))

            pbar.set_postfix(OrderedDict([
                ('loss', avg_meters['loss'].avg),
                ('iou', avg_meters['iou'].avg),
            ]))
            pbar.update(1)

    return OrderedDict([('loss', avg_meters['loss'].avg),
                        ('iou', avg_meters['iou'].avg)])


# Derive a default run name from dataset and architecture; the suffix records
# whether deep supervision is on (wDS) or off (woDS).
if config['name'] is None:
    config['name'] = ('%s_%s_wDS' if config['deep_supervision'] else '%s_%s_woDS') % (
        config['dataset'], config['arch'])
os.makedirs('models/%s' % config['name'], exist_ok=True)

# Echo the effective configuration between two separator lines.
print('-' * 20)
for key, value in config.items():
    print('%s: %s' % (key, value))
print('-' * 20)

# Keep a copy of the configuration next to the model files.
with open('models/%s/config.yml' % config['name'], 'w') as config_file:
    yaml.dump(config, config_file)

# define loss function (criterion)
if config['loss'] == 'BCEWithLogitsLoss':
    criterion = nn.BCEWithLogitsLoss().cuda()
elif config['loss'] == 'BCEDiceLoss':
    criterion = BCEDiceLoss().cuda()
elif config['loss'] == 'LovaszHingeLoss':
    criterion = LovaszHingeLoss().cuda()
else:
    # Fail on a misspelled loss name instead of silently training with Lovasz.
    raise NotImplementedError('Unknown loss: %s' % config['loss'])

cudnn.benchmark = True

# create model
print("=> creating model %s" % config['arch'])
if config['arch'] == 'NestedUNet':
    model = NestedUNet(config['num_classes'], config['input_channels'],
                       config['deep_supervision'])
elif config['arch'] == 'UNet':
    # UNet is listed in ARCH_NAMES; without this branch `model` stayed undefined.
    model = UNet(config['num_classes'], config['input_channels'])
else:
    raise NotImplementedError('Unknown architecture: %s' % config['arch'])

model = model.cuda()

# Only parameters that require gradients are handed to the optimizer.
params = [p for p in model.parameters() if p.requires_grad]
if config['optimizer'] == 'Adam':
    optimizer = optim.Adam(params,
                           lr=config['lr'],
                           weight_decay=config['weight_decay'])
elif config['optimizer'] == 'SGD':
    optimizer = optim.SGD(params,
                          lr=config['lr'],
                          momentum=config['momentum'],
                          nesterov=config['nesterov'],
                          weight_decay=config['weight_decay'])
else:
    raise NotImplementedError

# Learning-rate schedule; 'ConstantLR' means no scheduler object at all.
if config['scheduler'] == 'CosineAnnealingLR':
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                               T_max=config['epochs'],
                                               eta_min=config['min_lr'])
elif config['scheduler'] == 'ReduceLROnPlateau':
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               factor=config['factor'],
                                               patience=config['patience'],
                                               verbose=1,
                                               min_lr=config['min_lr'])
elif config['scheduler'] == 'MultiStepLR':
    # 'milestones' is a comma-separated string of epoch indices.
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=list(map(int, config['milestones'].split(','))),
                                         gamma=config['gamma'])
elif config['scheduler'] == 'ConstantLR':
    scheduler = None
else:
    raise NotImplementedError

# Data loading code
# Sort the ids: glob() gives no ordering guarantee, so without sorting the
# seeded train/validation split below would not be reproducible.
img_ids = sorted(
    os.path.splitext(os.path.basename(p))[0]
    for p in glob(os.path.join('inputs', config['dataset'], 'images', '*' + config['img_ext']))
)

train_img_ids, val_img_ids = train_test_split(img_ids, test_size=0.2, random_state=41)

# Training pipeline: random geometric and colour augmentation, then resize and normalise.
train_transform = Compose([
    RandomRotate90(),
    Flip(),
    OneOf([
        transforms.HueSaturationValue(),
        transforms.RandomBrightness(),
        transforms.RandomContrast(),
    ], p=1),
    Resize(config['input_h'], config['input_w']),
    transforms.Normalize(),
])

# Validation pipeline: deterministic resize and normalise only.
val_transform = Compose([
    Resize(config['input_h'], config['input_w']),
    transforms.Normalize(),
])

train_dataset = Dataset(
    img_ids=train_img_ids,
    img_dir=os.path.join('inputs', config['dataset'], 'images'),
    mask_dir=os.path.join('inputs', config['dataset'], 'masks'),
    img_ext=config['img_ext'],
    mask_ext=config['mask_ext'],
    num_classes=config['num_classes'],
    transform=train_transform)
val_dataset = Dataset(
    img_ids=val_img_ids,
    img_dir=os.path.join('inputs', config['dataset'], 'images'),
    mask_dir=os.path.join('inputs', config['dataset'], 'masks'),
    img_ext=config['img_ext'],
    mask_ext=config['mask_ext'],
    num_classes=config['num_classes'],
    transform=val_transform)

# Training batches are shuffled and the last incomplete batch is dropped.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=config['num_workers'],
    drop_last=True)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    drop_last=False)

# Per-epoch history, rewritten to models/<name>/log.csv after every epoch.
log = OrderedDict([
    ('epoch', []),
    ('lr', []),
    ('loss', []),
    ('iou', []),
    ('val_loss', []),
    ('val_iou', []),
])

best_iou = 0
trigger = 0  # epochs since the last improvement of the validation IoU
for epoch in range(config['epochs']):
    print('Epoch [%d/%d]' % (epoch, config['epochs']))

    # Learning rate in effect for this epoch, read before the scheduler changes it.
    current_lr = optimizer.param_groups[0]['lr']

    # train for one epoch
    train_log = train(config, train_loader, model, criterion, optimizer)
    # evaluate on validation set
    val_log = validate(config, val_loader, model, criterion)

    # ReduceLROnPlateau needs the monitored metric; every other scheduler
    # (including MultiStepLR) is stepped without arguments. 'ConstantLR' has none.
    if config['scheduler'] == 'ReduceLROnPlateau':
        scheduler.step(val_log['loss'])
    elif scheduler is not None:
        scheduler.step()

    print('loss %.4f - iou %.4f - val_loss %.4f - val_iou %.4f'
          % (train_log['loss'], train_log['iou'], val_log['loss'], val_log['iou']))

    log['epoch'].append(epoch)
    log['lr'].append(current_lr)
    log['loss'].append(train_log['loss'])
    log['iou'].append(train_log['iou'])
    log['val_loss'].append(val_log['loss'])
    log['val_iou'].append(val_log['iou'])

    pd.DataFrame(log).to_csv('models/%s/log.csv' %
                              config['name'], index=False)

    trigger += 1

    # Checkpoint only when the validation IoU improves.
    if val_log['iou'] > best_iou:
        torch.save(model.state_dict(), 'models/%s/model.pth' %
                    config['name'])
        best_iou = val_log['iou']
        print("=> saved best model")
        trigger = 0

    # early stopping
    if config['early_stopping'] >= 0 and trigger >= config['early_stopping']:
        print("=> early stopping")
        break

    torch.cuda.empty_cache()



# Analog training

In [None]:
from aihwkit.nn import AnalogConv2d, AnalogLinear, AnalogSequential
from aihwkit.nn.conversion import convert_to_analog_mapped, convert_to_analog
from aihwkit.optim import AnalogSGD
from torch.optim import SGD
from aihwkit.simulator.configs import FloatingPointRPUConfig, SingleRPUConfig, UnitCellRPUConfig, InferenceRPUConfig, DigitalRankUpdateRPUConfig
from aihwkit.simulator.configs.devices import *
from aihwkit.simulator.configs.utils import PulseType
# from aihwkit.simulator.rpu_base import cuda
from aihwkit.inference import BaseNoiseModel, PCMLikeNoiseModel, StateIndependentNoiseModel
from aihwkit.simulator.configs.utils import WeightClipType,WeightModifierType, IOParameters
from aihwkit.inference.compensation.drift import GlobalDriftCompensation

from aihwkit.simulator.configs.utils import BoundManagementType
from aihwkit.simulator.presets.utils import PresetIOParameters
import math

In [None]:
def create_rpu_config_new():
    """Build the ``InferenceRPUConfig`` used to convert the model to analog.

    Sets fixed-value weight clipping, the weight modifier, the tile mapping,
    8-bit input/output discretisation, a PCM-like inference noise model and
    global drift compensation.

    Returns:
        InferenceRPUConfig: The populated configuration.
    """
    rpu_config = InferenceRPUConfig()

    # Weight clipping to a fixed value.
    rpu_config.clip.type = WeightClipType.FIXED_VALUE
    rpu_config.clip.fixed_value = 1.0

    # Weight modifier settings.
    rpu_config.modifier.pdrop = 0  # Drop connect.
    rpu_config.modifier.std_dev = 0.5
    rpu_config.modifier.rel_to_actual_wmax = True

    # Mapping of layers onto tiles.
    rpu_config.mapping.digital_bias = True
    # NOTE(review): this attribute was assigned 0.4 and then immediately
    # overwritten with True, so the 0.4 never took effect. The effective value
    # (True == 1.0) is kept here as an explicit float. The second assignment may
    # have been meant for `weight_scaling_columnwise` - confirm the intent.
    rpu_config.mapping.weight_scaling_omega = 1.0
    rpu_config.mapping.max_input_size = 256
    rpu_config.mapping.max_output_size = 256

    # NOTE(review): confirm this attribute name exists in the installed aihwkit
    # version; assigning an unknown attribute name may have no effect.
    rpu_config.mapping.learn_out_scaling_alpha = True

    # Forward-pass I/O non-idealities.
    rpu_config.forward = PresetIOParameters()
    rpu_config.forward.inp_res = 1/256  # 8-bit DAC discretization.
    rpu_config.forward.out_res = 1/256  # 8-bit ADC discretization.
    rpu_config.forward.bound_management = BoundManagementType.NONE

    # Inference noise model.
    rpu_config.noise_model = PCMLikeNoiseModel(g_max=25)

    # drift compensation
    rpu_config.drift_compensation = GlobalDriftCompensation()
    return rpu_config

def create_analog_optimizer(model, lr):
    """Build an ``AnalogSGD`` optimizer for ``model``.

    Args:
        model (nn.Module): model to be trained
        lr (float): learning rate passed to ``AnalogSGD``

    Returns:
        Optimizer: the analog optimizer, after ``regroup_param_groups(model)``
        has been called on it
    """
    analog_sgd = AnalogSGD(model.parameters(), lr)
    analog_sgd.regroup_param_groups(model)
    return analog_sgd

#### Converting the model to analog
- We use the rpu_config that defines the hardware and the type of noise that is applied


In [None]:
# Build the hardware/noise configuration, then convert the trained digital
# `model` from the cells above into its analog counterpart.
rpu_config = create_rpu_config_new()
model_analog = convert_to_analog_mapped(model, rpu_config)

In [None]:
# Show the converted model as the cell output.
model_analog

In [None]:
from aihwkit.utils.analog_info import analog_summary

# Take the input shape from `config` rather than repeating the numbers as literals.
analog_summary(model_analog, (config['batch_size'], config['input_channels'],
                              config['input_h'], config['input_w']))

# Analog training loop


In [None]:

# Derive a default run name from dataset and architecture; the suffix records
# whether deep supervision is on (wDS) or off (woDS).
if config['name'] is None:
    config['name'] = ('%s_%s_wDS' if config['deep_supervision'] else '%s_%s_woDS') % (
        config['dataset'], config['arch'])
os.makedirs('models/%s' % config['name'], exist_ok=True)

# Echo the effective configuration between two separator lines.
print('-' * 20)
for key, value in config.items():
    print('%s: %s' % (key, value))
print('-' * 20)

# Keep a copy of the configuration next to the model files.
with open('models/%s/config.yml' % config['name'], 'w') as config_file:
    yaml.dump(config, config_file)

# define loss function (criterion)
if config['loss'] == 'BCEWithLogitsLoss':
    criterion = nn.BCEWithLogitsLoss().cuda()
elif config['loss'] == 'BCEDiceLoss':
    criterion = BCEDiceLoss().cuda()
elif config['loss'] == 'LovaszHingeLoss':
    criterion = LovaszHingeLoss().cuda()
else:
    # Fail on a misspelled loss name instead of silently training with Lovasz.
    raise NotImplementedError('Unknown loss: %s' % config['loss'])

cudnn.benchmark = True

# No model is built in this cell: `model_analog` from the conversion cell is trained.
print("=> creating model %s" % config['arch'])

# Create an analog optimizer. It is built from the learning rate only, so
# config['optimizer'], 'momentum', 'nesterov' and 'weight_decay' are not applied
# in this stage. (A torch optimizer used to be constructed here and was then
# immediately replaced by this one.)
optimizer = create_analog_optimizer(model_analog, lr=config['lr'])

# Learning-rate schedule; 'ConstantLR' means no scheduler object at all.
if config['scheduler'] == 'CosineAnnealingLR':
    scheduler = lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=config['epochs'], eta_min=config['min_lr'])
elif config['scheduler'] == 'ReduceLROnPlateau':
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=config['factor'], patience=config['patience'],
                                                verbose=1, min_lr=config['min_lr'])
elif config['scheduler'] == 'MultiStepLR':
    # 'milestones' is a comma-separated string of epoch indices.
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[int(e) for e in config['milestones'].split(',')], gamma=config['gamma'])
elif config['scheduler'] == 'ConstantLR':
    scheduler = None
else:
    raise NotImplementedError

# Data loading code
# Sort the ids: glob() gives no ordering guarantee, so without sorting the
# seeded train/validation split below would not be reproducible.
img_ids = sorted(
    os.path.splitext(os.path.basename(p))[0]
    for p in glob(os.path.join('inputs', config['dataset'], 'images', '*' + config['img_ext']))
)

train_img_ids, val_img_ids = train_test_split(img_ids, test_size=0.2, random_state=41)

# Training pipeline: random geometric and colour augmentation, then resize and normalise.
train_transform = Compose([
    RandomRotate90(),
    Flip(),
    OneOf([
        transforms.HueSaturationValue(),
        transforms.RandomBrightness(),
        transforms.RandomContrast(),
    ], p=1),
    Resize(config['input_h'], config['input_w']),
    transforms.Normalize(),
])

# Validation pipeline: deterministic resize and normalise only.
val_transform = Compose([
    Resize(config['input_h'], config['input_w']),
    transforms.Normalize(),
])

train_dataset = Dataset(
    img_ids=train_img_ids,
    img_dir=os.path.join('inputs', config['dataset'], 'images'),
    mask_dir=os.path.join('inputs', config['dataset'], 'masks'),
    img_ext=config['img_ext'],
    mask_ext=config['mask_ext'],
    num_classes=config['num_classes'],
    transform=train_transform)
val_dataset = Dataset(
    img_ids=val_img_ids,
    img_dir=os.path.join('inputs', config['dataset'], 'images'),
    mask_dir=os.path.join('inputs', config['dataset'], 'masks'),
    img_ext=config['img_ext'],
    mask_ext=config['mask_ext'],
    num_classes=config['num_classes'],
    transform=val_transform)

# Training batches are shuffled and the last incomplete batch is dropped.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=config['num_workers'],
    drop_last=True)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    drop_last=False)

# Per-epoch history, rewritten to models/<name>/log.csv after every epoch.
log = OrderedDict([
    ('epoch', []),
    ('lr', []),
    ('loss', []),
    ('iou', []),
    ('val_loss', []),
    ('val_iou', []),
])

best_iou = 0
trigger = 0  # epochs since the last improvement of the validation IoU
for epoch in range(config['epochs']):
    print('Epoch [%d/%d]' % (epoch, config['epochs']))

    # Learning rate in effect for this epoch, read before the scheduler changes it.
    current_lr = optimizer.param_groups[0]['lr']

    # train for one epoch
    train_log = train(config, train_loader, model_analog, criterion, optimizer)
    # evaluate on validation set
    val_log = validate(config, val_loader, model_analog, criterion)

    # ReduceLROnPlateau needs the monitored metric; every other scheduler
    # (including MultiStepLR) is stepped without arguments. 'ConstantLR' has none.
    if config['scheduler'] == 'ReduceLROnPlateau':
        scheduler.step(val_log['loss'])
    elif scheduler is not None:
        scheduler.step()

    print('loss %.4f - iou %.4f - val_loss %.4f - val_iou %.4f'
          % (train_log['loss'], train_log['iou'], val_log['loss'], val_log['iou']))

    log['epoch'].append(epoch)
    log['lr'].append(current_lr)
    log['loss'].append(train_log['loss'])
    log['iou'].append(train_log['iou'])
    log['val_loss'].append(val_log['loss'])
    log['val_iou'].append(val_log['iou'])

    pd.DataFrame(log).to_csv('models/%s/log.csv' %
                              config['name'], index=False)

    trigger += 1

    # Checkpoint only when the validation IoU improves.
    if val_log['iou'] > best_iou:
        torch.save(model_analog.state_dict(), 'models/%s/model2.pth' %
                    config['name'])
        best_iou = val_log['iou']
        print("=> saved best model")
        trigger = 0

    # early stopping
    if config['early_stopping'] >= 0 and trigger >= config['early_stopping']:
        print("=> early stopping")
        break

    torch.cuda.empty_cache()



# Test the model at different times after training
- We evaluate the effect of drift 1 second, 1 hour, 1 day and 30 days after training


In [None]:
from collections import OrderedDict
from tqdm import tqdm

def test_inference(config, model, criterion, test_loader,
                   t_inference_list=(1, 3600, 3600 * 24, 3600 * 24 * 30)):
    """Evaluate ``model`` on ``test_loader`` after simulated weight drift.

    For every time in ``t_inference_list`` the function calls
    ``model.drift_analog_weights(t)``, runs one full pass over
    ``test_loader`` and prints the average loss and IoU of that pass.

    Args:
        config: Configuration mapping; only ``config['deep_supervision']``
            is read here.
        model: Network exposing ``drift_analog_weights``. It is switched to
            evaluation mode and left in that mode.
        criterion: Loss callable invoked as ``criterion(output, labels)``.
        test_loader: Sized iterable yielding ``(images, labels, extra)``
            batches; the third item is ignored.
        t_inference_list: Inference times in seconds, evaluated in order.
            Defaults to 1 second, 1 hour, 1 day and 30 days.

    Returns:
        OrderedDict with the average ``'loss'`` and ``'iou'`` measured at
        the last time in ``t_inference_list``.
    """
    # Do not rely on an earlier cell having left the model in evaluation mode.
    model.eval()

    # Running averages for the current time point
    avg_meters = {'loss': AverageMeter(),
                  'iou': AverageMeter()}

    with torch.no_grad():
        for t_inference in t_inference_list:
            # Reset at the *start* of a time point so the averages of the
            # last time point are still available for the return value.
            for meter in avg_meters.values():
                meter.reset()

            model.drift_analog_weights(t_inference)

            # One progress bar per time point: each pass visits every batch once.
            pbar = tqdm(total=len(test_loader))
            for images, labels, _ in test_loader:
                images = images.cuda()
                labels = labels.cuda()

                # Compute model output
                if config['deep_supervision']:
                    # Average the loss over all outputs; IoU uses the last one.
                    outputs = model(images)
                    loss = 0
                    for output in outputs:
                        loss += criterion(output, labels)
                    loss /= len(outputs)
                    iou = iou_score(outputs[-1], labels)
                else:
                    output = model(images)
                    loss = criterion(output, labels)
                    iou = iou_score(output, labels)

                # Weight each batch by its size so a smaller final batch
                # does not skew the averages.
                batch_size = labels.size(0)
                avg_meters['loss'].update(loss.item(), batch_size)
                avg_meters['iou'].update(iou, batch_size)

                pbar.update(1)
            pbar.close()

            # Displaying statistics after inference
            print(f'Inference Time: {t_inference: .2e} seconds')
            print(f'Average Loss: {avg_meters["loss"].avg:.4f}\tAverage IoU: {avg_meters["iou"].avg:.4f}')

    return OrderedDict([('loss', avg_meters['loss'].avg),
                        ('iou', avg_meters['iou'].avg)])

# Run the drift evaluation on the validation loader; the returned metrics are
# shown as the cell output.
test_inference(
    config=config,
    model=model_analog,
    criterion=criterion,
    test_loader=val_loader,
)

In [None]:
def dice_score(pred, target, epsilon=1e-6):
    """
    Dice coefficient between a prediction and its ground truth.

    Both tensors are flattened to 1-D, so every element contributes to one
    global score: ``(2 * sum(pred * target) + eps) / (sum(pred) + sum(target) + eps)``.

    Args:
    - pred (torch.Tensor): the predicted tensor
    - target (torch.Tensor): the ground truth tensor, same number of elements as ``pred``
    - epsilon (float): smoothing term added to numerator and denominator so
      two all-zero inputs do not divide by zero

    Returns:
    - dice (torch.Tensor): scalar tensor holding the Dice score
    """
    flat_pred, flat_target = (t.contiguous().view(-1) for t in (pred, target))

    # Element-wise product summed over all positions = overlap of the two masks.
    overlap = (flat_pred * flat_target).sum()

    numerator = 2 * overlap + epsilon
    denominator = flat_pred.sum() + flat_target.sum() + epsilon
    return numerator / denominator