In [None]:
import os
import tempfile

import numpy as np
import torch
from torch.utils.data import DataLoader
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Loss, Accuracy
import mlflow

from aidefender.exp.datasets import BaseImagesDataset
from aidefender.exp.models import BaseModel
from aidefender.utils.mlflow import load_model, create_art_model
from aidefender.robustness import robustness_accuracy

# Declare constants

In [None]:
# Directory holding the training images (relative to this notebook), one sub-folder per label.
data_path = '../artifacts/data/RP2K_small/'
# The model is saved under a brand-new temporary directory. mkdtemp() never
# removes what it creates, so delete the directory manually when done.
model_path = os.path.join(tempfile.mkdtemp(), 'model/')

In [None]:
# Show the temporary location the trained model will be saved to.
print(model_path)

# Declare dataset class

To train a model on new data, all you need to do is create a new dataset class that defines the labels and derives from `aidefender.exp.datasets.BaseImagesDataset`, as shown below.

The data should be placed into subdirectories corresponding to the labels. For example:
```
data/
    milk/image1.jpg
    milk/image2.jpg
    coffee/image1.jpg
    coffee/image2.jpg
```

In [None]:
class RP2KDataset(BaseImagesDataset):
    """Image dataset for the product categories listed in ``LABELS``."""

    # Class names; the data is expected in sub-directories named after them.
    LABELS = [
        'coffee', 'juice', 'milk', 'soda', 'tea', 'vinegar', 'alcohol',
        'beanpaste', 'cigarettes', 'seasoning', 'yoghurt',
    ]

    def _load_images_labels(self, data_path):
        """Load the ``jpg`` images and their labels found under ``data_path``.

        Args:
            data_path: Root directory of the dataset.

        Returns:
            The ``(images, labels)`` pair produced by ``load_image_dataset``.
        """
        # NOTE(review): `load_image_dataset` is not among the imports visible in
        # this notebook; confirm which module provides it and import it,
        # otherwise this call raises NameError on a fresh kernel.
        images, labels = load_image_dataset(data_path, RP2KDataset.LABELS, file_format='jpg')

        return images, labels

In [None]:
# Build the dataset from the images stored under `data_path`.
dataset = RP2KDataset(data_path)

In [None]:
# Print the dataset's string representation as a quick sanity check.
print(dataset)

# Declare the model

Similarly, to define a new model, all you need to do is subclass the `aidefender.exp.models.BaseModel` class, as shown below.

In [None]:
class ConvSmallModel(BaseModel):
    """Small CNN classifier: five conv/batch-norm/ReLU/max-pool stages plus a two-layer head."""

    def __init__(self, nb_classes, normalize_mean, normalize_std):
        """Build the convolutional stack and the classification head.

        Args:
            nb_classes: Number of logits produced by the final linear layer.
            normalize_mean: Forwarded unchanged to ``BaseModel.__init__``.
            normalize_std: Forwarded unchanged to ``BaseModel.__init__``.
        """
        super().__init__(normalize_mean, normalize_std)

        self.nb_classes = nb_classes

        # All five stages are identical except for the input channels of the
        # first one. Layers are appended in the same order as a hand-written
        # Sequential, so sub-module indices (and state_dict keys) are unchanged.
        stage_layers = []
        for stage_in_channels in (3, 32, 32, 32, 32):
            stage_layers.extend([
                torch.nn.Conv2d(in_channels=stage_in_channels, out_channels=32, kernel_size=3, stride=1),
                torch.nn.BatchNorm2d(32),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(2),
            ])
        self.conv = torch.nn.Sequential(*stage_layers)

        # The head expects a 32-channel 5x5 feature map. With five unpadded 3x3
        # convolutions each followed by 2x2 pooling, that corresponds to square
        # inputs of 222-253 pixels per side (e.g. 224x224).
        head_layers = [
            torch.nn.Linear(32 * 5 * 5, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, self.nb_classes),
        ]
        self.fc = torch.nn.Sequential(*head_layers)

    def forward(self, images):
        """Return the class logits for a batch of images.

        Args:
            images: Batch of images; the first dimension is the batch size.

        Returns:
            Tensor of logits with one row per input image.
        """
        n_images = images.shape[0]

        # The base class handles normalization and the conversion to the
        # PyTorch image layout.
        prepared = super().forward(images)

        # Flatten each image's feature map into a single vector for the head.
        flat_features = self.conv(prepared).reshape(n_images, -1)

        return self.fc(flat_features)

In [None]:
# The dataset supplies the class count and the normalization statistics the model is built with.
model = ConvSmallModel(dataset.nb_classes, dataset.images_mean, dataset.images_std)

In [None]:
# Print the model's string representation to inspect its layers.
print(model)

# Train the model

In [None]:
def train_model(model, dataset, nb_epochs=5, batch_size=64, lr=0.001, weight_decay=0.00001):
    """Train ``model`` on ``dataset`` with Adam and a cross-entropy loss.

    After every epoch the model is evaluated on the same data with
    ``dataset.train`` switched off, and the accuracy and loss are printed.

    Args:
        model: PyTorch module that maps a batch of images to class logits.
        dataset: Dataset yielding ``(image, label)`` pairs and exposing a
            boolean ``train`` attribute. The attribute is set to ``True``
            for training and is still ``True`` when the function returns.
            NOTE(review): presumably it toggles train-time behaviour such as
            augmentation; confirm against the dataset class.
        nb_epochs: Number of passes over the dataset.
        batch_size: Number of samples per mini-batch.
        lr: Adam learning rate.
        weight_decay: Adam weight decay (L2 penalty).

    Returns:
        The same ``model`` instance, left on the device used for training
        (``'cuda'`` when available, otherwise ``'cpu'``).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    trainer = create_supervised_trainer(model, optimizer, criterion, device=device)

    metrics = {
        'accuracy': Accuracy(),
        'nll': Loss(criterion)
    }
    evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        # Evaluate with the dataset in non-training mode. The flag is restored
        # even if evaluation fails, so a crash cannot leave it switched off.
        dataset.train = False
        try:
            evaluator.run(dataloader)
        finally:
            dataset.train = True

        epoch_metrics = evaluator.state.metrics
        print(f"Epoch[{engine.state.epoch}] Train: Accuracy: {epoch_metrics['accuracy']:.2f} | Loss: {epoch_metrics['nll']:.2f}")

    dataset.train = True
    trainer.run(dataloader, max_epochs=nb_epochs)

    return model

In [None]:
# Train for 30 epochs; `model` is rebound to the instance returned by train_model.
model = train_model(model, dataset, nb_epochs=30)

# Save the model in MLflow format

In [None]:
# Move the model to the CPU before serializing (training may have left it on the GPU).
model.to('cpu')
# Save with MLflow's PyTorch flavor, attaching the model signature provided by the dataset.
mlflow.pytorch.save_model(model, model_path, signature=dataset.model_signature)
print(f'Model saved: {model} -> {model_path}')

# Evaluate the model with aidefender

## Load the model and convert it into ART format

In [None]:
# Load the model back from the directory it was just saved to.
mlflow_model = load_model(model_path)

In [None]:
# Print the loaded model's string representation.
print(mlflow_model)

In [None]:
# Convert the loaded model into an ART model for the robustness evaluation.
art_model = create_art_model(mlflow_model)

In [None]:
# Show which class the converted model is an instance of.
print(type(art_model))

## Calculate the robustness accuracy score

In [None]:
# Score the ART model on the dataset's images using the 'fgsm' attack.
score = robustness_accuracy(art_model, dataset.images, attack_name='fgsm')

In [None]:
# Report the robustness accuracy rounded to two decimals.
print(f'Robustness accuracy: {score:.2f}')