In [219]:
import warnings
from importlib import reload

import daml_stage
import maite.protocols.image_classification as ic
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.models import ResNet50_Weights, resnet50
from utils import collect_metrics, collect_report_consumables, load_models_and_datasets, run_stages

# Re-execute daml_stage so edits made to the module on disk take effect
# without restarting the kernel.
reload(daml_stage)

# Import the class only after the reload so the name binds to the refreshed
# definition rather than the one loaded by the earlier `import daml_stage`.
from daml_stage import DamlTestStage

# Blanket-suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used in later cells — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Configure Pipeline Stages


### Panel Inputs

Based directly on the Panel mockup. Values are expected to change.


In [220]:
# Panel selections, mirrored as plain variables. Each comment lists the choices
# offered by the Panel mockup.
# NOTE(review): in the cells visible in this notebook only `performance` is read
# later (passed as `target_performance`); the other selections do not appear to
# be mapped onto the stage configuration yet — confirm.

# Model to evaluate. Choices: ['CenterNet V2', 'visdrone-yolo']
model_str = "CenterNet V2"

# Base dataset split.
# Choices: ['dev_train', 'dev_val', 'dev_test', 'op_train', 'op_val', 'op_test']
base_dataset_split = "dev_train"

# Target dataset split.
# Choices: ['dev_train', 'dev_val', 'dev_test', 'op_train', 'op_val', 'op_test']
target_dataset_split = "dev_val"

# Performance metric. Choices: ['Accuracy', 'mAP']
metric = "Accuracy"

# Target performance, as a float.
performance = 0.92

# Which dataset(s) to lint. Choices: ['Base', 'Target', 'Both']
linting = "Both"

# Which dataset(s) to run bias detection on. Choices: ['Base', 'Target', 'Both']
bias_detection = "Both"

# Which dataset(s) to run feasibility on. Choices: ['Base', 'Target', 'Both']
feasibility = "Both"

# Which dataset(s) to run sufficiency on. Choices: ['Base', 'Target', 'Both']
sufficiency = "Both"

# Outlier detection method. Choices: ["AE", "VAE", "VAEGMM"]
outlier_detection = "AE"

### Backend Script

A backend script is needed to convert the Panel config file into usable models, datasets, and TestStages.


Load model


In [221]:
# Mock model: ResNet50 with torchvision's default pretrained weights.
weights = ResNet50_Weights.DEFAULT
# Preprocessing pipeline bundled with the weights; reused when the datasets are built.
preprocess = weights.transforms()
# Pass `weights` by keyword: torchvision's model builders declare it keyword-only
# and accept the positional form only through a deprecated compatibility path.
model = resnet50(weights=weights)

# Mock comparison model: same architecture, different pretrained weights.
comparison_weights = ResNet50_Weights.IMAGENET1K_V1
comparison_model = resnet50(weights=comparison_weights)

# Show MAITE compliance for BOTH models as the cell output. A bare isinstance(...)
# in the middle of a cell is evaluated and discarded, so checking each model on
# its own line would only ever display the last result.
# TODO: not sure where the MAITE protocol check is supposed to live.
isinstance(model, ic.Model) and isinstance(comparison_model, ic.Model)

True

Load datasets


In [222]:
# FMOW was too large for testing
class MaiteMNIST(Dataset):
    """Wrap torchvision's ``MNIST`` so each item is an ``(image, label, metadata)`` triple.

    The wrapped dataset yields two-element items; this class appends an empty
    dict as a third element.
    NOTE(review): the empty dict is presumably the per-item metadata slot of
    maite's image-classification Dataset protocol — confirm.
    """

    def __init__(self, train=True, transforms=None):
        """Create the wrapped ``MNIST`` dataset rooted at ``../data/``.

        Args:
            train: Forwarded unchanged to ``MNIST`` as ``train``.
            transforms: Forwarded to ``MNIST`` as its ``transform`` argument.
                Inside this method the name shadows the imported
                ``torchvision.transforms`` module.
        """
        self.dataset = MNIST(
            "../data/",
            train=train,
            transform=transforms,
            download=True,
        )

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        wrapped = self.dataset
        return len(wrapped)

    def __getitem__(self, idx):
        """Return item ``idx`` of the wrapped dataset plus a fresh empty dict."""
        image, label = self.dataset[idx]
        metadata = {}
        return image, label, metadata


# MNIST images have one channel but ResNet50 wants three, so expand the
# grayscale image to three channels before applying the model's preprocessing.
t = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=3),
        preprocess,
    ]
)

# Training split stands in for the development data, test split for operational.
dev_dataset = MaiteMNIST(train=True, transforms=t)
op_dataset = MaiteMNIST(train=False, transforms=t)

# Check both wrappers against maite's image-classification Dataset protocol.
dev_is_compliant = isinstance(dev_dataset, ic.Dataset)
op_is_compliant = isinstance(op_dataset, ic.Dataset)
print(f"Dev Dataset is MAITE compliant: {dev_is_compliant}")
print(f"Op Dataset is MAITE compliant: {op_is_compliant}")

Dev Dataset is MAITE compliant: True
Op Dataset is MAITE compliant: True


Create Stage


In [223]:
# Build the single DAML test stage that the pipeline cells below operate on.
ds = DamlTestStage(
    # Use the Panel's `performance` input instead of a separate hard-coded 0.90,
    # so the stage agrees with the value handed to load_models_and_datasets(...).
    target_performance=performance,
    linting_options=None,
    linting_dataset=["development", "operational"],
    bias_options=["coverage", "parity", "balance"],
    bias_dataset=["development", "operational"],
    drift=True,
    ood_detection=True,
    feasibility_dataset=["development", "operational"],
    sufficiency_dataset=["development", "operational"],
    model_name="model1",
    comparison_model_name="model2",
)
# The pipeline helpers take a list of stages; this notebook uses just one.
stages = [ds]

# Pipeline


In [224]:
# Pass the datasets, both models and the Panel's target performance to the
# helper together with the stage list.
# NOTE(review): presumably the helper attaches these to every stage in
# `stages` — confirm against utils.load_models_and_datasets.
load_models_and_datasets(
    dev_dataset=dev_dataset,
    op_dataset=op_dataset,
    model=model,
    comparison_model=comparison_model,
    target_performance=performance,
    stages=stages,
)

In [225]:
# Run the configured stages.
# NOTE(review): the saved output starts with "Cache hit", which suggests the
# results of this run came from a cache — confirm in utils.run_stages.
run_stages(stages=stages)

Cache hit
{'development': {'balance': {'classes': [], 'factors': []}, 'ber': {'ber': 0.18, 'ber_lower': 0.095}, 'coverage': {'value': 0.9}, 'parity': {'value': 0.25}, 'sufficiency': {'comp_model': [2, 3, 4], 'model': [1, 2, 3], 'steps': [1, 2, 3]}}, 'ks': {'is_drift': False, 'pvalue': 0.8, 'val': 0.1}, 'mmd': {'is_drift': True, 'pvalue': 1.0, 'val': 0.7}, 'operational': {'balance': {'classes': [], 'factors': []}, 'ber': {'ber': 0.18, 'ber_lower': 0.095}, 'coverage': {'value': 0.9}, 'parity': {'value': 0.25}, 'sufficiency': {'comp_model': [2, 3, 4], 'model': [1, 2, 3], 'steps': [1, 2, 3]}}}


In [226]:
# Collect the metrics from the stages; as the cell's last expression, the
# returned value is displayed as the cell output.
collect_metrics(stages=stages)

Returning metrics


{'development': {'ber': {'ber': 0.18, 'ber_lower': 0.095},
  'coverage': {'value': 0.9},
  'parity': {'value': 0.25},
  'balance': {'factors': [], 'classes': []},
  'sufficiency': {'model': [1, 2, 3],
   'comp_model': [2, 3, 4],
   'steps': [1, 2, 3]}},
 'operational': {'ber': {'ber': 0.18, 'ber_lower': 0.095},
  'coverage': {'value': 0.9},
  'parity': {'value': 0.25},
  'balance': {'factors': [], 'classes': []},
  'sufficiency': {'model': [1, 2, 3],
   'comp_model': [2, 3, 4],
   'steps': [1, 2, 3]}},
 'mmd': {'pvalue': 1.0, 'is_drift': True, 'val': 0.7},
 'ks': {'pvalue': 0.8, 'is_drift': False, 'val': 0.1}}

In [227]:
# Collect the report consumables from the stages; the returned value is
# displayed as the cell output.
# NOTE(review): the saved output is identical to the one from collect_metrics —
# confirm whether that is placeholder behaviour.
collect_report_consumables(stages=stages)

Returning Gradient parameters


{'development': {'ber': {'ber': 0.18, 'ber_lower': 0.095},
  'coverage': {'value': 0.9},
  'parity': {'value': 0.25},
  'balance': {'factors': [], 'classes': []},
  'sufficiency': {'model': [1, 2, 3],
   'comp_model': [2, 3, 4],
   'steps': [1, 2, 3]}},
 'operational': {'ber': {'ber': 0.18, 'ber_lower': 0.095},
  'coverage': {'value': 0.9},
  'parity': {'value': 0.25},
  'balance': {'factors': [], 'classes': []},
  'sufficiency': {'model': [1, 2, 3],
   'comp_model': [2, 3, 4],
   'steps': [1, 2, 3]}},
 'mmd': {'pvalue': 1.0, 'is_drift': True, 'val': 0.7},
 'ks': {'pvalue': 0.8, 'is_drift': False, 'val': 0.1}}