In [42]:
import sys
sys.path.insert(0, '../')

import torch
from torch.utils.data import sampler, DataLoader
from torch import nn, optim
import torchvision
from torchvision import datasets, transforms
from torchvision.models import resnet50, ResNet50_Weights
import torchdrift

import copy
%matplotlib inline
import os
from os import listdir
from os.path import isfile, join
import pandas as pd
import random
import numpy as np
from codecarbon import EmissionsTracker
import matplotlib.pyplot as plt
import time
import ipyplot

In [43]:
from mlmodeling import *
device = "cuda" if torch.cuda.is_available else "cpu"

In [45]:
data_dir = 'E:\\mladhd\\datasets\\data_clean_extended\\'
models_dir = 'E:\\mladhd\\models\\'
pretrained_model = 'model_with_extended_dataset_resnet50_2023-03-27_16-08-23'

In [50]:
model_name = pretrained_model.split('_')[0]
print("Loading hyperparams...")
with open(models_dir+pretrained_model+'.json', 'r') as fp:
    hyperparams = json.load(fp)
# when loading a pretrained model, the date is updated to the current date
# so that the model is not overwritten (see __init__)
experiment = MLADHD(model_name, data_dir, models_dir, hyperparams)
experiment.load_model(os.path.join(models_dir,  pretrained_model + '.pth'))
experiment.model.eval().to(device)
for p in experiment2.model.parameters():
    p.requires_grad_(False)

Loading hyperparams...
Model loaded from:  E:\mladhd\models\model_with_extended_dataset_resnet50_2023-03-27_16-08-23.pth


And we set up a dataset.

In [51]:
split = (0.7, 0.15, 0.15)
experiment2.load_split_dataset(split)

Train size:  2976
Valid size:  637
Test size:  639


In [59]:
detector = torchdrift.detectors.KernelMMDDriftDetector(return_p_value=True)
feature_extractor = torch.nn.Sequential(*(list(experiment2.model.children())[:-1]))
torchdrift.utils.fit(experiment2.trainloader, feature_extractor, detector, num_batches=1)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.86s/it]


We build a model monitor: When it hooks into the model to capture the output of feature_layer. I will cache the last N captured model features in a ring buffer.

If we provide a callback, it will call the drift detector every callback_interval after it has seen enough samples.

Just to show off, I also throw in a little plot function.

In [60]:
class ModelMonitor:
    def __init__(self, drift_detector, feature_layer, N = 20, callback = None, callback_interval = 1):
        self.N = N
        base_outputs = drift_detector.base_outputs
        self.drift_detector = drift_detector
        assert base_outputs is not None, "fit drift detector first"
        feature_dim = base_outputs.size(1)
        self.feature_rb = torch.zeros(N, feature_dim, device=base_outputs.device, dtype=base_outputs.dtype)
        self.have_full_round = False
        self.next_idx = 0
        self.hook = feature_layer.register_forward_hook(self.collect_hook)
        self.counter = 0
        self.callback = callback
        self.callback_interval = callback_interval

    def unhook(self):
        self.hook.remove()

    def collect_hook(self, module, input, output):
        self.counter += 1
        bs = output.size(0)
        if bs > self.N:
            output = output[-self.N:]
            bs = self.N
        output = output.reshape(bs, -1)
        first_part = min(self.N - self.next_idx, bs)
        self.feature_rb[self.next_idx: self.next_idx + first_part] = output[:first_part]
        if first_part < bs:
            self.feature_rb[: bs - first_part] = self.output[first_part:]
        if not self.have_full_round and self.next_idx + bs >= self.N:
            self.have_full_round = True
        self.next_idx = (self.next_idx + bs) % self.N
        if self.callback and self.have_full_round and self.counter % self.callback_interval == 0:
            p_val = self.drift_detector(self.feature_rb)
            self.callback(p_val)

    def plot(self):
        import sklearn.manifold

        mapping = sklearn.manifold.Isomap()
        ref = mapping.fit_transform(self.drift_detector.base_outputs.to("cpu").numpy())

        test = mapping.transform(self.feature_rb.to("cpu").numpy())
        plt.scatter(ref[:, 0], ref[:, 1])
        plt.scatter(test[:, 0], test[:, 1])

In [61]:
def alarm(p_value):
    assert p_value > 0.01, f"Drift alarm! p-value: {p_value*100:.03f}%"

In [62]:
mm = ModelMonitor(detector, experiment.model, callback=alarm)

In [70]:
detector.base_outputs.shape

torch.Size([128, 2048, 1, 1])

We grab a batch each of benign and drifted samples.

Fun fact: For this dataset, shuffling in the dataloader is important here. Otherwise the class balance of the test batch will be off enough to cause the alarm to be set off.

In [63]:
it = iter(experiment2.validloader)
batch = next(it)[0].to(device)
batch_drifted = torchdrift.data.functional.gaussian_blur(next(it)[0].to(device), 5)

Now we run our model. Imagenet class 309 is bee and 310 is ant. Do not believe the model if it says aircraft carrier (it did this during testing). Note that we might be unlucky and get an exception here. This is at least in part a sampling artifact from computing the p-value.

In [64]:
res = experiment2.model(batch).argmax(1)
res

tensor([0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,
        1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,
        1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1,
        0, 0, 1, 1, 0, 0, 0, 1], device='cuda:0')

In [66]:
mm.feature_rb

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')

In [77]:
next(iter(experiment2.trainloader))[0].shape

torch.Size([128, 3, 224, 398])

In [65]:
detector.compute_p_value(mm.feature_rb)

ValueError: too many values to unpack (expected 2)

In [None]:
mm.plot()

In [40]:
experiment2.model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 