In [1]:
import os
import random
from typing import Dict, cast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms.v2 as v2
from torch.utils.data import DataLoader, Dataset, Subset
import nannyml as nml
from IPython.display import display
import loss_estimation

np.random.seed(0)
np.set_printoptions(formatter={"float": lambda x: f"{x:0.4f}"})
torch.manual_seed(0)
torch.set_float32_matmul_precision("high")
device = "cuda" if torch.cuda.is_available() else "cpu"
torch._dynamo.config.suppress_errors = True

random.seed(0)
torch.use_deterministic_algorithms(True)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

torch._dynamo.disable()

<torch._dynamo.eval_frame.DisableContext at 0x7f8a4d2d4150>

In [2]:
# Download the mnist dataset
to_tensor = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])
train_ds = datasets.MNIST("./data", train=True, download=True, transform=to_tensor)
test_ds = datasets.MNIST("./data", train=False, download=True, transform=to_tensor)

class_names = list(range(10))

In [3]:
# This is the operator that will be used to generate a corrupted MNIST dataset, by adding random noise.

class Corrupt(v2.Transform):
    def _transform(self, inpt, params):
        return self.contrast(inpt)

    def contrast(self, sample):
        x = sample
        c = 0.3

        # x = np.array(x) / 255.0
        x = x.float() / 255.0
        rands = torch.normal(x, std=c)
        x = torch.clip(rands, 0, 1)

        return x


c_to_tensor = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True), Corrupt()])
c_contrast = v2.Compose([Corrupt()])

# Sets up the corrupted test dataset, using the random-noise transform specified above
c_test_ds = datasets.MNIST("./data", train=False, download=True, transform=c_to_tensor)

In [4]:
# Take a subset of 2000 training images and 500 test images,
# so that the notebook cells can be evaluated quickly.
train_ds = Subset(train_ds, range(2000))
test_ds = Subset(test_ds, range(500))
c_test_ds = Subset(c_test_ds, range(500))

In [5]:
# Define our network architecture
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(6400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.softmax = torch.nn.Softmax()

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.softmax(x)
        return x


# Compile the model
model = torch.compile(Net().to(device))

# Type cast the model back to Net as torch.compile returns a Unknown
# Nothing internally changes from the cast; we are simply signaling the type
model = cast(Net, model)

In [6]:
# Defines the function we will use to train the model.
def custom_train(model: nn.Module, dataset: Dataset):
    # Defined only for this testing scenario
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    epochs = 10

    # Define the dataloader for training
    dataloader = DataLoader(dataset, batch_size=16)

    for epoch in range(epochs):
        for batch in dataloader:
            # Load data/images to device
            X = torch.Tensor(batch[0]).to(device)
            # Load targets/labels to device
            y = torch.Tensor(batch[1]).to(device)
            # Zero out gradients
            optimizer.zero_grad()
            # Forward propagation
            outputs = model(X)
            # Compute loss
            loss = criterion(outputs, y)
            # Back prop
            loss.backward()
            # Update weights/parameters
            optimizer.step()

def reset_parameters(model: nn.Module):
    """
    Re-initializes each layer in the model using
    the layer's defined weight_init function
    """

    @torch.no_grad()
    def weight_reset(m: nn.Module):
        # Check if the current module has reset_parameters
        reset_parameters = getattr(m, "reset_parameters", None)
        if callable(reset_parameters):
            m.reset_parameters()  # type: ignore

    # Applies fn recursively to every submodule see:
    # https://pytorch.org/docs/stable/generated/torch.nn.Module.html
    return model.apply(fn=weight_reset)

In [7]:
# Reset the network weights to "create" an untrained model
model = reset_parameters(model)
# Run the model with each substep of data
# train on subset of train data
train_kwargs = {}
eval_kwargs = {}
custom_train(
    model,
    train_ds,
    **train_kwargs,
)

  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


In [8]:
estimator = loss_estimation.LossEstimator("CBPE", "classification_multiclass")
results = estimator.evaluate(model, test_ds, c_test_ds, class_names)

  return self._call_impl(*args, **kwargs)


In [9]:
print(results)

{'Reference_Metric': 0.8066528514612994, 'Op_Predicted_Metric': 0.13486985345575161, 'Has_Drifted': True}


In [12]:
import torchmetrics

def custom_eval(model: nn.Module, dataset: Dataset) -> Dict[str, list]:
    # metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)
    # result = 0
    # batch_dicts = []
    metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)

    # dict_out = {"conf": np.zeros(0), "preds": np.zeros(0), "ground_truth": np.zeros(0)}
    dict_out = {"y_pred": np.zeros(0, dtype=int), "y": np.zeros(0, dtype=int)}
    for i in range(10):
        dict_out[f"y_pred_proba_{i}"] = np.zeros(0)

    # Set model layers into evaluation mode
    model.eval()
    dataloader = DataLoader(dataset, batch_size=16)
    # Tell PyTorch to not track gradients, greatly speeds up processing
    with torch.no_grad():
        for batch in dataloader:
            # Load data/images to device
            X = torch.Tensor(batch[0]).to(device)
            # Load targets/labels to device
            y = torch.Tensor(batch[1]).int()
            output = model(X).cpu()
            processed_output = torch.max(output, dim=1)
            confs = processed_output[0]
            preds = np.int64(processed_output[1])

            # batch_dict = {"conf": confs, "preds": preds, "ground_truth": y}
            # dict_out["conf"] = np.concatenate((dict_out["conf"], confs))
            dict_out["y_pred"] = np.concatenate((dict_out["y_pred"], preds), dtype=int)
            dict_out["y"] = np.concatenate((dict_out["y"], y), dtype=int)
            for i in range(10):
                key = f"y_pred_proba_{i}"
                dict_out[key] = np.concatenate((dict_out[key], output[:, i]))

            metric.update(output, y)
        result = metric.compute().cpu()
    return {"Accuracy": result}

In [26]:
c_evaluated = custom_eval(model, c_test_ds, **eval_kwargs)

pred_accuracy = results['Op_Predicted_Metric']
true_accuracy = c_evaluated["Accuracy"].float()
percent_diff = np.abs(pred_accuracy - true_accuracy)* 100

print(f'Predicted accuracy on corrupted MNIST: {pred_accuracy}')
print(f'Actual accuracy on corrupted MNIST: {true_accuracy}')
print(f'Percentage point difference between true and predicted accuracy: {percent_diff} %')

Predicted accuracy on corrupted MNIST: 0.13486985345575161
Actual accuracy on corrupted MNIST: 0.09399999678134918
Percentage point difference between true and predicted accuracy: 4.086986064910889 %
