In [None]:
# Messy test code based on https://nannyml.readthedocs.io/en/stable/quick.html

import nannyml as nml
from IPython.display import display

In [None]:
# Load the US census (MA employment) example frames; the third returned value is discarded.
reference_df, analysis_df, _ = nml.load_us_census_ma_employment_data()

In [None]:
# Preview the first rows of the reference frame.
display(reference_df.head())

In [None]:
# Preview the first rows of the analysis frame.
display(analysis_df.head())

In [None]:
# Chunk size passed to the CBPE estimator configured below.
chunk_size = 5000

In [None]:
# CBPE estimator for a binary classification problem, reporting ROC AUC and
# chunking the data by `chunk_size`. The string arguments name the columns that
# hold the predicted probability, the predicted label and the ground truth.
estimator = nml.CBPE(
    problem_type="classification_binary",
    y_pred_proba="predicted_probability",
    y_pred="prediction",
    y_true="employed",
    metrics=["roc_auc"],
    chunk_size=chunk_size,
)

In [None]:
# Fit on the reference frame, then estimate performance on the analysis frame.
estimator = estimator.fit(reference_df)
estimated_performance = estimator.estimate(analysis_df)

In [None]:
# Plot the estimated performance and render the figure.
figure = estimated_performance.plot()
figure.show()

In [None]:
# List the column labels of the reference frame.
reference_df.columns

In [None]:
import numpy as np
import pandas as pd

# Toy "reference" frame: random probabilities, their rounded predictions and
# alternating ground-truth labels.
probs = np.random.rand(100)
preds = np.round(probs)
truth = np.array([0, 1] * 50)
test_dict = {"id": list(range(100)), "prediction": preds, "predicted_probability": probs, "employed": truth}
test_df = pd.DataFrame(test_dict)

# Toy "analysis" frame: same layout but without ground truth.
new_probs = np.random.rand(100)
# The predictions must be derived from this frame's own probabilities; rounding
# the reference `probs` here would make the two columns inconsistent.
new_preds = np.round(new_probs)
new_dict = {"id": list(range(100)), "prediction": new_preds, "predicted_probability": new_probs}
new_df = pd.DataFrame(new_dict)

In [None]:
# CBPE estimator for the toy frames: binary classification, accuracy metric,
# chunk size of 10.
estimator_base = nml.CBPE(
    problem_type="classification_binary",
    y_pred_proba="predicted_probability",
    y_pred="prediction",
    y_true="employed",
    metrics=["accuracy"],
    chunk_size=10,
)

In [None]:
# Fit on the labelled toy frame and estimate on the unlabelled one.
e2 = estimator_base.fit(test_df)
ep2 = e2.estimate(new_df)

In [None]:
# Plot the estimate obtained for the toy frames.
figure = ep2.plot()
figure.show()

In [None]:
# Export the estimation results as a DataFrame.
edf = ep2.to_df()

In [None]:
# Average of the accuracy "value" column over all chunks.
edf[("accuracy", "value")].mean()

In [None]:
import os
import random
from typing import Dict, cast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms.v2 as v2
from torch.utils.data import DataLoader, Dataset, Subset

np.random.seed(0)
np.set_printoptions(formatter={"float": lambda x: f"{x:0.4f}"})
torch.manual_seed(0)
torch.set_float32_matmul_precision("high")
device = "cuda" if torch.cuda.is_available() else "cpu"
torch._dynamo.config.suppress_errors = True

random.seed(0)
torch.use_deterministic_algorithms(True)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

torch._dynamo.disable()

In [None]:
# Fetch MNIST (downloaded into ./data when missing); every image is converted
# to a float32 image tensor with rescaled values.
to_tensor = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])
mnist_kwargs = {"root": "./data", "download": True, "transform": to_tensor}
train_ds = datasets.MNIST(train=True, **mnist_kwargs)
test_ds = datasets.MNIST(train=False, **mnist_kwargs)

In [None]:
# Show the first training image of every digit class on a 2x5 grid.
fig = plt.figure(figsize=(8, 3))

for digit in range(10):
    first_idx = (train_ds.targets == digit).nonzero()[0][0]
    panel = fig.add_subplot(2, 5, digit + 1)
    # Hide both axes; only the picture matters.
    for axis in (panel.xaxis, panel.yaxis):
        axis.set_visible(False)
    panel.imshow(train_ds.data[first_idx], cmap="gray_r")

In [None]:
class Contrast(v2.Transform):
    """Corrupt an image with additive Gaussian pixel noise.

    Despite its name, this transform does not alter contrast: every pixel is
    replaced by a sample from a normal distribution centred on the pixel's
    intensity, and the result is clipped to ``[0, 1]``.
    """

    def _transform(self, inpt, params):
        """Per-input hook of the v2 transform API; ``params`` is not used."""
        return self.contrast(inpt)

    def contrast(self, sample):
        """Return a noisy copy of ``sample`` with values in ``[0, 1]``.

        Args:
            sample: Image tensor. Integer tensors are treated as 0-255
                intensities and rescaled to ``[0, 1]``. Floating-point tensors
                are assumed to be in ``[0, 1]`` already (e.g. after
                ``v2.ToDtype(torch.float32, scale=True)``) and are NOT divided
                again; dividing twice would shrink the image to almost zero
                and leave nothing but noise.

        Returns:
            Float tensor of the same shape as ``sample``.
        """
        # Standard deviation of the noise on the [0, 1] intensity scale.
        noise_std = 0.3

        if torch.is_floating_point(sample):
            x = sample.float()
        else:
            x = sample.float() / 255.0

        noisy = torch.normal(x, std=noise_std)
        return torch.clip(noisy, 0, 1)


# Pipeline for the corrupted test set: the usual tensor conversion followed by
# the Contrast transform defined above.
c_to_tensor = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True), Contrast()])
# Contrast on its own, for inputs that skip the conversion steps.
c_contrast = v2.Compose([Contrast()])

# MNIST test split read through the corrupting pipeline.
c_test_ds = datasets.MNIST("./data", train=False, download=True, transform=c_to_tensor)

In [None]:
# Show the corrupted version of the first test image of every digit class.
fig = plt.figure(figsize=(8, 3))

for lbl in range(10):
    i = (c_test_ds.targets == lbl).nonzero()[0][0]
    # Indexing `.data` bypasses the dataset's transform (presumably raw pixel
    # values — confirm), so the corruption is applied explicitly here.
    img = c_contrast(c_test_ds.data[i])
    ax = fig.add_subplot(2, 5, lbl + 1)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.imshow(img, cmap="gray_r")

In [None]:
# Keep the first 2000 training images and the first 500 images of each test set
train_ds = Subset(train_ds, range(2000))
test_ds, c_test_ds = (Subset(full_ds, range(500)) for full_ds in (test_ds, c_test_ds))

In [None]:
# Define our network architecture
class Net(nn.Module):
    """Small convolutional classifier that returns class probabilities.

    Two 5x5 convolutions are followed by three fully connected layers and a
    softmax over the class dimension. The first linear layer expects 6400
    features, which is what the convolutions produce for 1x28x28 inputs
    (16 channels of 20x20).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(6400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        # Normalise over the class dimension explicitly; leaving `dim` out
        # relies on a deprecated implicit choice and warns on every call.
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Map a batch of images ``(N, 1, H, W)`` to probabilities ``(N, 10)``."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.softmax(x)
        return x


# Build the network on the selected device and compile it.
# The value returned by torch.compile is not typed as Net, so it is cast back
# for the benefit of type checkers; the cast changes nothing at runtime.
model = cast(Net, torch.compile(Net().to(device)))

In [None]:
def custom_train(
    model: nn.Module,
    dataset: Dataset,
    epochs: int = 10,
    batch_size: int = 16,
    lr: float = 0.01,
    momentum: float = 0.9,
):
    """Train ``model`` in place on ``dataset`` with SGD.

    The model is expected to return class *probabilities* (as ``Net`` does
    through its final softmax). The loss is therefore the negative
    log-likelihood of the logged probabilities. ``CrossEntropyLoss`` would
    apply a second softmax to outputs that are already normalised.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataset: Dataset yielding ``(input, integer class label)`` pairs.
        epochs: Number of passes over ``dataset``.
        batch_size: Number of samples per optimisation step.
        lr: SGD learning rate.
        momentum: SGD momentum.

    Returns:
        None. The parameters of ``model`` are updated in place.
    """
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    criterion = torch.nn.NLLLoss()

    # Send the batches to wherever the model already lives.
    model_device = next(model.parameters()).device

    dataloader = DataLoader(dataset, batch_size=batch_size)

    # An earlier evaluation may have left the model in eval mode.
    model.train()

    for _ in range(epochs):
        for batch in dataloader:
            # Inputs and integer targets on the model's device
            X = torch.as_tensor(batch[0]).to(model_device)
            y = torch.as_tensor(batch[1]).to(model_device)
            # Zero out gradients
            optimizer.zero_grad()
            # Forward propagation (probabilities)
            outputs = model(X)
            # Clamp before the log so a zero probability cannot produce -inf
            loss = criterion(torch.log(outputs.clamp_min(1e-12)), y)
            # Back prop
            loss.backward()
            # Update weights/parameters
            optimizer.step()


def custom_eval(
    model: nn.Module,
    dataset: Dataset,
    num_classes: int = 10,
    batch_size: int = 16,
) -> Dict[str, np.ndarray]:
    """Run ``model`` over ``dataset`` and collect predictions column by column.

    Args:
        model: Network returning one score per class for every sample; the
            scores are stored as probabilities. It is switched to eval mode.
        dataset: Dataset yielding ``(input, integer class label)`` pairs.
        num_classes: Number of probability columns to emit.
        batch_size: Number of samples per forward pass.

    Returns:
        Dictionary of 1-D arrays with one entry per sample, ready to be passed
        to ``pd.DataFrame``:
        ``"y_pred"`` (predicted class, int), ``"y"`` (ground truth, int) and
        ``"y_pred_proba_<i>"`` (float64) for every class ``i``.
    """
    # Send the batches to wherever the model lives; a model without
    # parameters is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Set model layers into evaluation mode
    model.eval()
    dataloader = DataLoader(dataset, batch_size=batch_size)

    # Per-batch results are gathered in lists and concatenated once at the end,
    # instead of re-copying the growing arrays on every batch.
    pred_chunks, label_chunks, proba_chunks = [], [], []

    # No gradients are needed for inference
    with torch.no_grad():
        for batch in dataloader:
            X = torch.as_tensor(batch[0]).to(model_device)
            output = model(X).cpu()
            pred_chunks.append(torch.max(output, dim=1).indices.numpy())
            label_chunks.append(torch.as_tensor(batch[1]).cpu().numpy())
            proba_chunks.append(output.numpy())

    if pred_chunks:
        y_pred = np.concatenate(pred_chunks).astype(int)
        y_true = np.concatenate(label_chunks).astype(int)
        proba = np.concatenate(proba_chunks).astype(float)
    else:
        # Empty dataset: keep the same keys, with empty columns.
        y_pred = np.zeros(0, dtype=int)
        y_true = np.zeros(0, dtype=int)
        proba = np.zeros((0, num_classes))

    dict_out = {"y_pred": y_pred, "y": y_true}
    for i in range(num_classes):
        dict_out[f"y_pred_proba_{i}"] = proba[:, i].copy()
    return dict_out

In [None]:
def reset_parameters(model: nn.Module):
    """
    Give every layer of ``model`` fresh weights.

    Each submodule that exposes a callable ``reset_parameters`` has it invoked
    (without gradient tracking); other submodules are left untouched.
    Returns the model handed back by ``model.apply``.
    """

    def _reinitialize(module: nn.Module) -> None:
        # Layers without their own initialiser are skipped
        initializer = getattr(module, "reset_parameters", None)
        if not callable(initializer):
            return
        with torch.no_grad():
            initializer()

    # nn.Module.apply visits every submodule recursively, see:
    # https://pytorch.org/docs/stable/generated/torch.nn.Module.html
    return model.apply(_reinitialize)

In [None]:
# Start from freshly initialised weights, i.e. an "untrained" model
model = reset_parameters(model)

# Extra keyword arguments forwarded to the train / eval helpers (none so far)
train_kwargs = {}
eval_kwargs = {}

# Fit on the training subset
custom_train(model, train_ds, **train_kwargs)

# Collect predictions for the training, clean test and corrupted test sets
train_dict, test_dict, c_test_dict = (
    custom_eval(model, split, **eval_kwargs) for split in (train_ds, test_ds, c_test_ds)
)
train_df, test_df, c_test_df = map(pd.DataFrame, (train_dict, test_dict, c_test_dict))

In [None]:
# Observed accuracy on the corrupted test set. Taking the mean of the
# element-wise comparison avoids hard-coding the number of rows.
(c_test_df["y_pred"] == c_test_df["y"]).mean()

In [None]:
import nannyml as nml
from IPython.display import display

In [None]:
# Map every class label to the name of its probability column
y_pred_keys = {label: f"y_pred_proba_{label}" for label in range(10)}
print(y_pred_keys)

# Multiclass CBPE estimator reporting accuracy
estimator_base = nml.CBPE(
    chunk_size=50,
    metrics=["accuracy"],
    problem_type="classification_multiclass",
    y_true="y",
    y_pred="y_pred",
    y_pred_proba=y_pred_keys,
)

In [None]:
# Fit the estimator on the clean test-set predictions.
estimator_base.fit(test_df)

In [None]:
# Estimate performance on the corrupted test-set predictions.
results = estimator_base.estimate(c_test_df)

In [None]:
# Plot the estimate; as the cell's last expression, the returned object is displayed.
results.plot()

In [None]:
# Keep only the analysis period and show it as a table
analysis_results = results.filter(period="analysis")
results_df = analysis_results.to_df()
display(results_df)


In [None]:
# Summarise the estimate: mean of the per-chunk accuracy, and whether any chunk raised an alert
accuracy_results = results_df["accuracy"]
pred_accuracy = accuracy_results["value"].mean()
alert = accuracy_results["alert"].any()

print(f"Predicted accuracy: {pred_accuracy}")
print(f"Action recommended: {'yes' if alert else 'no'}")