In [None]:
import random

import numpy as np
import pandas as pd
import torch

try:
    import google.colab  # noqa: F401

    !pip install -q daml[torch] torchmetrics torchvision
    !export LC_ALL="en_US.UTF-8"
    !export LD_LIBRARY_PATH="/usr/lib64-nvidia"
    !export LIBRARY_PATH="/usr/local/cuda/lib64/stubs"
    !ldconfig /usr/lib64-nvidia
except Exception:
    pass

!pip install -q tabulate

import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"


np.random.seed(0)
np.set_printoptions(formatter={"float": lambda x: f"{x:0.4f}"})
torch.manual_seed(0)
torch.set_float32_matmul_precision("high")
device = "cuda" if torch.cuda.is_available() else "cpu"
torch._dynamo.config.suppress_errors = True

random.seed(0)
torch.use_deterministic_algorithms(True)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"


import tensorflow.keras.datasets as tfds

from daml.metrics import BER

In [None]:
# Fetch MNIST through the Keras dataset loader (imported above as `tfds`)
(images, labels), (test_images, test_labels) = tfds.mnist.load_data()

# Restrict the problem to the digits 1, 4 and 9, taking the first 2000 training
# samples of each digit
images_split = {digit: images[labels == digit][:2000] for digit in (1, 4, 9)}
labels_split = {digit: labels[labels == digit][:2000] for digit in (1, 4, 9)}

# Stack the per-digit arrays back into a single dataset, in digit order
images_subset = np.concatenate([images_split[digit] for digit in (1, 4, 9)])
labels_subset = np.concatenate([labels_split[digit] for digit in (1, 4, 9)])
print(images_subset.shape)
print(np.unique(labels_subset, return_counts=True))

# Collapse every image into one feature vector per sample
n_samples = images_subset.shape[0]
images_flattened = images_subset.reshape(n_samples, -1)
print("Dataset shape:", images_flattened.shape)

In [None]:
# Instantiate the Bayes Error Rate (BER) metric on the flattened images and their labels.
# NOTE(review): method="MST" presumably selects a minimum-spanning-tree based
# estimator — confirm against the daml documentation.
metric = BER(images_flattened, labels_subset, method="MST")

In [None]:
# User-specified input

# Accuracy the user wants to reach, as a fraction in [0, 1]. It is compared with
# the maximum accuracy derived from the BER to decide whether the goal is feasible.
target_performance: float = 0.97

In [None]:
# Evaluate the BER metric on the MNIST subset restricted to the digits 1, 4 and 9.
# One minus the reported BER is our estimate of the upper bound on accuracy.
# The result is indexed with the "ber" and "ber_lower" keys in the next cell.
base_ber = metric.evaluate()

In [None]:
# Derive the feasibility figures from the BER evaluation.
# The upper bound on achievable accuracy is one minus the Bayes error rate.
base_ber["max_accuracy"] = 1 - base_ber["ber"]
dev_dict = {"BER": base_ber}

# Nested wrapper around the results; no later cell in this notebook reads it.
shauns_output = {"Development": dev_dict}

# ---------------------------------------------------------------

# Decide feasibility on the unrounded bound. Rounding first would report a bound
# just below the target (e.g. 0.966 against a target of 0.97) as feasible.
is_feasible = dev_dict["BER"]["max_accuracy"] >= target_performance

# Two-decimal values, used only for display in the report table
# Estimated Bayes error rate
ber = round(dev_dict["BER"]["ber"], 2)  # base_ber["ber"]
# The "ber_lower" value reported alongside the BER
ber_lower = round(dev_dict["BER"]["ber_lower"], 2)
# Upper bound on accuracy implied by the BER
max_accuracy = round(dev_dict["BER"]["max_accuracy"], 2)

In [None]:
# Collect the values the report slide needs: the feasibility verdict, the rounded
# BER figures and the target performance they are compared against.
feas_preds = {
    "feasible": is_feasible,
    "BER": ber,
    "Lower BER": ber_lower,
    "Max Accuracy": max_accuracy,
    "Target Performance": target_performance,
}

In [None]:
from gradient.slide_deck.shapes import SubText, Table, Text, TextContent
from gradient.slide_deck.slidedeck import (
    DEFAULT_GRADIENT_PRESENTATION_TEMPLATE_PATH,
    DefaultGradientSlideLayouts,
    SlideDeck,
)


def generate_feas_report_table(feas_preds: dict) -> pd.DataFrame:
    """Build the one-row summary table shown on the feasibility slide.

    Args:
        feas_preds: Feasibility results. The keys "feasible", "BER", "Lower BER"
            and "Max Accuracy" are read.

    Returns:
        A single-row DataFrame with the columns "Is feasible?", "Bayes Error Rate",
        "Lower Bayes Error Rate" and "Maximum Accuracy".
    """
    verdict = "Yes" if feas_preds["feasible"] else "No"
    # Column header -> the single value reported under it
    cells = {
        "Is feasible?": verdict,
        "Bayes Error Rate": feas_preds["BER"],
        "Lower Bayes Error Rate": feas_preds["Lower BER"],
        "Maximum Accuracy": feas_preds["Max Accuracy"],
    }
    # Wrap each value in a list so every column holds exactly one row
    return pd.DataFrame({header: [value] for header, value in cells.items()})


def generate_feas_report_slide_kwargs(feas_preds: dict) -> dict:
    """Build the keyword arguments for the "Feasibility: Summary" slide.

    Args:
        feas_preds: Feasibility results. "feasible" and "Target Performance" are
            read here; the whole dict is also passed to
            ``generate_feas_report_table`` for the table on the slide.

    Returns:
        A dict with the keys "title", "layout", "placeholder_fillings" and
        "additional_shapes", meant to be unpacked into ``SlideDeck.add_slide``.
    """
    # Round away float noise from the scaling (0.29 * 100 == 28.999999999999996)
    target_percent = round(feas_preds["Target Performance"] * 100, 4)
    # Take the verdict from the argument, not from the notebook-global
    # ``is_feasible``, so the sentence always agrees with the table built from
    # the same dict.
    verdict = "is" if feas_preds["feasible"] else "is not"

    content = [
        f"Accuracy of {target_percent}% ",
        SubText(verdict, bold=True),
        " feasible for the dataset",
    ]

    kwargs = {
        "title": "Feasibility: Summary",
        "layout": DefaultGradientSlideLayouts.CONTENT_DEFAULT,
        "placeholder_fillings": [TextContent(lines=[Text(content=content)])],
        "additional_shapes": [
            Table(
                dataframe=generate_feas_report_table(feas_preds).round(4),
                fontsize=16,
                left=2.0,
                top=2.0,
                width=9.0,
                height=4.0,
            ),
        ],
    }
    return kwargs

In [None]:
from pathlib import Path

# Folder under the current working directory that receives the generated report
example_directory = Path.cwd().joinpath("report_feas_example")
# Create it along with any missing parents; an already existing folder is fine
example_directory.mkdir(exist_ok=True, parents=True)

In [None]:
# Start a slide deck from the default gradient presentation template
deck = SlideDeck(presentation_template_path=DEFAULT_GRADIENT_PRESENTATION_TEMPLATE_PATH)

# Build the feasibility summary slide and append it to the deck
summary_slide_kwargs = generate_feas_report_slide_kwargs(feas_preds)
deck.add_slide(**summary_slide_kwargs)

In [None]:
# Save the slide deck under the name "report_feas_example" in the example directory.
# NOTE(review): the output format/extension is decided by SlideDeck.save —
# presumably a PowerPoint file; confirm against the gradient documentation.
deck.save(
    output_directory=example_directory,
    name="report_feas_example",
)