# Decision boundaries using Supervised decision boundary maps (SDBM)

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import os
import tqdm

from sklearn.utils.extmath import cartesian

import torch
import torch.nn as nn
import torch.nn.functional as F

from NegativeClassOptimization import decision_boundaries
from NegativeClassOptimization import utils
from NegativeClassOptimization import preprocessing
from NegativeClassOptimization import datasets



In [2]:
def get_model_from_task(task):
    """Return the plain module that backs ``task.model``.

    Args:
        task: Any object exposing a ``model`` attribute.

    Returns:
        ``task.model.module`` when ``task.model`` is a
        ``torch.optim.swa_utils.AveragedModel`` (or a subclass of it),
        otherwise ``task.model`` unchanged.
    """
    model = task.model
    if isinstance(model, torch.optim.swa_utils.AveragedModel):
        # Unwrap the SWA model. We need a module class,
        # that has updated weights, but still has other
        # module funcs, such as forward_logits.
        # Note: swa_model.module has same weights as swa_model.state_dict().
        return model.module
    return model

In [3]:
# Loader rooted at the frozen MiniAbsolut ML data directory
# (path is relative to the notebook's working directory).
loader = datasets.FrozenMiniAbsolutMLLoader(
    data_dir=Path("../data/Frozen_MiniAbsolut_ML/"),
)

# Describe the classification task and let the loader populate it.
# load_model=True because task.model is consumed further down
# (see get_model_from_task).
task = loader.load(
    datasets.ClassificationTask(
        task_type=datasets.ClassificationTaskType.HIGH_VS_LOOSER,
        ag_pos="5E94",
        ag_neg="auto",
        seed_id=0,
        split_id=42,
    ),
    attributions_toload="v2.0-2",
    load_model=True,
)

In [4]:
# Replace the test set with the output of onehot_encode_df and expose the
# "Slide_onehot" column under the shorter name "X".
task.test_dataset = preprocessing.onehot_encode_df(task.test_dataset)
test_df = task.test_dataset
test_df["X"] = test_df["Slide_onehot"]

# Stack the per-row entries of "X" into a single array; labels as floats.
X = np.stack(test_df["X"])
y = test_df["y"].astype(float)

In [5]:
# Based on experiment_blobs.py

# --- Configuration ---------------------------------------------------------
output_dir = Path("07i_decision_boundaries")
# NOTE(review): every cached artifact in this notebook is keyed by
# dataset_name only, while the task is loaded with ag_pos="5E94".
# Confirm that "test_1adq" is the intended key; a stale cache from another
# antigen would be picked up silently.
dataset_name = "test_1adq"
epochs = 30  # 200
patience = 5
verbose = False

# The directory receives the projector, the projected points and the grids.
# Create it up front so a fresh checkout does not fail when the first
# artifact is written.
output_dir.mkdir(parents=True, exist_ok=True)

n_samples = X.shape[0]
n_classes = len(np.unique(y))

X_ssnpgt_proj_file = f'X_SSNP_{dataset_name}.npy'
name_projector_ssnp = f"{dataset_name}_ssnp"
ssnp_model_path = output_dir / name_projector_ssnp

# --- Projector ---------------------------------------------------------------
ssnpgt = decision_boundaries.SSNP(
    epochs=epochs,
    verbose=verbose,
    patience=patience,
    opt='adam',
    bottleneck_activation='linear'
)

# Reuse a previously saved projector when one exists; otherwise fit on
# (X, y) and persist it for the next run.
if ssnp_model_path.exists():
    ssnpgt.load_model(ssnp_model_path)
else:
    ssnpgt.fit(X, y)
    ssnpgt.save_model(ssnp_model_path)

2024-03-21 15:41:07.553236: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2024-03-21 15:41:07.553450: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-21 15:41:07.554899: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [6]:
# Location of the cached 2D projection of X.
ssnp_proj_path = os.path.join(output_dir, X_ssnpgt_proj_file)

if not os.path.exists(ssnp_proj_path):
    # No cache yet: project with the SSNP model and store the result.
    print("Projected SSNP points not found! Transforming...")
    X_ssnpgt = ssnpgt.transform(X)
    np.save(ssnp_proj_path, X_ssnpgt)
    print(f"Projected points ({dataset_name}) saved.")
else:
    print(f"Projected SSNP points found! {ssnp_proj_path}")
    X_ssnpgt = np.load(ssnp_proj_path)

Projected SSNP points found! 07i_decision_boundaries/X_SSNP_test_1adq.npy


In [7]:
# Build (or load from cache) a grid_size x grid_size map of the classifier's
# predictions over the SSNP 2D projection space, then render it to PNG.

clf = get_model_from_task(task)
clf_name = "SN10"
grid_size = 300
# Cap applied to the confidence grid before rendering; the same value is
# handed to results_to_png as max_value_hsv.
max_prob = 0.8

ssnp_done = False  # not read anywhere in this cell; kept as originally defined
out_name = f"{clf_name}_{grid_size}x{grid_size}_{dataset_name}"
out_file = os.path.join(output_dir, out_name + "_ssnp.npy")
prob_file = os.path.join(output_dir, out_name + "_ssnp_prob.npy")

# Both grids must be on disk to use the cache; if either one is missing the
# grids are recomputed instead of failing on np.load.
if os.path.exists(out_file) and os.path.exists(prob_file):
    img_grid_ssnp = np.load(out_file)
    prob_grid_ssnp = np.load(prob_file)

else:

    print("Defining grid around projected 2D points.")
    xmin_ssnp = np.min(X_ssnpgt[:, 0])
    xmax_ssnp = np.max(X_ssnpgt[:, 0])
    ymin_ssnp = np.min(X_ssnpgt[:, 1])
    ymax_ssnp = np.max(X_ssnpgt[:, 1])

    # Evenly spaced coordinates spanning the bounding box of the projection.
    x_intrvls_ssnp = np.linspace(xmin_ssnp, xmax_ssnp, num=grid_size)
    y_intrvls_ssnp = np.linspace(ymin_ssnp, ymax_ssnp, num=grid_size)

    # Integer pixel indices 0..grid_size-1 along each axis.
    x_grid = np.arange(grid_size)
    y_grid = np.arange(grid_size)

    # Both products are built from equally long axes in the same order, so
    # pts_grid[i] is the pixel index of the 2D coordinate pts_ssnp[i].
    pts_ssnp = cartesian((x_intrvls_ssnp, y_intrvls_ssnp))
    pts_grid = cartesian((x_grid, y_grid)).astype(int)

    batch_size = min(grid_size**2, 10000)

    img_grid_ssnp = np.zeros((grid_size, grid_size))
    prob_grid_ssnp = np.zeros((grid_size, grid_size))

    n_points = len(pts_ssnp)

    # Iterate over all points in the 2D-grid, one batch at a time.
    with tqdm.tqdm(total=n_points) as pbar:
        for position in range(0, n_points, batch_size):
            pts_batch_ssnp = pts_ssnp[position : position + batch_size]
            pts_grid_batch = pts_grid[position : position + batch_size]

            # Map the 2D points back to the classifier's input space.
            image_batch_ssnp = ssnpgt.inverse_transform(pts_batch_ssnp)

            # assumes clf.predict returns per-class scores of shape
            # (batch, n_classes) — TODO confirm
            probs_ssnp = clf.predict(torch.tensor(image_batch_ssnp)).detach().numpy()
            alpha_ssnp = np.amax(probs_ssnp, axis=1)
            labels_ssnp = probs_ssnp.argmax(axis=1)

            rows = pts_grid_batch[:, 0]  # First column
            cols = pts_grid_batch[:, 1]  # Second column
            img_grid_ssnp[rows, cols] = labels_ssnp
            prob_grid_ssnp[rows, cols] = alpha_ssnp

            # Advance by the real batch length so a final partial batch does
            # not push the bar past its total.
            pbar.update(len(pts_batch_ssnp))

    # Persist the unclipped grids.
    np.save(out_file, img_grid_ssnp)
    np.save(prob_file, prob_grid_ssnp)

prob_grid_ssnp = prob_grid_ssnp.clip(max=max_prob)

# Background mode
normalized = None
suffix = "ssnp_background"

decision_boundaries.results_to_png(
    np_matrix=img_grid_ssnp,
    prob_matrix=prob_grid_ssnp,
    grid_size=grid_size,
    n_classes=n_classes,
    real_points=normalized,
    max_value_hsv=max_prob,
    dataset_name=dataset_name,
    classifier_name=clf_name,
    suffix=suffix,
    output_dir=output_dir,
)

Saving vanilla. 300x300 - test_1adq - SN10
Saving alpha. 300x300 - test_1adq - SN10
Saving hsv. 300x300 - test_1adq - SN10
