# Main experiment (TF2.11)

This notebook runs the main experimental pipeline:
- Load CIFAR-10 and adversarial samples produced in `01` (Torch AutoAttack) and `03` (TF2 + ART APGD).
- Cache per-model predictions and compute ensemble entropy.
- Sweep entropy thresholds (core vs out-of-core) and report accuracy/coverage.
- Specialize **second-generation $\mathcal{U}^{(2)^\prime}$** on the out-of-core region and evaluate the full IMM pipeline.



## Preparations

### import libraries and set variables

In [None]:
from __future__ import annotations

import os
from pathlib import Path
from typing import Dict, List, Tuple, Any

import numpy as np
import tensorflow as tf
from tensorflow import keras

from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt

import source.cache_store as cache_store
import source.custom_specialization as custom_specialization
import source.utils as utils

from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

from source.utils import load_yaml


# Paths and experiment settings live in YAML files so they can be changed without editing the notebook.
PATHS = load_yaml("./configs/paths.yaml")
EXP = load_yaml("./configs/exp.yaml")

# Turn every configured location into a pathlib.Path in one pass.
(
    data_root,
    results_root,
    tf_model_dir,
    SPs_model_dir,
    autoattack_out,
    apgd_out,
    cache_root,
) = (
    Path(PATHS[path_key])
    for path_key in (
        "data_root",
        "results_root",
        "tf_model_dir",
        "SPs_model_dir",
        "autoattack_out",
        "apgd_out",
        "cache_root",
    )
)

# Seed NumPy and TensorFlow from the configured experiment seed.
seed = int(EXP["seed"])
np.random.seed(seed)
tf.random.set_seed(seed)

# Output directories must exist before anything is written to them.
for out_dir in (results_root, cache_root):
    out_dir.mkdir(parents=True, exist_ok=True)

# Echo the resolved input/output locations.
for label, location in (
    ("tf_model_dir", tf_model_dir),
    ("autoattack_out", autoattack_out),
    ("apgd_out", apgd_out),
    ("cache_root", cache_root),
):
    print(f"{label}:", location)

In [None]:
# Display the installed TensorFlow version (the notebook title targets TF 2.11).
tf.__version__

### Load CIFAR-10 (Keras) dataset

In [None]:
# CIFAR-10 as shipped with Keras: the 50k training images are split 80/20 into
# train/validation (stratified by label); the 10k test images are kept as is.
(x_all, y_all), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Flatten the label arrays to 1-D int64 vectors.
y_all, y_test = (labels.reshape(-1).astype(np.int64) for labels in (y_all, y_test))

x_train, x_val, y_train, y_val = train_test_split(
    x_all,
    y_all,
    test_size=0.2,
    random_state=seed,
    stratify=y_all,
)

# Scale pixel values from [0, 255] to float32 in [0, 1].
x_train, x_val, x_test = (
    images.astype(np.float32) / 255.0 for images in (x_train, x_val, x_test)
)

y_train, y_val = (labels.astype(np.int64) for labels in (y_train, y_val))

### Load adversarial samples from 01 (Torch AutoAttack) and 03 (ART APGD)

In [None]:

# Registries keyed by sample name: images in `samples`, integer labels in `y_true`.
samples: Dict[str, np.ndarray] = {}
y_true: Dict[str, np.ndarray] = {}

# Original (clean) splits; copies keep the registries independent of the source arrays.
for split_name, split_x in (("train", x_train), ("val", x_val), ("test", x_test)):
    samples[f"original_{split_name}"] = split_x.copy()
for split_name, split_y in (("train", y_train), ("val", y_val), ("test", y_test)):
    y_true[f"original_{split_name}"] = split_y.copy()

# Torch AutoAttack outputs
aa_path = autoattack_out / "x_adv_test_std_l2_eps5.npy"
x_adv = np.load(aa_path).astype(np.float32)

# A 4-D array with 3 on axis 1 but not on the last axis is treated as
# channels-first and moved to channels-last.
is_channels_first = x_adv.ndim == 4 and x_adv.shape[1] == 3 and x_adv.shape[-1] != 3
if is_channels_first:
    x_adv = np.transpose(x_adv, (0, 2, 3, 1))

samples["AA_std_test"] = x_adv
y_true["AA_std_test"] = y_test.copy()

# ART APGD outputs -> key = "{tag}_{APGD_weak|APGD_strong}_{split}", e.g. "U0_APGD_weak_test"
for p in sorted(apgd_out.glob("*.npz")):
    # Per-model attack files ("..._from_<model>.npz") are not registered here.
    # Match on the file name only so a parent directory containing "from"
    # cannot silently exclude every file.
    if 'from' in p.name:
        continue

    # NpzFile keeps the archive open; the context manager closes it after the arrays are read.
    with np.load(p) as d:
        split = str(d["split"]).split('_')[0] if "split" in d else "unknown"

        # Released files have no 'tag' field. Recover the committee from the file
        # name so that first-layer / weak-SP attacks are not registered as "U0"
        # (which would overwrite the genuine U0 samples used by the unions below).
        if "tag" in d:
            tag = str(d["tag"])
        elif 'first_layer' in p.name:
            tag = "U1"
        elif 'weak-SP' in p.name:
            tag = "SPs"
        else:
            tag = "U0"

        it = int(d["max_iter"]) if "max_iter" in d else None

        # Two iterations mark the weak configuration; anything else (including a
        # missing 'max_iter') is treated as strong.
        adv_name = "APGD_weak" if it == 2 else "APGD_strong"
        sample_name = f"{tag}_{adv_name}_{split}" if tag else f"{adv_name}_{split}"

        samples[sample_name] = d["x_adv"].astype(np.float32)
        y_true[sample_name] = d["y"].astype(np.int64).reshape(-1)

def make_union(tag: str, split: str):
    """Concatenate clean, weak-APGD and strong-APGD data for one committee tag and split.

    Reads the module-level `samples` / `y_true` registries and returns
    `(images, labels)` with images cast to float32 and labels to int64, stacked
    along axis 0 in the order original, weak, strong.
    """
    member_keys = (
        f"original_{split}",
        f"{tag}_APGD_weak_{split}",
        f"{tag}_APGD_strong_{split}",
    )
    union_x = np.concatenate([samples[key] for key in member_keys], axis=0)
    union_y = np.concatenate([y_true[key] for key in member_keys], axis=0)
    return union_x.astype(np.float32), union_y.astype(np.int64)


# Register the U0 union (clean + weak + strong) for every split.
for union_split in ("train", "val", "test"):
    union_key = f"U0_union_{union_split}"
    samples[union_key], y_true[union_key] = make_union("U0", union_split)


In [None]:
## Released files have no 'tag' field, so the committee tag is derived from the file name.
## Files whose names contain neither 'first_layer' nor 'weak-SP' are skipped here.

for p in sorted(apgd_out.glob("*.npz")):
    # Match on the file name only, so directory names cannot trigger a tag.
    if 'first_layer' in p.name:
        tag = 'U1'
    elif 'weak-SP' in p.name:
        tag = 'SPs'
    else:
        continue

    # Close the archive as soon as the arrays have been copied out.
    with np.load(p) as d:
        split = str(d["split"]).split('_')[0] if "split" in d else "unknown"
        it = int(d["max_iter"]) if "max_iter" in d else None
        # Two iterations mark the weak configuration; anything else is treated as strong.
        adv_name = "APGD_weak" if it == 2 else "APGD_strong"

        sample_name = f"{tag}_{adv_name}_{split}"
        samples[sample_name] = d["x_adv"].astype(np.float32)
        y_true[sample_name] = d["y"].astype(np.int64).reshape(-1)

### Load models and build first-layer committee (U1 = U0 ∪ SPs)

In [None]:
from source.helpers04note import load_models_from_dir

# Baseline models (U0) and specialized models (SPs); their union is the first layer U1.
u0 = load_models_from_dir(tf_model_dir, pattern="*.keras")
sps = load_models_from_dir(SPs_model_dir, pattern="*.keras")
u1 = {**u0, **sps}

# Sorted member names give each committee a stable ordering.
u0_keys, u1_keys, sps_keys = (sorted(committee.keys()) for committee in (u0, u1, sps))

### Caching

In [None]:
# Ask whether to wipe the cache directory; only an exact 'Y' clears it.
if input("Type 'Y' if you want to clear the cache: ") == 'Y':
    utils.clear_folder(str(cache_root), dry_run=False)
else:
    print("Cache not cleared.")

# Store used for every cached prediction / entropy lookup below.
caching = cache_store.ResultStore(root=str(cache_root))

# predict-cache: U1 (== First_layer) only (contains U0 + SPs)
participating_models = u1


In [None]:

# For every registered sample: cache each member's raw predictions, then the
# committee entropy for U0 and for U1 (= U0 + SPs).
for sample_name, X in samples.items():
    Y = y_true[sample_name]  # not used below; the lookup still requires a label entry per sample
    print(f"{len(participating_models)} models caching preds on {sample_name}...", end="---")
    for model_name, model in participating_models.items():
        caching.set_pred(model_name, sample_name, model.predict(X, verbose=0))
    print("Done.")

    for committee_label, committee_keys, done_msg in (
        ("U0", u0_keys, "Done."),
        ("U1", u1_keys, "Done.\n"),
    ):
        print(f"caching entropy ({committee_label}) on {sample_name}...", end="---")
        # Stack member predictions along a new leading axis before computing the entropy.
        stacked = np.stack(
            [caching.get_pred(k, sample_name) for k in committee_keys], axis=0
        )
        caching.set_entropy(committee_keys, sample_name, utils.cross_entropy(stacked))
        print(done_msg)

## Find suitable total entropy threshold for each dataset

A threshold is searched only for the datasets that have a validation split (sample names ending in `_val`).


In [None]:
# Assuming all raw predictions already cached

def ens_probs(keys: List[str], sample_name: str) -> np.ndarray:
    """Average the cached predictions of the given members on one sample.

    Member predictions are stacked on a new leading axis and averaged over it.
    """
    member_preds = [caching.get_pred(member, sample_name) for member in keys]
    return np.mean(np.stack(member_preds, axis=0), axis=0)

def core_mask(keys: List[str], sample_name: str, ent_th: float) -> np.ndarray:
    """Boolean mask of the core: entries whose cached committee entropy is <= `ent_th`."""
    threshold = float(ent_th)
    return caching.get_entropy(keys, sample_name) <= threshold

# Core accuracy that a threshold must reach to be accepted.
# Target acc is based on the (clean) validation accuracy of baseline (U0)
TARGET_ACC = 0.95
# Alternative: derive it from the U0 ensemble accuracy on the clean validation split.
# TARGET_ACC = np.mean(np.argmax(ens_probs(u0_keys, 'original_val'), axis=1) == y_true['original_val'])

# A smarter search algorithm could be used,
# but a simple descending grid (from 2.0 down towards 0.01 in steps of 0.01) is enough here.
th_grid = np.arange(2.0, 1e-2, -0.01)


def find_th_for_core_acc(keys: List[str], sample_name: str, y: np.ndarray):
    """Find the largest grid threshold whose core reaches `TARGET_ACC`.

    `th_grid` is scanned from high to low. For each threshold the core is the
    set of entries with cached committee entropy <= threshold, and the core
    accuracy is the ensemble (mean-probability) accuracy on that set.

    Parameters
    ----------
    keys : committee member names used for both predictions and entropy.
    sample_name : name of the cached sample.
    y : integer labels aligned with the cached predictions.

    Returns
    -------
    (entropy threshold, core acc, core coverage, core count), or
    (0, nan, 0, 0) if no threshold satisfies the target accuracy. That happens
    when the committee members disagree on almost all samples.
    """
    p = ens_probs(keys, sample_name)
    # Entropy and per-entry correctness do not depend on the threshold, so they
    # are fetched/computed once instead of once per grid step.
    ent = caching.get_entropy(keys, sample_name)
    correct = np.argmax(p, axis=1) == y

    for th in th_grid:
        core = ent <= float(th)
        core_count = int(core.sum())
        if core_count < 10:
            # The core only shrinks as the threshold decreases, so once it is
            # this small there is no point in testing stricter thresholds.
            break
        acc = float(np.mean(correct[core]))
        if acc >= TARGET_ACC:
            return float(th), acc, float(core.mean()), core_count

    return 0, float('nan'), 0, 0

# One threshold per validation sample, keyed by the sample name without its "_val" suffix.
ent_th_dict = {}
for sample_name in (name for name in samples if name.endswith("_val")):
    head = sample_name.rsplit("_", 1)[0]
    out = find_th_for_core_acc(u1_keys, sample_name, y_true[sample_name])
    ent_th_dict[head] = out[0]

## Plot graphs : Entropy sweep of U1 committee on validation samples

In [None]:
def eval_entropy_fuzzy(
    committee_keys: List[str],
    sample_name: str,
    ent_th: float,
) -> Dict[str, Any]:
    """Split a sample by committee entropy and score the ensemble on each part.

    Entries with cached entropy <= `ent_th` form the low-entropy (core) part;
    the rest form the high-entropy (out-of-core) part. Predictions are the
    argmax of the mean member probabilities.

    Returns a dict with
        "overall_acc": accuracy on the whole sample,
        "low_entropy_count": core count,
        "low_entropy_acc": core accuracy (0.0 when the core is empty),
        "high_entropy_count": out-of-core count,
        "high_entropy_acc": out-of-core accuracy (0.0 when that part is empty).
    """
    labels = y_true[sample_name].reshape(-1)
    in_core = caching.get_entropy(committee_keys, sample_name) <= float(ent_th)
    out_of_core = ~in_core

    hits = np.argmax(ens_probs(committee_keys, sample_name), axis=1) == labels

    def _masked_acc(mask: np.ndarray) -> float:
        # An empty part is reported as 0.0 rather than NaN.
        if not mask.any():
            return 0.0
        return float(np.mean(hits[mask]))

    report: Dict[str, Any] = {}
    report["overall_acc"] = float(np.mean(hits))
    report["low_entropy_count"] = int(in_core.sum())
    report["low_entropy_acc"] = _masked_acc(in_core)
    report["high_entropy_count"] = int(out_of_core.sum())
    report["high_entropy_acc"] = _masked_acc(out_of_core)
    return report



In [None]:
def plot_ent_th_sweep(
    df : pd.DataFrame,
    overall_acc,
    sample_name="AA_RBST_test",
    outdir=None,
    vline_ent_ths: list[float] = [0.0414, 1.0],
    show_legend: bool = False,
    legend_outside: bool = True,

):
    '''Plot coverage and accuracy against the entropy threshold on twin y-axes.

    Parameters
    ----------
    df : sweep table with columns "ent_th", "core count", "high count",
        "acc1" (core accuracy) and "acc2" (high-entropy accuracy).
    overall_acc : drawn as a dotted horizontal reference line on the accuracy axis.
    sample_name : used only to build the output file name.
    outdir : directory for the PNG; nothing is saved when None.
    vline_ent_ths : [ent_th found on validation sample, high entropy threshold (arbitrary)].
        Only the first entry is drawn; the second is validated by the length
        check but not used in the body. The default list is never mutated here.
    show_legend : draw a combined legend for both axes.
    legend_outside : place that legend above the axes instead of at "best".
    '''
    assert len(vline_ent_ths) == 2, "vline_ent_ths must have two thresholds (low and high)."

    # Work on a sorted copy so the caller's frame is left untouched.
    df = df.copy()
    df["ent_th"] = df["ent_th"].astype(float)
    df = df.sort_values("ent_th").reset_index(drop=True)

    x = df["ent_th"].to_numpy()
    core_cnt = df["core count"].to_numpy(dtype=float)
    high_cnt = df["high count"].to_numpy(dtype=float)
    acc1 = df["acc1"].to_numpy(dtype=float)
    acc2 = df["acc2"].to_numpy(dtype=float)

    # Sample size taken from the first row (core + high count).
    total_n = core_cnt + high_cnt
    N = float(total_n[0])

    def _save(fig, name):
        # Write the figure at 300 dpi into `outdir`, creating it if needed.
        if outdir is None:
            return
        os.makedirs(outdir, exist_ok=True)
        fig.savefig(os.path.join(outdir, name), dpi=300)

    def nearest_idx(arr, val):
        # Index of the entry of `arr` closest to `val`.
        arr = np.asarray(arr, dtype=float)
        return int(np.argmin(np.abs(arr - float(val))))

    # ax1 (left) carries coverage, ax2 (right, twin) carries accuracy.
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    ax1.plot(x, core_cnt / N, label="core coverage", linestyle="--", color="orange")
    ax1.plot(x, high_cnt / N, label="high coverage", linestyle="-.", color="blue")

    ax2.plot(x, acc1, linestyle="-", label="acc (core)", color="black")
    ax2.plot(x, acc2, linestyle="-", label="acc (high)", color="red")

    # Reference line for the accuracy on the whole sample, labelled at the right edge.
    ax2.axhline(float(overall_acc), linestyle=":")
    ax2.annotate(
        f"overall acc\n{overall_acc:.4f}",
        (x[-1], float(overall_acc)),
        textcoords="offset points",
        xytext=(18, 8),
        ha="left",
        va="bottom",
        fontsize=9,
    )

    ax1.set_xlabel("Entropy Threshold")
    ax1.set_ylabel("Coverage")
    ax2.set_ylabel("Accuracy")
    ax1.set_ylim(0, 1.1)
    ax2.set_ylim(0, 1.1)

    ax1.grid(False)
    ax2.grid(False)
    # NOTE(review): plt.yticks acts on pyplot's current axes — presumably ax2 here; confirm.
    plt.yticks([])

    # Sweep point closest to the chosen (validation) threshold.
    core_loc = nearest_idx(x, vline_ent_ths[0])

    # Vertical marker at that threshold, labelled with the requested value.
    ax1.axvline(x[core_loc], linestyle="--", alpha=0.7)
    ax1.annotate(
        f"{vline_ent_ths[0]:.3f}",
        (x[core_loc], 0),
        textcoords="offset points",
        xytext=(6, -6),
        ha="left",
        va="top",
        fontsize=11,
    )

    # Mark and label the core coverage at the chosen threshold.
    ax1.plot([x[core_loc]], [core_cnt[core_loc] / N], marker="o", linestyle="None", color="black")
    ax1.annotate(
        f"{core_cnt[core_loc] / N:.4f}",
        (x[core_loc], core_cnt[core_loc] / N),
        textcoords="offset points",
        xytext=(-6, 4),
        ha="right",
        va="bottom",
        fontsize=11,
    )
    
    # Mark and label the core accuracy at the chosen threshold.
    ax2.plot([x[core_loc]], [acc1[core_loc]], marker="o", linestyle="None", color="black")
    ax2.annotate(
        f"{acc1[core_loc]:.4f}",
        (x[core_loc], acc1[core_loc]),
        textcoords="offset points",
        xytext=(-6, 4),
        ha="right",
        va="bottom",
        fontsize=11,
    )

    # Thresholds are shown on a logarithmic x-axis.
    ax1.set_xscale("log")

    # Leave room on the right and top for the annotation and the optional legend.
    fig.subplots_adjust(right=0.85, top=0.82)

    if show_legend:
        # Merge the handles of both axes into a single legend.
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        lines = lines1 + lines2
        labels = labels1 + labels2

        if legend_outside:
            ax1.legend(
                lines,
                labels,
                loc="lower center",
                bbox_to_anchor=(0.5, 1.02),
                ncol=len(labels),
                frameon=True,
                borderaxespad=0.0,
                handlelength=2.0,
                columnspacing=1.2,
            )
        else:
            ax1.legend(lines, labels, loc="best")

    _save(fig, f"{sample_name}_counts_and_acc_dualaxis.png")
    plt.show()

In [None]:
# Adjust this block for each figure.
# -----
LOW_ENT = 0.1
HIGH_ENT = 1.0
HIGH_TO_LOW_grid = 8
LOW_TO_0p04_grid = 6
VERY_LOW_PART_grid = 8

participating_models_str = 'u1'
participating_models = u1

# Validation sample to sweep. It must be a key of `samples`; the empty
# placeholder used previously could not be evaluated.
sample_name = 'U0_union_val'

legend_outside = False
show_legend = False
# -----

if sample_name not in samples:
    raise KeyError(f"Unknown sample_name {sample_name!r}; choose one of: {sorted(samples)}")

# log10(1.1) ~= 0.0414 is the fallback low threshold.
default_low_th = np.log(1.1)/np.log(10)

# Three linear segments, denser towards low entropy.
high_ents = np.linspace(1.05, LOW_ENT, HIGH_TO_LOW_grid)
low_ents = np.linspace(LOW_ENT, default_low_th, LOW_TO_0p04_grid)
very_low_ents = np.linspace(default_low_th, 0.001, VERY_LOW_PART_grid)

base_ent_ths = np.unique(np.concatenate([high_ents, low_ents, very_low_ents]))
base_ent_ths = np.sort(base_ent_ths)[::-1]


per_sample_ent_th_results = {}

head = "_".join(sample_name.split("_")[:-1])  # drop "_val"
vth = ent_th_dict.get(head, None)

# Add the per-sample validation threshold to the sweep.
# A missing entry or 0 (the value stored when no threshold reached the target
# accuracy) falls back to 0.0414; 0 cannot be placed on the log-scaled axis.
if not vth:
    ent_ths = base_ent_ths
    low_and_high = [float(round(default_low_th, 4)), HIGH_ENT]
else:
    ent_ths = np.unique(np.concatenate([base_ent_ths, np.array([vth], dtype=float)]))
    ent_ths = np.sort(ent_ths)[::-1]
    low_and_high = [float(round(vth, 4)), HIGH_ENT]

committee_keys = sorted(participating_models.keys())
rows = []
for ent_th in ent_ths:
    ent_th = float(round(float(ent_th), 4))
    # The parameter is named `committee_keys`; the former `participating_models=`
    # keyword raised a TypeError.
    result = eval_entropy_fuzzy(
        committee_keys=committee_keys,
        sample_name=sample_name,
        ent_th=ent_th
    )
    per_sample_ent_th_results[(sample_name, ent_th)] = result
    rows.append({
        "ent_th": ent_th,
        "core count": result["low_entropy_count"],
        "acc1": result["low_entropy_acc"],
        "high count": result["high_entropy_count"],
        "acc2": result["high_entropy_acc"],
    })


df_sweep = pd.DataFrame(rows)
# The overall accuracy does not depend on the threshold, so any row's result carries it.
overall_acc = result["overall_acc"]

outdir = f'./results/figures/{sample_name}_{participating_models_str}_val'
plot_ent_th_sweep(
    df_sweep,
    overall_acc=overall_acc,
    sample_name=sample_name,
    outdir=outdir,
    vline_ent_ths=low_and_high,
    show_legend=show_legend,
    legend_outside=legend_outside
)

## Experiment 1.

Compute ensemble and single-model entropies on the white-box attacks crafted against each of them, in both the weak and the strong configuration. Then evaluate ensemble and single-model performance on those attacks.

In [None]:
# Single model entropy stats (clean vs adv) aggregated by family (resnet/vgg)
truth = y_true["original_test"].reshape(-1)

ADV_DIR = apgd_out

# ------
ent_th = 0.95
# ------


def _entropy_log10(probs: np.ndarray) -> np.ndarray:
    """Per-row base-10 Shannon entropy; 1e-12 guards against log(0)."""
    return -np.sum(probs * np.log(probs + 1e-12) / np.log(10), axis=1)


for STRONG in [True, False]:
    resnet_res, vgg_res = [], []

    for mn, m in u0.items():  # u0 baseline models
        stem = Path(mn).stem

        # White-box attack crafted against this very model.
        if STRONG:
            p = ADV_DIR / f"test_apgdce_l2_eps0p7_step0p2_it10_rinit4_from_{stem}.npz"
        else:
            p = ADV_DIR / f"test_apgdce_l2_eps0p5_step0p2_it2_rinit4_from_{stem}.npz"

        with np.load(p) as adv_file:
            X_adv = adv_file["x_adv"].astype(np.float32)

        # clean
        clean_pred = caching.get_pred(mn, "original_test")
        clean_lab = np.argmax(clean_pred, axis=1)
        clean_ent = _entropy_log10(clean_pred)

        # adv: evaluate the model on its own attack. The cached
        # "U0_APGD_*_test" predictions belong to the committee-level attack and
        # left the per-model file loaded above unused apart from its length.
        # NOTE(review): assumes the per-model file keeps the test-set order so
        # that `truth` stays aligned — confirm.
        adv_pred = m.predict(X_adv, verbose=0)
        adv_lab = np.argmax(adv_pred, axis=1)
        adv_ent = _entropy_log10(adv_pred)

        adv_core = adv_ent <= ent_th
        cln_core = clean_ent <= ent_th

        adv_cnt = int(adv_core.sum())
        cln_cnt = int(cln_core.sum())

        adv_cov = adv_cnt / X_adv.shape[0]
        cln_cov = cln_cnt / samples["original_test"].shape[0]

        # An empty core is reported as accuracy 0.0.
        adv_acc = float(np.mean(adv_lab[adv_core] == truth[adv_core])) if adv_cnt > 0 else 0.0
        cln_acc = float(np.mean(clean_lab[cln_core] == truth[cln_core])) if cln_cnt > 0 else 0.0

        row = [adv_cov, cln_cov, adv_acc, cln_acc]
        (resnet_res if "resnet" in mn else vgg_res).append(row)
    print(f"STRONG = {STRONG}")
    print("adv_cov, clean_cov, adv_acc, clean_acc")

    # Mean and standard deviation over the models of each family.
    print("resnet")
    R = np.array(resnet_res, dtype=float)
    print(R.mean(axis=0))
    print(R.std(axis=0))

    print("vgg")
    V = np.array(vgg_res, dtype=float)
    print(V.mean(axis=0))
    print(V.std(axis=0))

In [None]:
# U0, U1, Sps stats

def core_stats(ens, X, y, ent_th, sample_name=""):
    """Accuracy, coverage and size of an ensemble's core on one sample.

    Parameters
    ----------
    ens : object exposing `core_preds(X, sample_name=..., ent_th=...)`, whose
        first two return values are used as the predictions on the core and
        the boolean core mask over `X`.
    X : input samples.
    y : integer labels aligned with `X`.
    ent_th : entropy threshold forwarded to `ens.core_preds` as a float.
    sample_name : name forwarded to `ens.core_preds`.

    Returns
    -------
    (acc, cov, cnt): core accuracy (0.0 when the core is empty), fraction of
    entries in the core, and number of entries in the core.
    """
    core_preds, core, _ = ens.core_preds(X, sample_name=sample_name, ent_th=float(ent_th))
    # `cnt` was previously read without being defined, and it was not returned
    # although the caller unpacks three values.
    cnt = int(core.sum())
    acc = float(np.mean(np.argmax(core_preds, axis=-1) == y[core])) if cnt > 0 else 0.0
    cov = float(core.mean())
    return acc, cov, cnt

truth = y_true["original_test"].reshape(-1)
ent_th = 0.95

from source.helpers04note import ProbAverageEnsembleFromProbModels

# Probability-averaging ensembles for the baseline, the specialists and their union.
U0 = ProbAverageEnsembleFromProbModels(u0)
SPs = ProbAverageEnsembleFromProbModels(sps)
U1 = ProbAverageEnsembleFromProbModels(u1)

# (label, ensemble, input). The adversarial samples are registered under
# "{tag}_APGD_{weak|strong}_{split}"; keys without "APGD" do not exist in `samples`.
cases = [
    ("U0_clean_test",   U0,  samples["original_test"]),
    ("U0_weak_test",    U0,  samples['U0_APGD_weak_test']),
    ("U0_strong_test",  U0,  samples['U0_APGD_strong_test']),
    ("SPs_weak_test",   SPs, samples['SPs_APGD_weak_test']),
    ("SPs_strong_test", SPs, samples['SPs_APGD_strong_test']),
    ("U1_weak_test",    U1,  samples['U1_APGD_weak_test']),
    ("U1_strong_test",  U1,  samples['U1_APGD_strong_test']),
]

# Each ensemble is scored on the attack crafted against itself (plus U0 on clean data).
for name, ens, X in cases:
    acc, cov, cnt = core_stats(ens, X, truth, ent_th, sample_name=name)
    print(f"{name:10s} | core_acc {acc:.4f} | core_cov {cov:.4f}")
    print()

## Experiment 2 and 3

Evaluate Logifold / IMM (if second layer exists)

In [None]:
# Member dictionaries for each role in the pipeline.
First_layer = dict(u1)
model_dict = dict(u0)
specialized_model_dict = sps

# baseline = U0, first layer = U1 defined above cell.
# Both are wrapped as probability-averaging ensembles.
baseline = ProbAverageEnsembleFromProbModels(u0)
first_layer = ProbAverageEnsembleFromProbModels(First_layer)

# Free-text description written at the top of the evaluation log.
experiment_description = '''
Baseline : Res + VGG
first layer : Res
Second layer : Res
union : clean + weak + strong
'''
def log_text(out_txt, text: str):
    """Append `text` plus a newline to the file `out_txt`.

    The parent directory is created when missing. A bare file name (no
    directory part) is written to the current working directory; previously
    this case failed because `os.makedirs('')` raises.
    """
    parent_dir = os.path.dirname(out_txt)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Explicit encoding keeps the log readable regardless of the platform default.
    with open(out_txt, "a", encoding="utf-8") as f:
        f.write(text + "\n")
# Mark the start of a new run in the shared evaluation log, together with its description.
log_text("./results/logs/evaluation_results.txt", f"New Experiment started\nexperimental description:\n{experiment_description}")
from source.utils import specialize


### Get second layer

In [None]:
# ------
# Choose the sample evaluated by the cells below. The string that follows is a
# reference list of the available name patterns, not executable configuration.

'''
sample name list

U0_APGD_weak_{split}
U0_APGD_strong_{split}
U0_union_{split}
original_{split}
...
U0 can be replaced by U1 or SPs
'''
sample_name = "U0_union_test"
# ------
# Images of the selected sample, looked up in the `samples` registry.
sample = samples[sample_name]


In [None]:

# Split "<head>_<split>" and derive the sibling sample names that share the head.
tokens = sample_name.split('_')
split = tokens[-1]
sample_name_head = '_'.join(tokens[:-1])
sample_val, sample_train, sample_test = (
    sample_name_head + suffix for suffix in ('_val', '_train', '_test')
)

truth = y_true[sample_name]
ent_first = caching.get_entropy(sorted(First_layer.keys()), sample_name)

# Validation threshold for this sample family, rounded to three decimals;
# the same value is used as the out-of-core boundary.
ent_th = round(ent_th_dict[sample_name_head], 3)
out_of_core_th = ent_th
# File-name-safe form of the threshold ('.' replaced by 'p').
str_ent_th = str(ent_th).replace('.', 'p')

print(sample_name, 'ent_th:', sep=' | ', end=' ')
print(ent_th, end=' | ')
print('----------------------------------------')
print('-----------------------------------------------------------------------')


## Get Specialized 2nd layer experts
## Prepare for OoC samples
# Pool the train and validation parts of the selected sample family together
# with their cached first-layer entropies.
s = np.concatenate([samples[sample_train], samples[sample_val]], axis=0)
t = np.concatenate([y_true[sample_train], y_true[sample_val]], axis=0)
first_layer_keys = sorted(First_layer.keys())
new_ent = np.concatenate(
    [
        caching.get_entropy(first_layer_keys, sample_train),
        caching.get_entropy(first_layer_keys, sample_val),
    ],
    axis=0,
)

# Out-of-core = entropy strictly above the threshold (complement of the `<=` core).
OoC_idx = new_ent > out_of_core_th
# Boolean indexing already returns new arrays, so no prior full copy is needed.
OoC_s = s[OoC_idx]
OoC_t = t[OoC_idx]
if OoC_t.shape[0] < 0.1*t.shape[0]:
    # Single formatted message; passing the size as a second argument made the
    # error render as a tuple.
    raise ValueError(
        f'out_of_core_th={out_of_core_th:.3e} -> OoC size too small '
        f'({OoC_t.shape[0]} of {t.shape[0]}). '
        'We do not need the second layer for this sample.'
    )

# Hold out 10% of the out-of-core data for validation, stratified by label.
OoC_x_train, OoC_x_val, OoC_y_train, OoC_y_val = train_test_split(
    OoC_s, OoC_t,
    test_size=0.1,
    random_state=42,
    stratify=OoC_t
)

print(f'For 2nd layer experts, we prepared {OoC_y_train.shape} training samples and {OoC_y_val.shape} validation samples')
# Second-layer experts, keyed "<short model name>_<adv_sample_name>_2nd".
Second_layer={}

# Identifies the training data of the experts: sample family + threshold.
adv_sample_name = sample_name_head + '_OoC_E1E2'+'_'+str_ent_th
for model_name, m in model_dict.items():
    
    # ResNet only
    if 'resnet' not in model_name:
        continue
    
    # Drop the last 6 characters — presumably the ".keras" extension; verify against the keys of `u0`.
    model_name = model_name[:-6]
    # Build a short name from the '_'-separated parts of the file stem.
    # NOTE(review): the 'v' branch cannot trigger after the 'resnet' filter above — confirm.
    model_name = model_name.split('_')[0] if model_name[0] == 'v' else model_name.split('_')[0] + model_name.split('_')[-1] + 'v'+model_name.split('_')[1][-1]
    model_path = Path('./data/specialized_models/') / Path(f"second_layer/{model_name}_{adv_sample_name}-SP.keras")
    if model_path.exists():
        # Reuse a previously specialized expert instead of training again.
        Second_layer[f"{model_name}_{adv_sample_name}_2nd"] = load_model(model_path)
    else:
        # Specialize the baseline model on the out-of-core data (one-hot labels, 10 classes).
        # NOTE(review): presumably `specialize` saves under ./data/specialized_models/ so that
        # the existence check above finds it on the next run — confirm in source.utils.
        info = specialize(
                (OoC_x_train, to_categorical(OoC_y_train,10)),
                (OoC_x_val, to_categorical(OoC_y_val,10)),
                m,
                new_model_path=f"second_layer/{model_name}_{adv_sample_name}-SP",
            verbose = 0)

        # The second element of the returned value is stored as the expert model.
        Second_layer[f"{model_name}_{adv_sample_name}_2nd"] = info[1]

## Reload baseline models
# Fresh copies are loaded from disk for every file ending in 'keras', in sorted order.
model_dict = {
    f_name: load_model(f"./data/models/{f_name}")
    for f_name in sorted(os.listdir("./data/models/"))
    if f_name.endswith('keras')
}

# First layer = reloaded baseline models + specialized models.
First_layer = {**model_dict, **specialized_model_dict}


### evaluation

In [None]:

## baseline results
# Mean-probability ensemble of U0 on the selected sample.
p_baseline = ens_probs(sorted(u0.keys()), sample_name)
acc_baseline = np.mean(p_baseline.argmax(axis=-1) == truth)

## First layer results
first_layer_member_keys = sorted(u1.keys())
p_first = ens_probs(first_layer_member_keys, sample_name)
first_layer_hits = p_first.argmax(axis=-1) == truth
acc_first = np.mean(first_layer_hits)

# Core of the first layer: entropy <= ent_th.
first_core_loc = core_mask(first_layer_member_keys, sample_name, ent_th)
first_core_ent = ent_first[first_core_loc]

core_p_first = p_first[first_core_loc]
core_truth = truth[first_core_loc]
# Accuracy inside and outside the core.
core_acc_first = np.mean(first_layer_hits[first_core_loc])
OutCore_acc_first = np.mean(first_layer_hits[~first_core_loc])

## Wrap all_ens and second_layer_ens
# All members = first layer + second layer, each wrapped as a probability-averaging ensemble.
All_layer = {**First_layer, **Second_layer}
all_ens = ProbAverageEnsembleFromProbModels(All_layer)
second_layer_ens = ProbAverageEnsembleFromProbModels(Second_layer)

## All ensemble results
p_all = all_ens.probs(sample, sample_name)
acc_all = float((np.argmax(p_all, axis=-1) == truth).mean())

## Second results
p_second = second_layer_ens.probs(sample, sample_name)
second_layer_hits = np.argmax(p_second, axis=-1) == truth
acc_second = float(second_layer_hits.mean())

# Second layer restricted to what the first layer left out of its core.
p_second_OutCore = p_second[~first_core_loc]
acc_second_OutCore = float(np.mean(second_layer_hits[~first_core_loc]))

# Collect the raw predictions of every second-layer expert on the selected sample.
preds = []

for model_name in Second_layer.keys():
    # NOTE(review): assumes the cache stores one entry per model at
    # cache_root/<sample_name>/<model_name> — confirm against cache_store.ResultStore.
    if (cache_root/sample_name/model_name).exists():
        pred = caching.get_pred(model_name,sample_name)
    else:
        pred = Second_layer[model_name].predict(samples[sample_name])
        caching.set_pred(model_name, sample_name, pred)
    preds.append(pred)
preds = np.array(preds)
# NOTE(review): the second argument (10) is not passed in the first-layer caching cell;
# check utils.cross_entropy for its meaning and default.
ent_second = utils.cross_entropy(preds, 10)
# Keep only the entries outside the first-layer core.
ent_second = ent_second[~first_core_loc]
# NOTE(review): strict '<' here, whereas the first-layer core uses '<=' — confirm this is intended.
second_core = ent_second<out_of_core_th
p_second_core = p_second_OutCore[second_core]
acc_second_core_of_OutCore = np.mean(np.argmax(p_second_core, axis=-1) == truth[~first_core_loc][second_core])

## Gated results (IMM)
# Correct first-layer answers inside the core plus correct second-layer answers
# outside it, divided by the sample size.
acc_gated = np.sum(np.argmax(p_first[first_core_loc], axis=-1) == truth[first_core_loc]) + np.sum(np.argmax(p_second_OutCore, axis=-1) == truth[~first_core_loc]) 
acc_gated /= sample.shape[0]

# See behaviour on AutoAttack samples
sample_name_aa = 'AA_std_test'
sample_aa = samples[sample_name_aa]
truth_aa = y_true[sample_name_aa]

# Baseline (U0) ensemble accuracy.
aa_acc_baseline = float(np.mean(np.argmax(baseline.probs(sample_aa, sample_name_aa), axis=-1) == truth_aa))

# First layer: overall accuracy, then accuracy restricted to its core.
aa_p_first_layer = first_layer.probs(sample_aa, sample_name_aa)
aa_acc_first = float(np.mean(np.argmax(aa_p_first_layer, axis=-1) == truth_aa))
aa_first_ent = caching.get_entropy(sorted(First_layer.keys()), sample_name_aa)
# Core = entropy <= threshold, matching `core_mask` and the `>` rule used to
# build the out-of-core training set. The former strict '<' put boundary
# entries on the opposite side of the gate for this sample only.
aa_first_core_loc = aa_first_ent <= out_of_core_th

aa_core_p_first = aa_p_first_layer[aa_first_core_loc]
aa_core_truth = truth_aa[aa_first_core_loc]
aa_core_acc_first = np.mean(np.argmax(aa_core_p_first, axis=-1) == aa_core_truth)

# Second layer: overall accuracy, then accuracy outside the first-layer core.
aa_p_second = second_layer_ens.probs(sample_aa, sample_name_aa)
aa_acc_second = float(np.mean(np.argmax(aa_p_second, axis=-1) == truth_aa))

aa_p_second_OutCore = aa_p_second[~aa_first_core_loc]
aa_acc_second_OutCore = float(np.mean(np.argmax(aa_p_second_OutCore, axis=-1) == truth_aa[~aa_first_core_loc]))

# Raw predictions of every second-layer expert, read from the cache when present.
# NOTE(review): assumes the cache stores one entry per model at
# cache_root/<sample_name>/<model_name> — confirm against cache_store.ResultStore.
preds = []
for model_name in Second_layer.keys():
    if (cache_root/sample_name_aa/model_name).exists():
        pred = caching.get_pred(model_name, sample_name_aa)
    else:
        pred = Second_layer[model_name].predict(samples[sample_name_aa])
        caching.set_pred(model_name, sample_name_aa, pred)
    preds.append(pred)
preds = np.array(preds)

# Second-layer entropy on the first layer's out-of-core part, and its own core there.
aa_ent_second = utils.cross_entropy(preds, 10)
aa_ent_second = aa_ent_second[~aa_first_core_loc]
aa_second_core = aa_ent_second<out_of_core_th
aa_p_second_core = aa_p_second_OutCore[aa_second_core]
aa_acc_second_core_of_OutCore = np.mean(np.argmax(aa_p_second_core, axis=-1) == truth_aa[~aa_first_core_loc][aa_second_core])

# Gated (IMM) accuracy: first layer answers inside its core, second layer outside.
aa_acc_gated = np.sum(np.argmax(aa_p_first_layer[aa_first_core_loc], axis=-1) == truth_aa[aa_first_core_loc]) + np.sum(np.argmax(aa_p_second_OutCore, axis=-1) == truth_aa[~aa_first_core_loc])
aa_acc_gated /= sample_aa.shape[0]

# Plain ensemble of all members (first + second layer).
aa_acc_all = float(np.mean(np.argmax(all_ens.probs(sample_aa, sample_name_aa), axis=-1) == truth_aa))


# Assemble the full evaluation report (selected sample first, AutoAttack second)
# as one newline-joined string and append it to the evaluation log.
txt = "\n".join([
    "----------------------------------------",
    f"Sample name : {sample_name} (size : {sample.shape[0]}), out_of_core_th : {out_of_core_th}",
    f"First layer acc : {acc_first}",
    f"Baseline acc : {acc_baseline}",
    f"Second layer acc : {acc_second}",
    f"All members ensemble acc : {acc_all}",
    f"Entropy threshold for first layer : {out_of_core_th}",
    f"First layer core acc : {core_acc_first} (size : {np.sum(first_core_loc)})",
    f"First layer OutCore acc : {OutCore_acc_first} (size : {np.sum(~first_core_loc)})",
    f"First layer core entropy : {np.sum(first_core_ent)}",
    f"First layer entropy : {np.sum(ent_first)}",
    f"First layer OutCore entropy : {np.sum(ent_first[~first_core_loc])}",
    f"First layer core coverage : {np.sum(first_core_loc) / sample.shape[0]}",
    f"Second layer acc on OutCore from first layer  : {acc_second_OutCore} (size : {np.sum(~first_core_loc)})",
    f"Second layer core acc on OutCore from first layer : {acc_second_core_of_OutCore} (size : {np.sum(second_core)})",
    f"Second layer core entropy on OutCore from first layer : {np.sum(ent_second[second_core])}",
    f"Second layer entropy on OutCore from first layer : {np.sum(ent_second)}",
    f"out core of second layer count : {np.sum(~second_core)}",
    f"Second layer OutCore entropy on OutCore from first layer : {np.sum(ent_second[~second_core])}",
    f"Second layer core coverage on OutCore from first layer : {np.sum(second_core) / ent_second.shape[0]}",
    f"Gated ensemble acc : {acc_gated}",
    f"total entropy = first layer core ent + second layer ent on OutCore from first layer\n{np.sum(first_core_ent)} + {np.sum(ent_second)} = {np.sum(first_core_ent)+np.sum(ent_second)}",
    f"total core entropy (first layer) + core entropy (second layer) : {np.sum(first_core_ent) + np.sum(ent_second[second_core])}",
    f"maximum acc among baseline, first layer, second layer : {max(acc_baseline, acc_first, acc_second)}",
    f"difference between gated ensemble and maximum acc : {acc_gated - max(acc_baseline, acc_first, acc_second)}",
    "",
    "AutoAttack result",
    f"AA Sample name : {sample_name_aa} (size : {sample_aa.shape[0]}), out_of_core_th : {out_of_core_th}",
    f"First layer acc : {aa_acc_first}",
    f"Baseline acc : {aa_acc_baseline}",
    f"Second layer acc : {aa_acc_second}",
    f"All members ensemble acc : {aa_acc_all}",
    f"Entropy threshold for first layer : {out_of_core_th}",
    f"First layer core acc : {aa_core_acc_first} (size : {np.sum(aa_first_core_loc)})",
    f"Second layer acc on OutCore from first layer  : {aa_acc_second_OutCore} (size : {np.sum(~aa_first_core_loc)})",
    f"Second layer core acc on OutCore from first layer : {aa_acc_second_core_of_OutCore} (size : {np.sum(aa_second_core)})",
    f"Gated ensemble  acc : {aa_acc_gated}",
    f"First layer entropy : {np.sum(aa_first_ent)}",
    f"First layer core entropy : {np.sum(aa_first_ent[aa_first_core_loc])}",
    f"Total entropy = first layer core entropy + 2nd layer ent on outcore : {np.sum(aa_first_ent[aa_first_core_loc])} + {np.sum(aa_ent_second)}"
])

# Appended to the same log file that received the experiment description.
log_text("./results/logs/evaluation_results.txt", txt)

In [None]:

# Show the report exactly as it was written to the log.
# This cell used to rebuild the report line by line with its own print calls;
# that copy had already drifted from the logged text (different line order,
# missing AutoAttack entropy lines). Printing `txt` keeps screen and log identical.
print('----------------------------------------')
print(txt)
print()
