In [None]:
# Load the jupyter_black extension (code formatting of notebook cells).
%load_ext jupyter_black
# Load autoreload and set it to mode 2, which reloads all imported modules
# before running a cell, so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
import turbo_broccoli as tb

# Root directory that holds the result files; checkpoint paths recorded in
# the results are also resolved relative to it further down the notebook.
OUTPUT_DIR = Path("out.test")

# Result files of the runs this notebook can inspect, keyed by
# "<dataset directory>/<model directory>". Keeping them in one table replaces
# the previous pattern of assigning RESULT_FILE_PATH several times and letting
# the last assignment win.
RESULT_FILES = {
    "microsoft-cats_vs_dogs/alexnet": (
        OUTPUT_DIR
        / "microsoft-cats_vs_dogs"
        / "alexnet"
        / "results.0e37da4ab09345e4a1eadfe4aef78bbd.json"
    ),
    "timm-eurosat-rgb/timm-vgg11.tv_in1k": (
        OUTPUT_DIR
        / "timm-eurosat-rgb"
        / "timm-vgg11.tv_in1k"
        / "results.6e99633302094c71b965b81b4d128df0.json"
    ),
    "cifar10/microsoft-resnet-18": (
        OUTPUT_DIR
        / "cifar10"
        / "microsoft-resnet-18"
        / "results.cdb406564f0d4da98c3b4c0f54958d62.json"
    ),
}

# Run to analyse; change this key to switch experiment.
# NOTE(review): the SUBMODULES list chosen later in the notebook is
# architecture-specific and has to be switched together with this key.
SELECTED_RUN = "cifar10/microsoft-resnet-18"

RESULT_FILE_PATH = RESULT_FILES[SELECTED_RUN]

results = tb.load(RESULT_FILE_PATH)
results

In [None]:
from lcc.classifiers import get_classifier_cls

# Model identifier and best-checkpoint location, both read from the loaded
# results; the recorded checkpoint path is joined onto OUTPUT_DIR.
MODEL_NAME = results["model"]["name"]
CKPT_PATH = OUTPUT_DIR / results["training"]["best_checkpoint"]["path"]

# Resolve the classifier class for this model name and restore its weights.
cls = get_classifier_cls(MODEL_NAME)
model = cls.load_from_checkpoint(CKPT_PATH)
model.to("cuda")
# Switch to evaluation mode: the model is only used to extract activations,
# so dropout must be disabled and normalization layers must use (and not
# update) their stored statistics. A freshly constructed module is in
# training mode unless told otherwise.
model.eval()
model.hparams

In [None]:
from lcc.datasets import HuggingFaceDataset

# Dataset description recorded alongside the training results.
_dataset_cfg = results["dataset"]
DATASET_NAME = _dataset_cfg["name"]

# Keyword arguments handed to the datamodule for its training dataloader.
dl_kw = dict(batch_size=64, num_workers=8)

# Rebuild the datamodule with the same splits / label key as the recorded run
# and the image processor that belongs to the selected model.
_image_processor = cls.get_image_processor(MODEL_NAME)
dataset = HuggingFaceDataset(
    dataset_name=DATASET_NAME,
    fit_split=_dataset_cfg["train_split"],
    val_split=_dataset_cfg["val_split"],
    test_split=_dataset_cfg["test_split"],
    label_key=_dataset_cfg["label_key"],
    train_dl_kwargs=dl_kw,
    image_processor=_image_processor,
)
dataset.setup("fit")

# Training dataloader and the matching label vector.
# NOTE(review): later cells pair the first N_SAMPLES labels with the first
# N_SAMPLES activations drawn from `dl` purely by position; this assumes the
# training dataloader iterates in dataset order (no shuffling) — confirm.
dl = dataset.train_dataloader()
y_true = dataset.y_true("train")

In [None]:
from tqdm.notebook import tqdm
import torch

# Number of samples whose activations are kept (iteration stops once reached).
N_SAMPLES = 4000

# Layer sets to inspect, one per architecture. Commented-out entries are other
# submodules that can be toggled on. Keeping the sets in one table replaces
# the previous pattern of assigning SUBMODULES several times and letting the
# last assignment win.
# NOTE(review): keys mirror the model directories of the result files; the
# mapping of each list to an architecture is inferred from the layer names
# and the order of the original assignments — confirm.
SUBMODULE_SETS = {
    "alexnet": [
        # "model.features.0",
        # "model.features.3",
        "model.features.6",
        "model.features.8",
        "model.features.10",
        "model.classifier.1",
        "model.classifier.4",
        # "model.classifier.6",
    ],
    "timm-vgg11.tv_in1k": [
        # "model.features.0",
        # "model.features.3",
        "model.features.6",
        # "model.features.8",
        "model.features.11",
        # "model.features.13",
        "model.features.16",
        "model.features.18",
        "model.pre_logits.fc1",
        # "model.head",
    ],
    "microsoft-resnet-18": [
        # "model.resnet.embedder",
        # "model.resnet.encoder.stages.0",
        "model.resnet.encoder.stages.1.layers.0.layer.1.convolution",
        "model.resnet.encoder.stages.2.layers.1.layer.1.convolution",
        "model.resnet.encoder.stages.3.layers.0.layer.0.convolution",
        "model.resnet.encoder.stages.3.layers.0.layer.1.convolution",
        "model.resnet.encoder.stages.3.layers.1.layer.0.convolution",
        # "model.resnet.encoder.stages.3.layers.1.layer.1.convolution",
        # "model.classifier",
    ],
}

# Layer set in use; must match the architecture of the loaded checkpoint.
SUBMODULE_SET = "microsoft-resnet-18"
SUBMODULES = SUBMODULE_SETS[SUBMODULE_SET]

# Collect per-batch activations until at least N_SAMPLES samples were seen.
# The activations are only stored for visualization, so autograd tracking is
# switched off: nothing is back-propagated and no graph needs to be built.
n_seen, _data = 0, []
with torch.no_grad():
    for batch in tqdm(dl):
        # `out` is filled in place by forward_intermediate; it is expected to
        # end up with one entry per name in SUBMODULES (indexed that way below).
        out = {}
        model.forward_intermediate(batch, SUBMODULES, out)
        # Collapse every dimension after the first so each sample is one row.
        out = {k: v.flatten(1) for k, v in out.items()}
        _data.append(out)
        # Batch size is read off the first captured tensor.
        n_seen += len(next(iter(out.values())))
        if n_seen >= N_SAMPLES:
            break

# Concatenate the batches per submodule and trim the overshoot of the last
# batch so that every tensor has at most N_SAMPLES rows.
z = {sm: torch.cat([r[sm] for r in _data])[:N_SAMPLES] for sm in SUBMODULES}
for k, v in z.items():
    print(k, ":", v.shape)

In [None]:
from cuml import UMAP

from lcc.utils import to_array

# Embed the activations of every submodule with a fresh, default-configured
# UMAP; the result maps submodule name -> embedding.
# NOTE(review): no seed is passed to UMAP, so embeddings may differ between
# runs.
e = {
    sm: UMAP().fit_transform(to_array(u))
    for sm, u in tqdm(z.items())
}

In [None]:
from bokeh.io import output_notebook

# Configure Bokeh to render subsequent show() calls inline in the notebook.
output_notebook()

In [None]:
import bokeh.plotting as bk
import bokeh.layouts as bkl
import bokeh.palettes as bkp

from lcc.plotting import class_scatter
from sklearn.preprocessing import RobustScaler

# Width and height given to every panel.
SIZE = 250


def _class_panel(embedding):
    """Build one square, toolbar-less scatter panel for a single embedding.

    The points are passed to `class_scatter` together with the first
    N_SAMPLES entries of `y_true`; the grid is hidden.
    """
    panel = bk.figure(width=SIZE, height=SIZE)
    panel.toolbar_location = None
    class_scatter(
        panel,
        embedding,
        y_true[:N_SAMPLES],
        grid_visible=False,
    )
    return panel


# One panel per submodule, in the insertion order of `e`.
figures = [_class_panel(u) for u in e.values()]

# Lay the panels out side by side; `fig` is the layout that gets exported in
# the next cell.
fig = bkl.row(figures)
bk.show(fig)

In [None]:
from lcc.plotting import export_png

# File name is "<model>_<dataset>.png", with "/" in either name replaced by
# "-" so the name contains no path separator.
_model_slug = MODEL_NAME.replace("/", "-")
_dataset_slug = DATASET_NAME.replace("/", "-")
export_png(fig, f"{_model_slug}_{_dataset_slug}.png")