In [1]:
import os
import math
import json
import time
import random
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Callable, Dict, List, Tuple, Optional

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split

import torchvision
import torchvision.transforms as T

print("torch:", torch.__version__)
print("torchvision:", torchvision.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

torch: 2.9.0+cpu
torchvision: 0.24.0+cpu
device: cpu


In [None]:
# MNIST auxiliary-logit distillation: gradient-alignment logging
#
# This notebook implements the MNIST MLP setup from *Subliminal Learning*:
# - Teacher: train on MNIST with CE using **first 10 logits only**.
# - Student: distill **aux logits only** (last `m` logits) from the teacher using KL on **noise images**.
#
# It logs (at a configurable interval) the **trait loss**, **distill loss**, **gradient inner product**, and
# **cosine similarity**, and saves **one CSV per seed**.

@dataclass
class ExperimentConfig:
    # Runs
    seeds: List[int] = None  # e.g., [1,2,...]
    out_dir: str = "./runs_mnist_subliminal_crt_period"

    # Data
    batch_size: int = 1024
    num_workers: int = 0  # keep 0 for strict determinism
    audit_size: int = 10_000
    noise_dataset_size: int = 60_000

    # Model (MLP from paper)
    hidden_dim: int = 256
    aux_m: int = 3

    # Training
    teacher_epochs: int = 5
    student_epochs: int = 5
    lr_teacher: float = 3e-4
    lr_student: float = 3e-4

    # Logging
    metrics_every_n_steps: int = 50
    audit_batches_for_grad: Optional[int] = 1
    # If None, use the entire audit loader for trait gradients (slower but closer to full-set grads).

    trait_acc_every_n_steps: int = 50
    trait_acc_max_batches: Optional[int] = 1
    # If None, use the entire like gradient.

cfg = ExperimentConfig(
    seeds=list(range(1, 11)),
    out_dir="./runs_mnist_subliminal_crt_period",
    batch_size=1024,
    num_workers=0,
    audit_size=10_000,
    noise_dataset_size=60_000,
    hidden_dim=256,
    aux_m=3,
    teacher_epochs=5,
    student_epochs=5,
    lr_teacher=3e-4,
    lr_student=3e-4,
    metrics_every_n_steps=1,
    audit_batches_for_grad=None,
    trait_acc_every_n_steps = 1,
    trait_acc_max_batches=None,
)

Path(cfg.out_dir).mkdir(parents=True, exist_ok=True)
cfg

def set_global_seed(seed: int, deterministic: bool = True) -> None:
    """Seed python, numpy, and torch. Optionally enable deterministic algorithms."""
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        try:
            torch.use_deterministic_algorithms(True)
        except Exception as e:
            print("Warning: could not enable full deterministic algorithms:", e)

def make_torch_generator(seed: int) -> torch.Generator:
    g = torch.Generator()
    g.manual_seed(seed)
    return g

class NoiseImages(Dataset):
    """Deterministic noise dataset: each index produces a reproducible noise image."""
    def __init__(self, length: int, seed: int, shape=(1, 28, 28), dist: str = "normal"):
        self.length = int(length)
        self.seed = int(seed)
        self.shape = tuple(shape)
        self.dist = dist

    def __len__(self) -> int:
        return self.length

    def __getitem__(self, idx: int):
        # Per-index deterministic generation.
        g = torch.Generator()
        g.manual_seed(self.seed * 1_000_000 + int(idx))
        if self.dist == "normal":
            x = torch.randn(self.shape, generator=g)
        elif self.dist == "uniform":
            x = torch.rand(self.shape, generator=g) * 2 - 1
        else:
            raise ValueError(f"Unknown dist: {self.dist}")
        # Dummy label (unused)
        y = 0
        return x, y

def get_mnist_datasets(root: str = "./data"):
    transform = T.Compose([T.ToTensor()])
    train = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform)
    test = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform)
    return train, test

def split_train_audit(train_ds, audit_size: int, seed: int):
    n = len(train_ds)
    audit_size = min(int(audit_size), n)
    train_size = n - audit_size
    g = make_torch_generator(seed)
    train_split, audit_split = random_split(train_ds, [train_size, audit_size], generator=g)
    return train_split, audit_split

def make_loader(ds, batch_size: int, shuffle: bool, seed: int, num_workers: int = 0):
    # Deterministic shuffling via DataLoader generator.
    g = make_torch_generator(seed)
    return DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available(),
        generator=g,
        drop_last=False,
    )

class MLPClassifier(nn.Module):
    """MLP from the Subliminal Learning MNIST experiment: (784, 256, 256, 10+m) with ReLU."""
    def __init__(self, hidden_dim: int = 256, aux_m: int = 3):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.aux_m = aux_m
        self.fc1 = nn.Linear(28 * 28, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, 10 + aux_m)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

def build_model_mlp(cfg: ExperimentConfig) -> nn.Module:
    """Swap model architecture by changing this builder (or passing another builder to the runner)."""
    return MLPClassifier(hidden_dim=cfg.hidden_dim, aux_m=cfg.aux_m)

def logits_regular(logits: torch.Tensor) -> torch.Tensor:
    return logits[:, :10]

def logits_aux(logits: torch.Tensor, aux_m: int) -> torch.Tensor:
    return logits[:, 10:10 + aux_m]

@torch.no_grad()
def accuracy_on_loader(model: nn.Module, loader: DataLoader, device: torch.device) -> float:
    model.eval()
    correct, total = 0, 0
    for x, y in loader:
        x = x.to(device)
        y = y.to(device)
        logits = logits_regular(model(x))
        pred = logits.argmax(dim=1)
        correct += (pred == y).sum().item()
        total += y.numel()
    return correct / max(total, 1)

@torch.no_grad()
def accuracy_on_loader_e3(
    model: nn.Module,
    loader: DataLoader,
    device: torch.device,
    max_batches: Optional[int] = None,
) -> float:
    model.eval()
    correct, total = 0, 0
    for bi, (x,y)in enumerate(loader):
        if max_batches is not None and bi >= max_batches:
            break
        x = x.to(device)
        y = y.to(device)
        logits = logits_regular(model(x))
        pred = logits.argmax(dim=1)
        correct += (pred == y).sum().item()
        total += y.numel()
    return correct / max(total, 1)

def flatten_grads_from_params(params: List[torch.nn.Parameter]) -> torch.Tensor:
    """Flatten gradients already stored in .grad (e.g., after backward())."""
    flats = []
    for p in params:
        if p.grad is None:
            flats.append(torch.zeros_like(p).view(-1))
        else:
            flats.append(p.grad.detach().view(-1))
    return torch.cat(flats)

def flatten_grads_tuple(grads: Tuple[torch.Tensor, ...]) -> torch.Tensor:
    """Flatten gradients returned by autograd.grad()."""
    flats = [g.detach().view(-1) for g in grads]
    return torch.cat(flats)

def cosine_similarity(a: torch.Tensor, b: torch.Tensor, eps: float = 1e-12) -> float:
    denom = (a.norm() * b.norm()).clamp_min(eps)
    return float((a @ b) / denom)

def train_teacher(
    model: nn.Module,
    train_loader: DataLoader,
    test_loader: DataLoader,
    cfg: ExperimentConfig,
    device: torch.device,
) -> Dict[str, float]:
    """Train teacher on MNIST CE using regular logits only."""
    model = model.to(device)
    model.train()
    opt = torch.optim.Adam(model.parameters(), lr=cfg.lr_teacher)

    for epoch in range(cfg.teacher_epochs):
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            opt.zero_grad(set_to_none=True)
            logits = logits_regular(model(x))
            loss = F.cross_entropy(logits, y)
            loss.backward()
            opt.step()

    teacher_acc = accuracy_on_loader(model, test_loader, device)
    return {"teacher_test_acc": float(teacher_acc)}

def compute_trait_loss_and_grad(
    student: nn.Module,
    audit_loader: DataLoader,
    cfg: ExperimentConfig,
    device: torch.device,
) -> Tuple[float, torch.Tensor]:
    """
    Compute trait loss + gradient wrt student params over `audit_batches_for_grad` batches
    (or full loader if None).
    """
    params = [p for p in student.parameters() if p.requires_grad]

    # Accumulate a mean loss across selected batches; gradient taken from that mean.
    losses = []
    for bi, (x, y) in enumerate(audit_loader):
        if cfg.audit_batches_for_grad is not None and bi >= cfg.audit_batches_for_grad:
            break
        x = x.to(device)
        y = y.to(device)
        logits = logits_regular(student(x))
        loss = F.cross_entropy(logits, y)
        losses.append(loss)

    if len(losses) == 0:
        raise RuntimeError("audit_loader produced no batches")

    mean_loss = torch.stack(losses).mean()
    grads = torch.autograd.grad(mean_loss, params, retain_graph=False, create_graph=False)
    g_flat = flatten_grads_tuple(grads)
    return float(mean_loss.detach()), g_flat

def distill_student_with_logging(
    student: nn.Module,
    teacher: nn.Module,
    noise_loader: DataLoader,
    audit_loader: DataLoader,
    test_loader: DataLoader,
    cfg: ExperimentConfig,
    seed: int,
    device: torch.device,
) -> Tuple[pd.DataFrame, Dict[str, float]]:
    """Train student on noise to match teacher aux logits. Log gradient alignment at intervals."""
    student = student.to(device)
    teacher = teacher.to(device)
    teacher.eval()
    for p in teacher.parameters():
        p.requires_grad_(False)

    opt = torch.optim.Adam(student.parameters(), lr=cfg.lr_student)
    kl = torch.nn.KLDivLoss(reduction="batchmean")

    logs: List[Dict[str, float]] = []
    global_step = 0

    params = [p for p in student.parameters() if p.requires_grad]

    student.train()
    for epoch in range(cfg.student_epochs):
        for x_noise, _ in noise_loader:
            x_noise = x_noise.to(device)

            # --- Distillation loss (aux logits only) ---
            with torch.no_grad():
                t_logits = teacher(x_noise)
                t_aux = logits_aux(t_logits, cfg.aux_m)
                t_prob = F.softmax(t_aux, dim=1)

            opt.zero_grad(set_to_none=True)
            s_logits = student(x_noise)
            s_aux = logits_aux(s_logits, cfg.aux_m)
            s_logprob = F.log_softmax(s_aux, dim=1)
            distill_loss = kl(s_logprob, t_prob)
            distill_loss.backward()

            do_log = (global_step % cfg.metrics_every_n_steps == 0)
            if do_log:
                # Snapshot distill gradient from .grad
                g_distill = flatten_grads_from_params(params)

                # Trait loss + gradient on audit set
                trait_loss_val, g_trait = compute_trait_loss_and_grad(
                    student=student,
                    audit_loader=audit_loader,
                    cfg=cfg,
                    device=device,
                )

                trait_acc = None
                if (global_step % cfg.trait_acc_every_n_steps) == 0:
                    # intermediate eval() for acc, then return to train()
                    student.eval()
                    trait_acc = accuracy_on_loader_e3(
                        model=student,
                        loader=test_loader,
                        device=device,
                        max_batches=cfg.trait_acc_max_batches,
                    )
                    student.train()

                inner = float(g_trait @ g_distill)
                cos = cosine_similarity(g_trait, g_distill)

                logs.append({
                    "seed": seed,
                    "step": global_step,
                    "epoch": epoch,
                    "trait_loss": trait_loss_val,
                    "distill_loss": float(distill_loss.detach()),
                    "inner_product": inner,
                    "cosine_similarity": cos,
                    "g_trait_norm": float(g_trait.norm().detach()),
                    "g_distill_norm": float(g_distill.norm().detach()),
                    "trait_test_acc_step": (float(trait_acc) if trait_acc is not None else np.nan),
                })

            opt.step()
            global_step += 1

    # Final performance
    student_acc = accuracy_on_loader(student, test_loader, device)

    df = pd.DataFrame(logs)
    info = {
        "student_test_acc": float(student_acc),
        "total_steps": int(global_step),
        "num_logged_rows": int(len(df)),
    }
    return df, info

def run_one_instance(
    seed: int,
    cfg: ExperimentConfig,
    build_model_fn: Callable[[ExperimentConfig], nn.Module],
    device: torch.device,
) -> Path:
    """
    One fully independent run:
      - builds reference init
      - trains teacher
      - distills student
      - writes CSV + metadata
    """
    set_global_seed(seed, deterministic=True)

    run_dir = Path(cfg.out_dir) / f"seed_{seed:02d}"
    run_dir.mkdir(parents=True, exist_ok=True)

    # --- Data ---
    mnist_train, mnist_test = get_mnist_datasets(root=str(Path(cfg.out_dir) / "data_cache"))
    train_split, audit_split = split_train_audit(mnist_train, audit_size=cfg.audit_size, seed=seed)

    train_loader = make_loader(train_split, cfg.batch_size, shuffle=True, seed=seed + 100, num_workers=cfg.num_workers)
    audit_loader = make_loader(audit_split, cfg.batch_size, shuffle=True, seed=seed + 200, num_workers=cfg.num_workers)
    test_loader  = make_loader(mnist_test, cfg.batch_size, shuffle=False, seed=seed + 300, num_workers=cfg.num_workers)

    noise_ds = NoiseImages(length=cfg.noise_dataset_size, seed=seed + 400, shape=(1, 28, 28), dist="normal")
    noise_loader = make_loader(noise_ds, cfg.batch_size, shuffle=True, seed=seed + 500, num_workers=cfg.num_workers)

    # --- Models: reference init, teacher trained from it, student is a fresh copy of reference init ---
    reference = build_model_fn(cfg)
    reference_state = {k: v.clone().detach().cpu() for k, v in reference.state_dict().items()}

    teacher = build_model_fn(cfg)
    teacher.load_state_dict(reference_state)

    student = build_model_fn(cfg)
    student.load_state_dict(reference_state)

    # --- Teacher training ---
    teacher_info = train_teacher(
        model=teacher,
        train_loader=train_loader,
        test_loader=test_loader,
        cfg=cfg,
        device=device,
    )

    # --- Student distillation + logging ---
    df, student_info = distill_student_with_logging(
        student=student,
        teacher=teacher,
        noise_loader=noise_loader,
        audit_loader=audit_loader,
        test_loader=test_loader,
        cfg=cfg,
        seed=seed,
        device=device,
    )

    # Add final performance columns to every row
    df["teacher_test_acc"] = teacher_info["teacher_test_acc"]
    df["student_test_acc"] = student_info["student_test_acc"]
    df["total_steps"] = student_info["total_steps"]

    # Write CSV
    csv_path = run_dir / "metrics.csv"
    df.to_csv(csv_path, index=False)

    # Write a metadata json for reproducibility
    meta = {
        "seed": seed,
        "config": asdict(cfg),
        "teacher_info": teacher_info,
        "student_info": student_info,
        "created_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "device": str(device),
    }
    with open(run_dir / "metadata.json", "w") as f:
        json.dump(meta, f, indent=2)

    print(f"[seed {seed}] teacher_acc={teacher_info['teacher_test_acc']:.4f} "
          f"student_acc={student_info['student_test_acc']:.4f} rows={len(df)} "
          f"csv={csv_path}")
    return csv_path

# Run all instances (seeds 1..10 by default)
csv_paths = []
for s in cfg.seeds:
    csv_paths.append(run_one_instance(seed=s, cfg=cfg, build_model_fn=build_model_mlp, device=device))

print(f"Wrote {len(csv_paths)} CSVs under {cfg.out_dir}")

100%|██████████| 9.91M/9.91M [00:01<00:00, 5.99MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 158kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.52MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.03MB/s]


[seed 1] teacher_acc=0.9296 student_acc=0.1171 rows=295 csv=runs_mnist_subliminal_crt_period/seed_01/metrics.csv
[seed 2] teacher_acc=0.9276 student_acc=0.1760 rows=295 csv=runs_mnist_subliminal_crt_period/seed_02/metrics.csv
[seed 3] teacher_acc=0.9315 student_acc=0.3504 rows=295 csv=runs_mnist_subliminal_crt_period/seed_03/metrics.csv
[seed 4] teacher_acc=0.9291 student_acc=0.1962 rows=295 csv=runs_mnist_subliminal_crt_period/seed_04/metrics.csv
[seed 5] teacher_acc=0.9296 student_acc=0.4617 rows=295 csv=runs_mnist_subliminal_crt_period/seed_05/metrics.csv
[seed 6] teacher_acc=0.9322 student_acc=0.3791 rows=295 csv=runs_mnist_subliminal_crt_period/seed_06/metrics.csv
[seed 7] teacher_acc=0.9332 student_acc=0.3537 rows=295 csv=runs_mnist_subliminal_crt_period/seed_07/metrics.csv
[seed 8] teacher_acc=0.9318 student_acc=0.2055 rows=295 csv=runs_mnist_subliminal_crt_period/seed_08/metrics.csv
[seed 9] teacher_acc=0.9304 student_acc=0.3518 rows=295 csv=runs_mnist_subliminal_crt_period/see

In [None]:
RUNS_DIR = Path("./runs_mnist_subliminal_crt_period")

def load_all_runs(runs_dir: Path) -> pd.DataFrame:
    """Load all seed_*/metrics.csv into one dataframe."""
    all_dfs = []
    for seed_dir in sorted(runs_dir.glob("seed_*")):
        csv_path = seed_dir / "metrics.csv"
        if csv_path.exists():
            df = pd.read_csv(csv_path)
            df["run_dir"] = str(seed_dir)
            all_dfs.append(df)
    return pd.concat(all_dfs, ignore_index=True)

def compute_run_level_summary(df_all: pd.DataFrame) -> pd.DataFrame:
    """
    Compute per-run (per-seed) metrics:
      - fraction of positive alignment (cos > 0)
      - mean cosine similarity
      - mean inner product
      - performance (student_test_acc, teacher_test_acc)
    """
    def summarize_one_run(g: pd.DataFrame) -> pd.Series:
        cos = g["cosine_similarity"].to_numpy()
        inner = g["inner_product"].to_numpy()

        frac_pos = float(np.mean(cos > 0)) if len(cos) else np.nan
        mean_cos = float(np.mean(cos)) if len(cos) else np.nan
        mean_inner = float(np.mean(inner)) if len(inner) else np.nan

        # performance values are constant per run (we wrote them on every row)
        student_acc = float(g["student_test_acc"].iloc[0]) if "student_test_acc" in g.columns else np.nan
        teacher_acc = float(g["teacher_test_acc"].iloc[0]) if "teacher_test_acc" in g.columns else np.nan

        return pd.Series({
            "frac_positive_alignment": frac_pos,
            "mean_cosine_similarity": mean_cos,
            "mean_inner_product": mean_inner,
            "student_test_acc": student_acc,
            "teacher_test_acc": teacher_acc,
            "num_logged_rows": int(len(g)),
        })

    per_run = df_all.groupby("seed", sort=True).apply(summarize_one_run).reset_index()
    return per_run

def compute_overall_averages(per_run: pd.DataFrame) -> dict:
    """Average across runs (seeds), each run weighted equally."""
    keys = [
        "frac_positive_alignment",
        "mean_cosine_similarity",
        "mean_inner_product",
        "student_test_acc",
        "teacher_test_acc",
    ]
    out = {}
    for k in keys:
        out[k] = float(per_run[k].mean())
        out[k + "_std"] = float(per_run[k].std(ddof=1)) if len(per_run) > 1 else float("nan")
    out["num_runs"] = int(len(per_run))
    return out

df_all = load_all_runs(RUNS_DIR)
per_run = compute_run_level_summary(df_all)
overall = compute_overall_averages(per_run)

print("Per-run summary:")
display(per_run)

print("\nOverall averages across runs (equal weight per seed):")
for k, v in overall.items():
    print(f"{k}: {v}")

per_run.to_csv(RUNS_DIR / "per_run_summary.csv", index=False)
pd.DataFrame([overall]).to_csv(RUNS_DIR / "overall_summary.csv", index=False)
print(f"\nSaved: {RUNS_DIR/'per_run_summary.csv'} and {RUNS_DIR/'overall_summary.csv'}")

Per-run summary:


  per_run = df_all.groupby("seed", sort=True).apply(summarize_one_run).reset_index()


Unnamed: 0,seed,frac_positive_alignment,mean_cosine_similarity,mean_inner_product,student_test_acc,teacher_test_acc,num_logged_rows
0,1,0.722034,0.006719,5.2e-05,0.1171,0.9296,295.0
1,2,0.813559,0.00702,5.5e-05,0.176,0.9276,295.0
2,3,0.847458,0.006026,5.2e-05,0.3504,0.9315,295.0
3,4,0.698305,0.005548,3.4e-05,0.1962,0.9291,295.0
4,5,0.759322,0.006611,5.5e-05,0.4617,0.9296,295.0
5,6,0.857627,0.007082,5.8e-05,0.3791,0.9322,295.0
6,7,0.928814,0.007763,5.4e-05,0.3537,0.9332,295.0
7,8,0.935593,0.006979,4.4e-05,0.2055,0.9318,295.0
8,9,0.905085,0.005819,5e-05,0.3518,0.9304,295.0
9,10,0.725424,0.004164,4.8e-05,0.1048,0.9303,295.0



Overall averages across runs (equal weight per seed):
frac_positive_alignment: 0.8193220338983049
frac_positive_alignment_std: 0.08924479484400052
mean_cosine_similarity: 0.006373161120200753
mean_cosine_similarity_std: 0.0010212003111252238
mean_inner_product: 5.018745162753113e-05
mean_inner_product_std: 6.950832605318827e-06
student_test_acc: 0.26963
student_test_acc_std: 0.12379353824457523
teacher_test_acc: 0.9305300000000001
teacher_test_acc_std: 0.0016633633931819594
num_runs: 10

Saved: runs_mnist_subliminal_crt_period/per_run_summary.csv and runs_mnist_subliminal_crt_period/overall_summary.csv


In [None]:
!zip -r runs_mnist_subliminal_crt_period.zip ./runs_mnist_subliminal_crt_period/

  adding: runs_mnist_subliminal_crt_period/ (stored 0%)
  adding: runs_mnist_subliminal_crt_period/seed_06/ (stored 0%)
  adding: runs_mnist_subliminal_crt_period/seed_06/metadata.json (deflated 55%)
  adding: runs_mnist_subliminal_crt_period/seed_06/metrics.csv (deflated 59%)
  adding: runs_mnist_subliminal_crt_period/seed_02/ (stored 0%)
  adding: runs_mnist_subliminal_crt_period/seed_02/metadata.json (deflated 55%)
  adding: runs_mnist_subliminal_crt_period/seed_02/metrics.csv (deflated 59%)
  adding: runs_mnist_subliminal_crt_period/data_cache/ (stored 0%)
  adding: runs_mnist_subliminal_crt_period/data_cache/MNIST/ (stored 0%)
  adding: runs_mnist_subliminal_crt_period/data_cache/MNIST/raw/ (stored 0%)
  adding: runs_mnist_subliminal_crt_period/data_cache/MNIST/raw/t10k-labels-idx1-ubyte.gz (stored 0%)
  adding: runs_mnist_subliminal_crt_period/data_cache/MNIST/raw/t10k-images-idx3-ubyte (deflated 79%)
  adding: runs_mnist_subliminal_crt_period/data_cache/MNIST/raw/train-labels-id