# Late Fusion Stream Training (Playground)

Runs all four streams (J, B, JM, BM) for a given fold, saves sorted `score_eval.npz` per stream in `workdir/fold_{ID}_{STREAM}`, and fuses them at the end. Use `evaluate_only=True` to skip training and just run eval on existing checkpoints.

In [None]:
import os, yaml, numpy as np, torch, subprocess
from types import SimpleNamespace
from torch.utils.data import DataLoader, WeightedRandomSampler
from src.dataset import create as create_dataset
from src.model.MPGCN import MPGCN
from src.scheduler import create as create_scheduler

# Experiment settings shared by every cell below.
fold_id = "22"  # fold to run (noted as the best fold)
streams = "J B JM BM".split()  # one model is trained/evaluated per stream
evaluate_only = False  # True: skip training and only evaluate existing checkpoints

print(f"Fold: {fold_id}")
print(f"Streams: {streams}")

In [None]:

def _prepare_batch(data, target, area_id, device):
    """Validate one loader batch and move its tensors to ``device``.

    Raises:
        ValueError: if ``data`` is not 5-D or ``area_id`` is not 1-D.

    Returns:
        ``(data, target, area_id)`` where ``data`` has an extra axis inserted
        at position 1 and is float, and ``target`` / ``area_id`` are long.
    """
    # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
    if data.dim() != 5:
        raise ValueError(f"Expected 5D (N, C, T, V, M), got {data.shape}")
    if area_id.dim() != 1:
        raise ValueError(f"Bad area_id shape {area_id.shape}")
    # The model is fed a 6-D tensor; presumably axis 1 is the input-branch
    # axis MPGCN expects -- TODO confirm against the model definition.
    data = data.unsqueeze(1).float().to(device)
    target = target.long().to(device)
    area_id = area_id.long().to(device)
    return data, target, area_id


def _eval_accuracy(model, loader, device):
    """Return top-1 accuracy of ``model`` over ``loader`` (0.0 if it is empty)."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data, target, _, area_id in loader:
            data, target, area_id = _prepare_batch(data, target, area_id, device)
            out, _ = model(data, area_id)
            correct += out.argmax(1).eq(target).sum().item()
            total += target.size(0)
    return correct / total if total else 0.0


def run_stream(stream_type, max_epoch=100):
    """Train (unless ``evaluate_only``) and evaluate one stream for ``fold_id``.

    Loads ``config/playground/mpgcn_{stream_type}.yaml``, substitutes the fold
    id into the dataset paths, trains an MPGCN model while keeping the
    checkpoint with the best eval accuracy, then reloads that checkpoint and
    writes logits, labels and sample names of the eval split to
    ``score_eval.npz`` inside the stream's work directory.

    Reads the module-level ``fold_id`` and ``evaluate_only`` settings.

    Args:
        stream_type: Stream name used to pick the config file and work dir
            (the notebook uses "J", "B", "JM", "BM").
        max_epoch: Number of training epochs written into the scheduler
            config; defaults to the previously hard-coded 100.

    Returns:
        The work directory (``./workdir/fold_{fold_id}_{stream_type}``).

    Raises:
        FileNotFoundError: in evaluate-only mode when no ``best.pth.tar``
            exists, since scoring an untrained model would be meaningless.
        ValueError: if the training loader yields no batches, or a batch has
            an unexpected shape.
    """
    config_template = f"config/playground/mpgcn_{stream_type}.yaml"
    with open(config_template, "r") as f:
        cfg = yaml.safe_load(f)

    # Replace fold placeholder in paths
    dataset_cfg = cfg["dataset_args"]
    for key in ("root_folder", "object_folder"):
        dataset_cfg[key] = dataset_cfg[key].replace("{ID}", fold_id)
    dataset_cfg["stream_type"] = stream_type
    dataset_cfg["fold_id"] = fold_id
    cfg["work_dir"] = f"./workdir/fold_{fold_id}_{stream_type}"
    cfg.setdefault("scheduler_args", {}).setdefault(cfg["lr_scheduler"], {})["max_epoch"] = max_epoch

    # Convert to namespaces
    args = SimpleNamespace(**cfg)
    args.dataset_args = SimpleNamespace(**args.dataset_args)
    args.model_args = SimpleNamespace(**args.model_args)
    args.optimizer_args = SimpleNamespace(**args.optimizer_args)
    args.scheduler_args = SimpleNamespace(**args.scheduler_args)

    # Dataset
    feeders, data_shape, num_class, A, parts = create_dataset(
        args.dataset,
        debug=False,
        **vars(args.dataset_args)
    )
    print(f"[{stream_type}] train samples: {len(feeders['train'])}, eval samples: {len(feeders['eval'])}")

    # Sampler (no replacement)
    # NOTE: this walks the whole training feeder once just to read the labels.
    train_labels = np.array([int(y) for _, y, _, _ in feeders["train"]])
    # The epsilon keeps the inverse finite for classes with no training samples.
    class_counts = np.bincount(train_labels, minlength=num_class) + 1e-6
    class_weights = 1.0 / class_counts
    sample_weights = class_weights[train_labels]
    # NOTE(review): without replacement and with num_samples == dataset size,
    # every sample is still drawn exactly once per epoch; the weights only
    # influence the order. Class imbalance is handled by the weighted loss.
    train_sampler = WeightedRandomSampler(
        weights=torch.tensor(sample_weights, dtype=torch.float32),
        num_samples=len(train_labels),
        replacement=False,
    )

    num_workers = 4 * len(args.gpus)
    train_loader = DataLoader(
        feeders["train"],
        batch_size=args.dataset_args.train_batch_size,
        num_workers=num_workers,
        pin_memory=True,
        sampler=train_sampler,
        shuffle=False,
        drop_last=True,
    )

    eval_loader = DataLoader(
        feeders["eval"],
        batch_size=args.dataset_args.eval_batch_size,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=False,
        drop_last=False,
    )

    # Model/optim/scheduler
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = MPGCN(
        data_shape=data_shape,
        num_class=num_class,
        A=torch.Tensor(A),
        parts=parts,
        num_areas=int(np.max(feeders["train"].area_ids)) + 1,
        **vars(args.model_args),
    ).to(device)

    def _to_dict(obj):
        # Per-optimizer / per-scheduler sections may be plain dicts or namespaces.
        if isinstance(obj, dict):
            return obj
        return vars(obj)

    optimizer_cfg = _to_dict(getattr(args.optimizer_args, args.optimizer))
    optimizer_cls = getattr(torch.optim, args.optimizer)
    optimizer = optimizer_cls(model.parameters(), **optimizer_cfg)

    sched_cfg = _to_dict(getattr(args.scheduler_args, args.lr_scheduler))
    lr_scheduler = create_scheduler(args.lr_scheduler, len(train_loader), **sched_cfg)
    # The eval interval returned by the scheduler is not used here:
    # evaluation runs after every epoch.
    _, lr_lambda = lr_scheduler.get_lambda()
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)

    class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

    num_epochs = sched_cfg.get("max_epoch", 100)
    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint; otherwise a stale best.pth.tar from an earlier run could be
    # loaded below when eval accuracy never rises above 0.
    best_acc = -1.0
    save_dir = args.work_dir
    os.makedirs(save_dir, exist_ok=True)
    best_ckpt = os.path.join(save_dir, "best.pth.tar")

    if not evaluate_only:
        if len(train_loader) == 0:
            raise ValueError(
                f"[{stream_type}] training loader is empty "
                "(fewer samples than one batch with drop_last=True)"
            )
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            correct = 0
            total = 0
            for data, target, _, area_id in train_loader:
                data, target, area_id = _prepare_batch(data, target, area_id, device)

                optimizer.zero_grad()
                out, _ = model(data, area_id)
                loss = criterion(out, target)
                loss.backward()
                optimizer.step()
                # The LR schedule is stepped per iteration, not per epoch.
                scheduler.step()

                running_loss += loss.item() * data.size(0)
                preds = out.argmax(1)
                correct += preds.eq(target).sum().item()
                total += target.size(0)

            train_acc = correct / total
            print(f"[{stream_type}] Epoch {epoch+1}/{num_epochs} train_loss={running_loss/total:.4f} acc={train_acc:.4f}")

            eval_acc = _eval_accuracy(model, eval_loader, device)
            print(f"  Eval acc={eval_acc:.4f}")

            if eval_acc > best_acc:
                best_acc = eval_acc
                torch.save({"model": model.state_dict()}, best_ckpt)
                print("  Saved best model")
    else:
        print(f"[{stream_type}] Evaluate-only mode: skipping training")

    # Deterministic eval + save score
    if os.path.exists(best_ckpt):
        state = torch.load(best_ckpt, map_location="cpu")
        model.load_state_dict(state["model"])
        print(f"[{stream_type}] Loaded best checkpoint")
    elif evaluate_only:
        # Nothing was trained in this call, so the model holds only its initial
        # weights; refuse to write scores that would corrupt the fusion.
        raise FileNotFoundError(
            f"[{stream_type}] evaluate_only=True but no checkpoint found at {best_ckpt}"
        )
    else:
        print(f"[{stream_type}] Warning: best checkpoint not found, using current weights")

    model.eval()
    all_logits, all_labels, all_names = [], [], []
    with torch.no_grad():
        for data, target, names, area_id in eval_loader:
            data, target, area_id = _prepare_batch(data, target, area_id, device)
            out, _ = model(data, area_id)
            all_logits.append(out.cpu().numpy())
            all_labels.append(target.cpu().numpy())
            all_names.extend(names)

    logits_arr = np.concatenate(all_logits, axis=0)
    labels_arr = np.concatenate(all_labels, axis=0)
    names_arr = np.array(all_names)

    np.savez(os.path.join(args.work_dir, "score_eval.npz"), logits=logits_arr, labels=labels_arr, names=names_arr)
    print(f"[{stream_type}] Saved score_eval.npz to {args.work_dir}")

    return args.work_dir


In [None]:
# Process every configured stream in order, keeping the work directory
# that each run_stream call returns.
work_dirs = [run_stream(stream_name) for stream_name in streams]
print("Work dirs:", work_dirs)

## Fuse streams with ensemble script

In [None]:
import sys

# Fuse the per-stream scores with the external ensemble script.
# - sys.executable runs the script with the same interpreter as this kernel
#   instead of whatever "python" resolves to on PATH.
# - check=True raises CalledProcessError on a non-zero exit status, so a
#   failed fusion is not silently ignored.
# - The stream list is taken from `streams` so it cannot drift from the
#   streams that were actually run.
subprocess.run(
    [
        sys.executable, "script/ensemble_playground.py",
        "--fold", fold_id,
        "--streams", *streams,
        "--workdir", "./workdir",
    ],
    check=True,
)

## In-notebook mean fusion

In [None]:
# Average the logits that each stream saved to score_eval.npz and report the
# accuracy of the fused prediction. Every stream must list the same samples
# in the same order, otherwise the element-wise average would mix samples.
streams = ["J", "B", "JM", "BM"]
scores = []
labels_ref = None
names_ref = None
for s in streams:
    path = f"./workdir/fold_{fold_id}_{s}/score_eval.npz"
    # np.load returns an NpzFile that keeps the archive open; read each array
    # once inside the context manager so the file handle is released.
    with np.load(path) as d:
        logits = d["logits"]
        labels = d["labels"]
        names = d["names"]
    if labels_ref is None:
        # The first stream defines the reference labels and sample order.
        labels_ref = labels
        names_ref = names
    else:
        if not np.array_equal(labels_ref, labels):
            raise ValueError("Label mismatch across streams")
        if not np.array_equal(names_ref, names):
            raise ValueError("Name/order mismatch across streams")
    scores.append(logits)

# Unweighted mean of the raw logits across streams.
fused = sum(scores) / len(scores)
preds = fused.argmax(1)
final_acc = (preds == labels_ref).mean()
print("Late-fusion accuracy (mean weights):", final_acc)

## Summary table

In [None]:
import pandas as pd

# Placeholder results table: one row per stream plus the fused result,
# with every accuracy left as None.
summary = [
    {"Stream": row_label, "Acc": None}
    for row_label in ("J", "B", "JM", "BM", "Fusion")
]
pd.DataFrame(summary)