In [1]:
# Cell 0 — Exp04c config & paths (NO semi-supervised)
from pathlib import Path
import re
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Run on the GPU when PyTorch can see one; otherwise everything stays on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("DEVICE:", DEVICE)

# ====== Paths ======
# Every challenge input lives under one folder: edit DATA_ROOT alone to relocate the data.
DATA_ROOT   = Path(r"C:\Users\asus\Desktop\ECN\DEEP\DataChallenge\data")
X_TEST_DIR  = DATA_ROOT / "X_test_xNbnvIa"
X_TRAIN_DIR = DATA_ROOT / "X_train_uDRk9z9"
X_UNLAB_DIR = DATA_ROOT / "X_unlabeled_mtkxUlo"
Y_TRAIN_CSV = DATA_ROOT / "Y_train_T9NrBYo.csv"
SAMPLE_SUB  = DATA_ROOT / "submission_csv_file_random_example_3qPSCtv.csv"

# Joined from components (not a literal backslash) so the output folder is nested
# correctly on every operating system; on Windows the resulting path is unchanged.
OUT_DIR = Path("exp_outputs") / "Exp04c_SSLCombo_then_SUP"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SSL_DIR  = OUT_DIR / "ssl_pretrain"      # self-supervised checkpoints
SUP_DIR  = OUT_DIR / "supervised"        # supervised fine-tuning checkpoint
SUB_DIR  = OUT_DIR / "test_predictions"  # per-patch .npy predictions
for d in [SSL_DIR, SUP_DIR, SUB_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# ====== constants ======
NUM_CLASSES  = 3            # number of segmentation classes
IGNORE_INDEX = 255          # label written into padded columns; excluded from loss/metric
H            = 160          # patch height
W_PAD        = 288          # train on padded width
SIZE_LABELS  = 272          # submission flatten width
torch.backends.cudnn.benchmark = True

# Hugging Face checkpoint id handed to `from_pretrained` for both the SSL and supervised models.
BACKBONE = "nvidia/segformer-b2-finetuned-ade-512-512"


DEVICE: cuda


In [2]:
# Cell 1 — Utilities: de-dup list npy, parse, load/pad X, load/pad Y
# Patch file stems look like "well_<int>_section_<int>_patch_<int>".
NAME_RE = re.compile(r"well_(\d+)_section_(\d+)_patch_(\d+)$")

def parse_name(stem: str):
    """Split a patch file stem into its numeric identifiers.

    Args:
        stem: file name without extension, e.g. ``"well_1_section_2_patch_3"``.

    Returns:
        ``(well, section, patch)`` as a tuple of three ints.

    Raises:
        ValueError: if ``stem`` does not follow the patch naming pattern.
    """
    found = NAME_RE.match(stem)
    if found is None:
        raise ValueError(f"Bad patch name: {stem}")
    well, section, patch = (int(group) for group in found.groups())
    return well, section, patch

def list_npy_files(dir_path: Path):
    """Recursively collect the ``.npy`` / ``.NPY`` files below ``dir_path``.

    Both spellings of the extension are globbed; the hits are resolved to
    absolute paths and de-duplicated so that a file matched by both patterns
    is listed once.

    Returns:
        A sorted list of resolved ``Path`` objects.
    """
    resolved = set()
    for pattern in ("*.npy", "*.NPY"):
        for hit in dir_path.rglob(pattern):
            resolved.add(Path(hit).resolve())
    return [Path(entry) for entry in sorted(resolved)]

def load_x(path: Path) -> np.ndarray:
    """Load one image patch and rescale it to the [0, 1] range.

    Steps: load the ``.npy`` file, drop a leading singleton axis if the array
    is 3-D with ``shape[0] == 1``, replace NaN/+inf/-inf by 0, cast to
    float32, then min-max normalise using this patch's own minimum/maximum.
    A constant patch (max not greater than min) becomes all zeros.

    Returns:
        float32 array; callers treat it as ``(160, w)``.
    """
    raw = np.load(path)
    has_singleton_channel = raw.ndim == 3 and raw.shape[0] == 1
    if has_singleton_channel:
        raw = raw[0]
    clean = np.nan_to_num(raw, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)
    lo = float(clean.min())
    hi = float(clean.max())
    if not (hi > lo):
        # Flat patch: there is no dynamic range to stretch.
        return np.zeros_like(clean, dtype=np.float32)
    return (clean - lo) / (hi - lo)  # (160,w)

def pad_x_to_wpad(x: np.ndarray) -> np.ndarray:
    """Right-pad a 2-D patch with zeros up to ``W_PAD`` columns.

    The input occupies the left-most columns of the float32 result; the
    remaining columns stay 0. The height is taken from ``x`` itself.
    """
    rows, cols = x.shape
    padded = np.zeros((rows, W_PAD), dtype=np.float32)
    padded[:, :cols] = x
    return padded

def make_valid_mask(w: int) -> np.ndarray:
    """Boolean ``(H, W_PAD)`` mask that is True in the first ``w`` columns.

    Those columns hold real image content after padding; the rest is padding.
    """
    mask = np.full((H, W_PAD), False, dtype=np.bool_)
    mask[:, slice(None, w)] = True
    return mask

# Label table: the first CSV column becomes the index, which the rest of the
# notebook matches against patch names; each row is a flattened mask in which
# -1 entries are treated as filler (see restore_mask_from_row).
y_df = pd.read_csv(Y_TRAIN_CSV, index_col=0)

def restore_mask_from_row(row_values: np.ndarray) -> np.ndarray:
    """Rebuild a 2-D label mask from one flattened label row.

    Entries equal to -1 are dropped, and what remains is reshaped to ``H``
    rows (the width is inferred) and returned as int64.
    """
    is_label = row_values != -1
    labels = row_values[is_label]
    return labels.reshape(H, -1).astype(np.int64)  # (160,160) or (160,272)

def pad_mask_to_wpad(mask: np.ndarray, w: int) -> np.ndarray:
    """Place ``mask`` in the first ``w`` columns of an ``(H, W_PAD)`` int64 canvas.

    Every other cell is set to ``IGNORE_INDEX`` so padded pixels can be
    excluded from the loss and the metric.
    """
    canvas = np.empty((H, W_PAD), dtype=np.int64)
    canvas.fill(IGNORE_INDEX)
    canvas[:, :w] = mask
    return canvas


In [3]:
# Cell 2 — Build manifests: ssl_df = (train images + unlabeled images)
def build_manifest(x_dir: Path) -> pd.DataFrame:
    """Index the patch files under ``x_dir``.

    Files whose stem does not follow the patch naming pattern are skipped.
    Only the array header is inspected (memory-mapped load) to read the width.

    Args:
        x_dir: directory searched recursively for ``.npy`` files.

    Returns:
        DataFrame with columns ``name`` (file stem), ``w`` (patch width) and
        ``path`` (file path as a string). The columns are present even when
        no patch file is found, so downstream column lookups keep working.

    Raises:
        ValueError: if a patch array is neither 2-D nor 3-D with a leading
            singleton axis.
    """
    columns = ["name", "w", "path"]
    rows = []
    for p in list_npy_files(x_dir):
        stem = p.stem
        try:
            parse_name(stem)
        except ValueError:
            # Not a patch file: ignore it rather than abort the scan.
            continue
        arr = np.load(p, mmap_mode="r")  # memory-mapped: the pixel data is not read
        if arr.ndim == 3 and arr.shape[0] == 1:
            w = int(arr.shape[2])
        elif arr.ndim == 2:
            w = int(arr.shape[1])
        else:
            raise ValueError(f"Unexpected shape {arr.shape} for {p}")
        rows.append({"name": stem, "w": w, "path": str(p)})
    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(rows, columns=columns)

# Index both image pools.
train_all, unlab_all = (build_manifest(folder) for folder in (X_TRAIN_DIR, X_UNLAB_DIR))

# Self-supervised pool = train images + unlabeled images, one row per distinct file path.
ssl_df = (
    pd.concat([train_all, unlab_all], axis=0, ignore_index=True)
    .drop_duplicates(subset=["path"])
    .reset_index(drop=True)
)

# Labeled subset for supervised training: train patches whose name appears in the label index.
train_labeled = train_all.loc[train_all["name"].isin(y_df.index)].reset_index(drop=True)

print("train(all):", len(train_all), "labeled:", len(train_labeled))
print("unlabeled :", len(unlab_all))
print("ssl pool  :", len(ssl_df), "width:", ssl_df["w"].value_counts().to_dict())


train(all): 4410 labeled: 4410
unlabeled : 1980
ssl pool  : 6390 width: {272: 4230, 160: 2160}


In [4]:
# Cell 3 — SSL Dataset + augment + masking (for recon)
class SSLDataset(Dataset):
    """Unlabeled patches for self-supervised pre-training.

    Each item is ``(x_t, valid)``: the normalised patch padded to ``W_PAD``
    with a leading channel axis, and a boolean mask marking the columns that
    contain real image content (as opposed to padding).
    """

    def __init__(self, df: pd.DataFrame):
        # Re-index so positional access in __getitem__ matches 0..len-1.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        record = self.df.iloc[idx]
        width = int(record["w"])
        image = pad_x_to_wpad(load_x(Path(record["path"])))  # (160,288)
        x_t = torch.from_numpy(image).unsqueeze(0)            # (1,160,288)
        valid = torch.from_numpy(make_valid_mask(width))      # (160,288) bool
        return x_t, valid

# Shuffled loader over the whole SSL pool. Loading stays in the main process
# (num_workers=0); host memory is pinned only when batches will be copied to a GPU.
ssl_loader = DataLoader(
    dataset=SSLDataset(ssl_df),
    batch_size=32,
    shuffle=True,
    num_workers=0,
    pin_memory=DEVICE == "cuda",
)

def intensity_aug(x: torch.Tensor) -> torch.Tensor:
    """Return a randomly perturbed view of a batch of images.

    Per sample: a gain in [0.85, 1.15], an offset in [-0.08, 0.08] and
    Gaussian noise whose std is drawn from [0, 0.04]; the result is clamped
    to [0, 1] after the affine step and again after the noise. Finally, with
    probability 0.5 the *whole batch* is mirrored along its last axis.

    Args:
        x: (B,1,H,W) tensor with values in [0,1].

    NOTE(review): the flip decision is drawn once per call, not per sample,
    and the caller's `valid` mask is not mirrored with it — confirm that this
    is intended for the reconstruction branch.
    """
    per_sample = (x.size(0), 1, 1, 1)

    def _uniform(low: float, high: float) -> torch.Tensor:
        # One draw per sample, broadcast over channel/height/width.
        return torch.empty(per_sample, device=x.device).uniform_(low, high)

    gain = _uniform(0.85, 1.15)
    offset = _uniform(-0.08, 0.08)
    out = (x * gain + offset).clamp(0.0, 1.0)

    noise_std = _uniform(0.0, 0.04)
    out = (out + torch.randn_like(out) * noise_std).clamp(0.0, 1.0)

    mirror = torch.rand((), device=x.device).item() < 0.5
    return torch.flip(out, dims=[3]) if mirror else out

def block_mask(valid: torch.Tensor, drop_prob=0.9, block_h=16, block_w=32):
    """Draw at most one rectangular block per sample to hide from the model.

    For each sample a block of ``block_h`` x ``block_w`` pixels is placed at a
    uniformly random position with probability ``drop_prob``; the block is then
    intersected with ``valid`` so that padded pixels are never marked.

    Args:
        valid: (B,H,W) bool tensor of real-content pixels.

    Returns:
        (B,1,H,W) bool tensor, True where pixels are to be masked.
    """
    n_img, height, width = valid.shape
    dev = valid.device
    blocks = torch.zeros((n_img, 1, height, width), dtype=torch.bool, device=dev)
    top_limit = height - block_h + 1    # exclusive upper bound for the block's top row
    left_limit = width - block_w + 1    # exclusive upper bound for the block's left column
    for img in range(n_img):
        if torch.rand((), device=dev).item() >= drop_prob:
            continue  # this sample keeps all of its pixels
        top = int(torch.randint(0, top_limit, (1,), device=dev).item())
        left = int(torch.randint(0, left_limit, (1,), device=dev).item())
        blocks[img, :, top:top + block_h, left:left + block_w] = True
    return blocks & valid.unsqueeze(1)

def apply_mask(x: torch.Tensor, m: torch.Tensor) -> torch.Tensor:
    """Return a copy of ``x`` with the positions selected by ``m`` set to 0.

    ``x`` itself is left untouched.
    """
    hidden = x.clone()
    hidden[m] = 0.0
    return hidden


In [5]:
# Cell 4 — SSL Model: SegFormer encoder + SimSiam head + Recon head (combo self-supervised)
from transformers import SegformerModel

def cos_sim(a, b):
    """Mean cosine similarity between matching rows of ``a`` and ``b``.

    Both inputs are L2-normalised along dim 1, multiplied element-wise and
    summed over dim 1; the per-row similarities are then averaged.
    """
    a_unit, b_unit = (F.normalize(t, dim=1) for t in (a, b))
    per_row = torch.sum(a_unit * b_unit, dim=1)
    return per_row.mean()

class MLP(nn.Module):
    """Projection head: Linear -> BatchNorm -> ReLU -> Linear -> BatchNorm."""

    def __init__(self, in_dim: int, hidden: int, out_dim: int):
        super().__init__()
        stages = [
            nn.Linear(in_dim, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, out_dim),
            nn.BatchNorm1d(out_dim),  # the output itself is batch-normalised
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)

class Predictor(nn.Module):
    """Prediction head: Linear -> BatchNorm -> ReLU -> Linear (no final norm)."""

    def __init__(self, in_dim: int, hidden: int, out_dim: int):
        super().__init__()
        stages = [
            nn.Linear(in_dim, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, out_dim),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)

class ReconHead(nn.Module):
    """Light decoder that turns a feature map into a one-channel image in [0, 1].

    A 3x3 convolution halves the channel count, a 1x1 convolution reduces it
    to one channel, and the result is bilinearly resized to ``out_hw`` and
    squashed with a sigmoid.
    """

    def __init__(self, in_ch: int):
        super().__init__()
        mid_ch = in_ch // 2
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, mid_ch, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_ch, 1, 1),
        )

    def forward(self, feat, out_hw=(H, W_PAD)):
        coarse = self.conv(feat)  # (B,1,h',w')
        resized = F.interpolate(coarse, size=out_hw, mode="bilinear", align_corners=False)
        return resized.sigmoid()  # [0,1]

class SegFormerSSLCombo(nn.Module):
    """SegFormer encoder with two self-supervised heads.

    * SimSiam branch: the last hidden state is average-pooled over its spatial
      axes, projected (``projector``) and then predicted (``predictor``).
    * Reconstruction branch: ``recon`` decodes the same hidden state back to
      an ``(H, W_PAD)`` image.
    """

    def __init__(self, backbone_name: str, proj_dim=256, pred_dim=256, hidden=1024):
        super().__init__()
        self.backbone = SegformerModel.from_pretrained(backbone_name)
        # Channel count of the encoder's final stage, as listed in its config.
        last_dim = self.backbone.config.hidden_sizes[-1]
        self.projector = MLP(in_dim=last_dim, hidden=hidden, out_dim=proj_dim)
        self.predictor = Predictor(in_dim=proj_dim, hidden=hidden // 2, out_dim=pred_dim)
        self.recon = ReconHead(in_ch=last_dim)

    def forward_once(self, x3):
        """Encode one view; returns ``(prediction, projection, reconstruction)``."""
        encoded = self.backbone(pixel_values=x3, output_hidden_states=True)
        feat = encoded.hidden_states[-1]                 # (B,C,h',w')
        pooled = feat.mean(dim=(2, 3))                   # (B,C)
        z = self.projector(pooled)                       # (B,D)
        p = self.predictor(z)                            # (B,D)
        r = self.recon(feat, out_hw=(H, W_PAD))          # (B,1,160,288)
        return p, z, r

    def forward(self, x1_3, x2_3):
        """Encode both views; the projections are returned detached (stop-gradient)."""
        (p1, z1, r1), (p2, z2, r2) = (self.forward_once(view) for view in (x1_3, x2_3))
        return p1, p2, z1.detach(), z2.detach(), r1, r2

# Build the SSL model from the pretrained backbone and move it to the training device.
ssl_model = SegFormerSSLCombo(backbone_name=BACKBONE)
ssl_model = ssl_model.to(DEVICE)
print("ssl_model ready.")


ssl_model ready.


In [6]:
# Cell 5 — SSL pretrain loop (combo): SimSiam (1-cos) + masked recon MSE
# ---- SSL pre-training hyper-parameters ----
SSL_EPOCHS = 10
LR = 3e-4
WD = 1e-4
LAMBDA_REC = 1.0   # weight of the reconstruction term relative to the SimSiam term

opt = torch.optim.AdamW(ssl_model.parameters(), lr=LR, weight_decay=WD)
# torch.amp.GradScaler("cuda", ...) is the replacement named by PyTorch's
# deprecation warning for torch.cuda.amp.GradScaler(...); it is a no-op when disabled.
scaler = torch.amp.GradScaler("cuda", enabled=(DEVICE=="cuda"))

best_ssl = 1e9
ssl_best_ckpt = SSL_DIR / "ssl_combo_best.pt"                        # full SSL model (backbone + heads)
ssl_backbone_path = SSL_DIR / "segformer_backbone_ssl_combo.pt"      # backbone weights only

for ep in range(1, SSL_EPOCHS+1):
    ssl_model.train()
    loss_sum, n = 0.0, 0

    for x, valid in tqdm(ssl_loader, desc=f"[Exp04c-SSL] ep{ep}", leave=False):
        x = x.to(DEVICE)                 # (B,1,160,288)
        valid = valid.to(DEVICE)         # (B,160,288) bool

        # two independently augmented views of the same batch
        # NOTE(review): intensity_aug may mirror a view while `valid` is not mirrored,
        # so the masks below can be drawn against un-mirrored column positions — confirm.
        x1 = intensity_aug(x)
        x2 = intensity_aug(x)

        # one random block per view, restricted to valid pixels, is hidden for reconstruction
        m1 = block_mask(valid, drop_prob=0.9, block_h=16, block_w=32)
        m2 = block_mask(valid, drop_prob=0.9, block_h=16, block_w=32)

        x1_in = apply_mask(x1, m1)
        x2_in = apply_mask(x2, m2)

        # the backbone is fed three identical channels
        x1_3 = x1_in.repeat(1,3,1,1)
        x2_3 = x2_in.repeat(1,3,1,1)

        opt.zero_grad(set_to_none=True)
        with torch.amp.autocast("cuda", enabled=(DEVICE=="cuda")):
            p1, p2, z1, z2, r1, r2 = ssl_model(x1_3, x2_3)

            # SimSiam loss: (1 - cosine) >= 0, smaller is better
            sim_loss = 0.5 * ((1.0 - cos_sim(p1, z2)) + (1.0 - cos_sim(p2, z1)))

            # masked recon MSE on masked pixels only; the clamp avoids a 0/0 when
            # no pixel was masked in the whole batch
            denom1 = m1.sum().clamp(min=1).float()
            denom2 = m2.sum().clamp(min=1).float()
            rec1 = ((r1 - x1)**2)[m1].sum() / denom1
            rec2 = ((r2 - x2)**2)[m2].sum() / denom2
            rec_loss = 0.5 * (rec1 + rec2)

            loss = sim_loss + LAMBDA_REC * rec_loss

        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

        # sample-weighted running sum so the epoch mean is exact with a short last batch
        loss_sum += float(loss.item()) * x.size(0)
        n += x.size(0)

    ep_loss = loss_sum / max(1, n)
    print(f"[Exp04c-SSL] ep{ep:02d}/{SSL_EPOCHS} loss={ep_loss:.4f}")

    # "best" is judged on the training loss of the epoch (there is no SSL validation split)
    if ep_loss < best_ssl:
        best_ssl = ep_loss
        torch.save({"model": ssl_model.state_dict()}, ssl_best_ckpt)
        torch.save(ssl_model.backbone.state_dict(), ssl_backbone_path)

print("[Exp04c-SSL] best loss:", best_ssl)
print("saved ssl ckpt:", ssl_best_ckpt)
print("saved backbone:", ssl_backbone_path)


  scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=="cuda"))
  with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
                                                                   

[Exp04c-SSL] ep01/10 loss=0.1782


                                                                   

[Exp04c-SSL] ep02/10 loss=0.0928


                                                                   

[Exp04c-SSL] ep03/10 loss=0.0723


                                                                   

[Exp04c-SSL] ep04/10 loss=0.0152


                                                                   

[Exp04c-SSL] ep05/10 loss=0.0144


                                                                   

[Exp04c-SSL] ep06/10 loss=0.0134


                                                                   

[Exp04c-SSL] ep07/10 loss=0.0130


                                                                   

[Exp04c-SSL] ep08/10 loss=0.0129


                                                                   

[Exp04c-SSL] ep09/10 loss=0.0126


                                                                    

[Exp04c-SSL] ep10/10 loss=0.0121
[Exp04c-SSL] best loss: 0.012052356737996678
saved ssl ckpt: exp_outputs\Exp04c_SSLCombo_then_SUP\ssl_pretrain\ssl_combo_best.pt
saved backbone: exp_outputs\Exp04c_SSLCombo_then_SUP\ssl_pretrain\segformer_backbone_ssl_combo.pt


In [7]:
# Cell 6 — Supervised SegFormer finetune using SSL backbone (NO semi-supervised)
from transformers import SegformerForSemanticSegmentation

# Train/validation split is made by well id: every labeled patch whose parsed
# well number is in VAL_WELLS goes to validation, all others to training.
# Edit the set to hold out different wells.
VAL_WELLS = {5}

# we need well info for split -> rebuild manifest with well
def build_manifest_with_well(x_dir: Path) -> pd.DataFrame:
    """Index the patch files under ``x_dir``, keeping the well id of each patch.

    Files whose stem does not follow the patch naming pattern are skipped.
    Only the array header is inspected (memory-mapped load) to read the width.

    Args:
        x_dir: directory searched recursively for ``.npy`` files.

    Returns:
        DataFrame with columns ``name`` (file stem), ``well`` (int parsed from
        the name), ``w`` (patch width) and ``path`` (file path as a string).
        The columns are present even when no patch file is found.

    Raises:
        ValueError: if a patch array is neither 2-D nor 3-D with a leading
            singleton axis; the message names the offending file.
    """
    columns = ["name", "well", "w", "path"]
    rows = []
    for p in list_npy_files(x_dir):
        stem = p.stem
        try:
            well, _, _ = parse_name(stem)  # section and patch ids are not needed here
        except ValueError:
            # Not a patch file: ignore it rather than abort the scan.
            continue
        arr = np.load(p, mmap_mode="r")  # memory-mapped: the pixel data is not read
        if arr.ndim == 3 and arr.shape[0] == 1:
            w = int(arr.shape[2])
        elif arr.ndim == 2:
            w = int(arr.shape[1])
        else:
            raise ValueError(f"Unexpected shape {arr.shape} for {p}")
        rows.append({"name": stem, "well": well, "w": w, "path": str(p)})
    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(rows, columns=columns)

# Re-index the training folder, this time keeping the well id of each patch.
train_all2 = build_manifest_with_well(X_TRAIN_DIR)
train_labeled2 = train_all2.loc[train_all2["name"].isin(y_df.index)].reset_index(drop=True)

# Patches from held-out wells form the validation set; everything else is for training.
is_val_well = train_labeled2["well"].isin(VAL_WELLS)
train_split = train_labeled2.loc[~is_val_well].reset_index(drop=True)
val_split   = train_labeled2.loc[is_val_well].reset_index(drop=True)
print("train_split:", len(train_split), "val_split:", len(val_split))

class LabeledDataset(Dataset):
    """Image/label pairs for supervised fine-tuning.

    Each item is ``(x_t, y_t)``: the normalised patch padded to ``W_PAD`` with
    a leading channel axis, and its int64 label map padded with
    ``IGNORE_INDEX``. In training mode the pair is mirrored left-right with
    probability 0.5, using a private ``RandomState`` seeded with ``seed``.
    """

    def __init__(self, df: pd.DataFrame, train_mode: bool, seed=123):
        self.df = df.reset_index(drop=True)
        self.train_mode = train_mode
        self.rng = np.random.RandomState(seed)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        record = self.df.iloc[idx]
        patch_name = record["name"]
        width = int(record["w"])

        image = pad_x_to_wpad(load_x(Path(record["path"])))
        labels = pad_mask_to_wpad(
            restore_mask_from_row(y_df.loc[patch_name].values), w=width
        )

        # safe aug: horizontal flip, applied to image and labels together
        # (the random draw happens only in training mode)
        mirror = self.train_mode and self.rng.rand() < 0.5
        if mirror:
            image = image[:, ::-1].copy()   # copy: torch needs positive strides
            labels = labels[:, ::-1].copy()

        x_t = torch.from_numpy(image).unsqueeze(0)   # (1,160,288)
        y_t = torch.from_numpy(labels).long()        # (160,288)
        return x_t, y_t

# Training batches are shuffled and flip-augmented; validation batches keep file order.
train_loader = DataLoader(
    LabeledDataset(train_split, True),
    batch_size=8,
    shuffle=True,
    num_workers=0,
    pin_memory=(DEVICE == "cuda"),
)
val_loader = DataLoader(
    LabeledDataset(val_split, False),
    batch_size=8,
    shuffle=False,
    num_workers=0,
    pin_memory=(DEVICE == "cuda"),
)

# loss/metric
# Per-class cross-entropy weights (class 0 weighted least); padded pixels are ignored.
ce_weights = torch.tensor([1.0, 3.0, 4.0], dtype=torch.float32, device=DEVICE)
ce = nn.CrossEntropyLoss(weight=ce_weights, ignore_index=IGNORE_INDEX)

def soft_dice_loss(logits, target, smooth=1.0):
    """Soft Dice loss averaged over classes, ignoring ``IGNORE_INDEX`` pixels.

    Softmax probabilities and the one-hot target are both zeroed on ignored
    pixels; intersection and denominator are then summed over batch and
    spatial axes per class. ``smooth`` is added to numerator and denominator,
    so a class with no mass on either side scores a Dice of 1.

    Args:
        logits: (B,C,H,W) class scores.
        target: (B,H,W) integer labels; not modified.

    Returns:
        Scalar tensor ``1 - mean_class_dice``.
    """
    keep = target.ne(IGNORE_INDEX)
    # one_hot needs in-range labels, so ignored pixels are parked on class 0
    # and removed again by the `keep` mask below.
    in_range = target.masked_fill(~keep, 0)
    keep = keep.unsqueeze(1)

    probs = logits.softmax(dim=1) * keep
    onehot = F.one_hot(in_range, num_classes=NUM_CLASSES).permute(0, 3, 1, 2).float() * keep

    over = (0, 2, 3)
    inter = (probs * onehot).sum(over)
    denom = (probs + onehot).sum(over)
    dice = (2 * inter + smooth) / (denom + smooth)
    return 1.0 - dice.mean()

def combo_loss(logits, y, dice_w=0.5):
    """Convex blend of weighted cross-entropy and soft Dice.

    ``dice_w`` is the share given to the Dice term; the cross-entropy term
    receives ``1 - dice_w``.
    """
    ce_term = ce(logits, y)
    dice_term = soft_dice_loss(logits, y)
    return (1 - dice_w) * ce_term + dice_w * dice_term

def upsample_logits(logits, target_hw):
    """Bilinearly resize ``logits`` to the spatial size ``target_hw`` (height, width)."""
    resized = F.interpolate(
        input=logits,
        size=target_hw,
        mode="bilinear",
        align_corners=False,
    )
    return resized

def mean_iou(pred, target):
    """Mean intersection-over-union across classes for one batch.

    Pixels labelled ``IGNORE_INDEX`` are excluded on both sides. A class that
    appears in neither the prediction nor the target (empty union) is scored
    as 1.0 rather than being left out of the mean.

    Returns:
        Scalar tensor on ``pred``'s device.
    """
    keep = target != IGNORE_INDEX
    per_class = []
    for cls in range(NUM_CLASSES):
        pred_c = (pred == cls) & keep
        true_c = (target == cls) & keep
        union = (pred_c | true_c).sum().float()
        if union.item() == 0:
            per_class.append(torch.tensor(1.0, device=pred.device))
        else:
            inter = (pred_c & true_c).sum().float()
            per_class.append(inter / union)
    return torch.stack(per_class).mean()

# model init: pretrained SegFormer with a fresh 3-class head
# (ignore_mismatched_sizes lets the 150-class checkpoint head be replaced)
model = SegformerForSemanticSegmentation.from_pretrained(
    BACKBONE, num_labels=NUM_CLASSES, ignore_mismatched_sizes=True
).to(DEVICE)

# load SSL backbone into model.segformer
# weights_only=True: the file is a plain state dict, so no arbitrary pickled
# objects need to be (or should be) deserialised.
bb_state = torch.load(ssl_backbone_path, map_location=DEVICE, weights_only=True)
# strict=False tolerates key differences, so report them instead of letting a
# silent mismatch leave the encoder on its original weights.
load_report = model.segformer.load_state_dict(bb_state, strict=False)
if load_report.missing_keys or load_report.unexpected_keys:
    print(
        "WARNING: SSL backbone keys did not fully match -",
        "missing:", len(load_report.missing_keys),
        "unexpected:", len(load_report.unexpected_keys),
    )
print("Loaded SSL backbone:", ssl_backbone_path)

# ---- supervised fine-tuning hyper-parameters ----
SUP_EPOCHS = 10
SUP_LR = 6e-5
opt = torch.optim.AdamW(model.parameters(), lr=SUP_LR, weight_decay=0.01)
# torch.amp.GradScaler("cuda", ...) is the replacement named by PyTorch's
# deprecation warning for torch.cuda.amp.GradScaler(...); it is a no-op when disabled.
scaler = torch.amp.GradScaler("cuda", enabled=(DEVICE=="cuda"))

best_miou = -1.0   # below any attainable mIoU, so the first epoch is always saved
sup_best = SUP_DIR / "best_state_dict.pt"

for ep in range(1, SUP_EPOCHS+1):
    model.train()
    for x, y in tqdm(train_loader, desc=f"[Exp04c-SUP] train ep{ep}", leave=False):
        x = x.to(DEVICE)             # (B,1,160,288)
        y = y.to(DEVICE)             # (B,160,288)
        x3 = x.repeat(1,3,1,1)       # the backbone is fed three identical channels

        opt.zero_grad(set_to_none=True)
        with torch.amp.autocast("cuda", enabled=(DEVICE=="cuda")):
            logits = model(pixel_values=x3).logits
            # bring the logits to label resolution before computing the loss
            logits = upsample_logits(logits, y.shape[-2:])
            loss = combo_loss(logits, y, dice_w=0.5)

        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

    # val (full precision, no gradients)
    model.eval()
    miou_sum, n = 0.0, 0
    with torch.no_grad():
        for x, y in tqdm(val_loader, desc=f"[Exp04c-SUP] val ep{ep}", leave=False):
            x = x.to(DEVICE)
            y = y.to(DEVICE)
            x3 = x.repeat(1,3,1,1)
            logits = model(pixel_values=x3).logits
            logits = upsample_logits(logits, y.shape[-2:])
            pred = torch.argmax(logits, dim=1)
            # mean_iou is computed per batch and averaged with batch-size weights,
            # i.e. this is a mean of batch-level mIoUs, not a dataset-level IoU.
            miou_sum += float(mean_iou(pred, y).item()) * x.size(0)
            n += x.size(0)
    val_miou = miou_sum / max(1, n)
    print(f"[Exp04c-SUP] ep{ep:02d}/{SUP_EPOCHS} val_mIoU={val_miou:.4f}")

    # keep only the weights of the best validation epoch
    if val_miou > best_miou:
        best_miou = val_miou
        torch.save(model.state_dict(), sup_best)

print("[Exp04c-SUP] BEST val mIoU:", best_miou, "saved:", sup_best)


train_split: 4122 val_split: 288


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b2-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([150, 768, 1, 1]) in the checkpoint and torch.Size([3, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  bb_state = torch.load(ssl_backbone_path, map_location=DEVICE)
  scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=="cuda"))


Loaded SSL backbone: exp_outputs\Exp04c_SSLCombo_then_SUP\ssl_pretrain\segformer_backbone_ssl_combo.pt


  with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
                                                                         

[Exp04c-SUP] ep01/10 val_mIoU=0.7560


                                                                         

[Exp04c-SUP] ep02/10 val_mIoU=0.7790


                                                                         

[Exp04c-SUP] ep03/10 val_mIoU=0.7931


                                                                         

[Exp04c-SUP] ep04/10 val_mIoU=0.7970


                                                                         

[Exp04c-SUP] ep05/10 val_mIoU=0.8032


                                                                         

[Exp04c-SUP] ep06/10 val_mIoU=0.7903


                                                                         

[Exp04c-SUP] ep07/10 val_mIoU=0.7898


                                                                         

[Exp04c-SUP] ep08/10 val_mIoU=0.8031


                                                                         

[Exp04c-SUP] ep09/10 val_mIoU=0.7858


                                                                          

[Exp04c-SUP] ep10/10 val_mIoU=0.7911
[Exp04c-SUP] BEST val mIoU: 0.8031896617677476 saved: exp_outputs\Exp04c_SSLCombo_then_SUP\supervised\best_state_dict.pt




In [8]:
# Cell 7 — Predict test by SAMPLE order + save npy + submission CSV (NO missing, only 972)
# 1) read sample
# The sample submission fixes both the row order and the name spelling of the
# final CSV: its first column (whatever pandas names it) holds the patch names.
sample = pd.read_csv(SAMPLE_SUB)
name_col = sample.columns[0]
# Kept verbatim so the submission can reuse exactly the same name strings.
ordered_names_raw = sample[name_col].astype(str).tolist()

def norm_name(s: str) -> str:
    """Normalise a patch name: trim whitespace and drop a trailing ``.npy``.

    The extension check is case-insensitive; any other text is returned as is.
    """
    trimmed = str(s).strip()
    has_npy_suffix = trimmed.lower().endswith(".npy")
    return trimmed[:-4] if has_npy_suffix else trimmed

# Names without extension/whitespace, in sample order; used as file stems below.
ordered_names = [norm_name(n) for n in ordered_names_raw]
print("sample rows:", len(ordered_names), "name_col:", name_col)

# 2) index all X_test files by stem, plus a lower-cased alias for each stem so a
#    name that differs only by letter case can still be found
test_files = list_npy_files(X_TEST_DIR)
test_index = {p.stem: p for p in test_files}
test_index.update({p.stem.lower(): p for p in test_files})

# 3) load best supervised model
# weights_only=True: the checkpoint is a plain state dict, so no arbitrary
# pickled objects need to be (or should be) deserialised.
model.load_state_dict(torch.load(sup_best, map_location=DEVICE, weights_only=True))
model.eval()
print("Loaded sup best:", sup_best)

# clear old preds so stale files from an earlier run cannot leak into the CSV;
# the listing is materialised first so files are not deleted under a live iterator
for p in list(SUB_DIR.glob("*.npy")):
    p.unlink()

# Predict every patch named in the sample, in sample order, and store each
# label map as <name>.npy in SUB_DIR. Gradients are not needed for inference.
with torch.no_grad():
    for name in tqdm(ordered_names, desc="[Exp04c] predict test", leave=False):
        # Look the name up as given first, then by its lower-cased form.
        key = name if name in test_index else name.lower()
        if key not in test_index:
            # Last resort: search the test folder for a file with exactly this name.
            hits = list(X_TEST_DIR.rglob(f"{name}.npy")) + list(X_TEST_DIR.rglob(f"{name}.NPY"))
            if len(hits) == 0:
                raise FileNotFoundError(f"X_test missing: {name}.npy")
            x_path = hits[0]
        else:
            x_path = test_index[key]

        x = load_x(x_path)              # (160,w)
        w = x.shape[1]                  # remembered so the padding can be cropped off again
        x_pad = pad_x_to_wpad(x)        # (160,288)
        x_t = torch.from_numpy(x_pad).unsqueeze(0).unsqueeze(0).to(DEVICE)  # (1,1,160,288)
        x_t = x_t.repeat(1,3,1,1)       # three identical channels, as in training

        logits = model(pixel_values=x_t).logits
        logits = upsample_logits(logits, (H, W_PAD))   # back to the padded input resolution
        pred = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy().astype(np.int64)  # (160,288)
        pred = pred[:, :w]  # crop back to original width (160/272)
        np.save(SUB_DIR / f"{name}.npy", pred)

print("saved npy predictions to:", SUB_DIR)

# 4) build submission csv matching sample format
flat_len = H * SIZE_LABELS  # 160*272 = 43520

# One fixed-length row per prediction: the row-major flattened label map, followed
# by -1 filler when the patch is narrower than SIZE_LABELS. This is the inverse of
# restore_mask_from_row, which drops the -1 entries and reshapes to H rows.
pred_map = {}
for p in SUB_DIR.glob("*.npy"):
    nm = p.stem
    pred = np.load(p)  # (160,160) or (160,272)
    flat = pred.reshape(-1).astype(np.int64)
    if flat.size > flat_len:
        raise ValueError(
            f"prediction {nm} has {flat.size} values, more than the {flat_len} submission columns"
        )
    # Sized from the prediction itself, so any width up to SIZE_LABELS is handled
    # (not only the 160-wide case).
    aux = np.full(flat_len, -1, dtype=np.int64)
    aux[:flat.size] = flat
    pred_map[nm] = aux

missing = [n for n in ordered_names if n not in pred_map]
assert len(missing) == 0, f"missing predictions: {missing[:10]}"

# Rows follow the sample's order exactly.
data = np.stack([pred_map[n] for n in ordered_names], axis=0)  # (972, 43520)
col_names = [str(i) for i in range(flat_len)]

sub_df = pd.DataFrame(data, columns=col_names)
# NOTE(review): name_col is whatever pandas called the sample's first column
# ("Unnamed: 0" in the recorded run) and is written out as that header — confirm
# the grader accepts it.
sub_df.insert(0, name_col, ordered_names_raw)

out_csv = OUT_DIR / "y_test_submission_MATCH_SAMPLE.csv"
sub_df.to_csv(out_csv, index=False)
print("Saved submission:", out_csv, "shape:", sub_df.shape)


sample rows: 972 name_col: Unnamed: 0


  model.load_state_dict(torch.load(sup_best, map_location=DEVICE))


Loaded sup best: exp_outputs\Exp04c_SSLCombo_then_SUP\supervised\best_state_dict.pt


                                                                        

saved npy predictions to: exp_outputs\Exp04c_SSLCombo_then_SUP\test_predictions
Saved submission: exp_outputs\Exp04c_SSLCombo_then_SUP\y_test_submission_MATCH_SAMPLE.csv shape: (972, 43521)
