In [2]:
# Cell 0 — Exp04 (Method 4) paths + config
from pathlib import Path
import re
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("DEVICE:", DEVICE)

# ====== Your provided paths ======
X_TEST_DIR  = Path(r"C:\Users\asus\Desktop\ECN\DEEP\DataChallenge\data\X_test_xNbnvIa")
X_TRAIN_DIR = Path(r"C:\Users\asus\Desktop\ECN\DEEP\DataChallenge\data\X_train_uDRk9z9")
X_UNLAB_DIR = Path(r"C:\Users\asus\Desktop\ECN\DEEP\DataChallenge\data\X_unlabeled_mtkxUlo")
Y_TRAIN_CSV = Path(r"C:\Users\asus\Desktop\ECN\DEEP\DataChallenge\data\Y_train_T9NrBYo.csv")
SAMPLE_SUB  = Path(r"C:\Users\asus\Desktop\ECN\DEEP\DataChallenge\data\submission_csv_file_random_example_3qPSCtv.csv")

# ====== Outputs ======
OUT_DIR = Path(r"exp_outputs\Exp04_SSL_SegFormer_Semi")
OUT_DIR.mkdir(parents=True, exist_ok=True)

SSL_DIR  = OUT_DIR / "ssl_pretrain"
SUP_DIR  = OUT_DIR / "supervised_finetune"
SEMI_DIR = OUT_DIR / "semi_train"
for d in [SSL_DIR, SUP_DIR, SEMI_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# ====== constants ======
NUM_CLASSES  = 3
IGNORE_INDEX = 255
H            = 160
W_PAD        = 288  # pad to 288, later crop to 160/272
torch.backends.cudnn.benchmark = True


DEVICE: cuda


In [3]:
# Cell 1 — utilities: file listing (no double counting), parse name, load/pad X, load/pad Y
NAME_RE = re.compile(r"well_(\d+)_section_(\d+)_patch_(\d+)$")

def parse_name(stem: str):
    m = NAME_RE.match(stem)
    if not m:
        raise ValueError(f"Bad patch name: {stem}")
    return int(m.group(1)), int(m.group(2)), int(m.group(3))

def list_npy_files(dir_path: Path):
    # de-dup robustly; avoids Windows *.npy/*.NPY double counting
    files = list(dir_path.rglob("*.npy")) + list(dir_path.rglob("*.NPY"))
    uniq = sorted({Path(p).resolve() for p in files})
    return [Path(p) for p in uniq]

def load_x(path: Path) -> np.ndarray:
    x = np.load(path)
    if x.ndim == 3 and x.shape[0] == 1:
        x = x[0]
    x = np.nan_to_num(x, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)
    mn, mx = float(x.min()), float(x.max())
    if mx > mn:
        x = (x - mn) / (mx - mn)
    else:
        x = np.zeros_like(x, dtype=np.float32)
    return x  # (160,w)

def pad_x_to_wpad(x: np.ndarray) -> np.ndarray:
    h, w = x.shape
    out = np.zeros((h, W_PAD), dtype=np.float32)
    out[:, :w] = x
    return out

def make_valid_mask(w: int) -> np.ndarray:
    valid = np.zeros((H, W_PAD), dtype=np.bool_)
    valid[:, :w] = True
    return valid

y_df = pd.read_csv(Y_TRAIN_CSV, index_col=0)

def restore_mask_from_row(row_values: np.ndarray) -> np.ndarray:
    vals = row_values[row_values != -1]
    return vals.reshape(H, -1).astype(np.int64)  # (160,160) or (160,272)

def pad_mask_to_wpad(mask: np.ndarray, w: int) -> np.ndarray:
    out = np.full((H, W_PAD), IGNORE_INDEX, dtype=np.int64)
    out[:, :w] = mask
    return out


In [4]:
# Cell 2 — build manifests (train/unlab/test) + SSL pool = (train+unlab images)
def build_manifest(x_dir: Path) -> pd.DataFrame:
    rows = []
    for p in list_npy_files(x_dir):
        stem = p.stem
        try:
            well, section, patch = parse_name(stem)
        except ValueError:
            continue
        arr = np.load(p, mmap_mode="r")
        if arr.ndim == 3 and arr.shape[0] == 1:
            w = int(arr.shape[2])
        elif arr.ndim == 2:
            w = int(arr.shape[1])
        else:
            raise ValueError(f"Unexpected shape {arr.shape} for {p}")
        rows.append({"name": stem, "well": well, "section": section, "patch": patch, "w": w, "path": str(p)})
    return pd.DataFrame(rows)

train_df = build_manifest(X_TRAIN_DIR)
unlab_df = build_manifest(X_UNLAB_DIR)
test_df  = build_manifest(X_TEST_DIR)

# labeled train only
train_labeled_df = train_df[train_df["name"].isin(y_df.index)].reset_index(drop=True)

# SSL uses: all train images (even if labeled) + unlabeled
ssl_df = pd.concat([train_df, unlab_df], axis=0, ignore_index=True)
ssl_df = ssl_df.drop_duplicates(subset=["path"]).reset_index(drop=True)

print("train(all images):", len(train_df))
print("train(labeled):   ", len(train_labeled_df))
print("unlabeled:        ", len(unlab_df))
print("SSL pool:         ", len(ssl_df))
print("test:             ", len(test_df))


train(all images): 4410
train(labeled):    4410
unlabeled:         1980
SSL pool:          6390
test:              972


In [5]:
# Cell 3 — SSL augmentations (SimSiam): two random views from same image
def ssl_aug(x: torch.Tensor) -> torch.Tensor:
    # x: (B,1,H,W_PAD) in [0,1]
    B, _, Hh, Ww = x.shape

    # intensity jitter
    a = torch.empty((B,1,1,1), device=x.device).uniform_(0.85, 1.15)
    b = torch.empty((B,1,1,1), device=x.device).uniform_(-0.08, 0.08)
    out = torch.clamp(x * a + b, 0.0, 1.0)

    # noise
    sigma = torch.empty((B,1,1,1), device=x.device).uniform_(0.0, 0.06)
    out = torch.clamp(out + torch.randn_like(out) * sigma, 0.0, 1.0)

    # random horizontal flip
    if torch.rand(()) < 0.5:
        out = torch.flip(out, dims=[3])

    # cutout
    for i in range(B):
        if torch.rand((), device=x.device).item() < 0.5:
            ch = int(torch.randint(low=10, high=50, size=(1,), device=x.device).item())
            cw = int(torch.randint(low=10, high=80, size=(1,), device=x.device).item())
            y0 = int(torch.randint(low=0, high=Hh-ch+1, size=(1,), device=x.device).item())
            x0 = int(torch.randint(low=0, high=Ww-cw+1, size=(1,), device=x.device).item())
            out[i, :, y0:y0+ch, x0:x0+cw] = 0.0

    return out

class SSLDataset(Dataset):
    def __init__(self, df: pd.DataFrame):
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        row = self.df.iloc[idx]
        x = load_x(Path(row["path"]))      # (160,w)
        x = pad_x_to_wpad(x)               # (160,288)
        x_t = torch.from_numpy(x).unsqueeze(0)  # (1,160,288)
        # return raw tensor; augment will be done on GPU in training step for speed
        return x_t

ssl_loader = DataLoader(SSLDataset(ssl_df), batch_size=32, shuffle=True, num_workers=0, pin_memory=(DEVICE=="cuda"))
xb = next(iter(ssl_loader))
print("ssl batch:", xb.shape)


ssl batch: torch.Size([32, 1, 160, 288])


In [6]:
# Cell 4 — SimSiam with SegFormer backbone
from transformers import SegformerModel

BACKBONE = "nvidia/segformer-b2-finetuned-ade-512-512"

def global_pool(feat: torch.Tensor) -> torch.Tensor:
    # feat: (B,C,H,W) -> (B,C)
    return feat.mean(dim=(2,3))

class MLP(nn.Module):
    def __init__(self, in_dim: int, hidden: int, out_dim: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, out_dim),
            nn.BatchNorm1d(out_dim)
        )
    def forward(self, x):
        return self.net(x)

class Predictor(nn.Module):
    def __init__(self, in_dim: int, hidden: int, out_dim: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, out_dim)
        )
    def forward(self, x):
        return self.net(x)

class SimSiamSegFormer(nn.Module):
    def __init__(self, backbone_name: str, proj_dim=256, pred_dim=256, hidden=1024):
        super().__init__()
        self.backbone = SegformerModel.from_pretrained(backbone_name)
        # infer feature dim: segformer config has hidden_sizes per stage; last stage is strongest
        feat_dim = self.backbone.config.hidden_sizes[-1]
        self.projector = MLP(feat_dim, hidden, proj_dim)
        self.predictor = Predictor(proj_dim, hidden//2, pred_dim)

    def encode(self, x3):
        # x3: (B,3,160,288)
        out = self.backbone(pixel_values=x3, output_hidden_states=True)
        # last stage feature map is hidden_states[-1] with shape (B,C,H',W')
        feat = out.hidden_states[-1]
        v = global_pool(feat)
        z = self.projector(v)
        return z

    def forward(self, x1, x2):
        z1 = self.encode(x1)
        z2 = self.encode(x2)
        p1 = self.predictor(z1)
        p2 = self.predictor(z2)
        return p1, p2, z1.detach(), z2.detach()

def neg_cos(p, z):
    p = F.normalize(p, dim=1)
    z = F.normalize(z, dim=1)
    return -(p * z).sum(dim=1).mean()

ssl_model = SimSiamSegFormer(BACKBONE).to(DEVICE)
print("SimSiam backbone last hidden:", ssl_model.backbone.config.hidden_sizes[-1])


SimSiam backbone last hidden: 512


In [7]:
# Cell 5 — SSL pretrain loop (SimSiam) -> save backbone weights
SSL_EPOCHS = 10
SSL_LR = 3e-4
SSL_WD = 1e-4

opt = torch.optim.AdamW(ssl_model.parameters(), lr=SSL_LR, weight_decay=SSL_WD)
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=="cuda"))

best_ssl = 1e9
ssl_ckpt = SSL_DIR / "ssl_best.pt"
ssl_backbone = SSL_DIR / "segformer_backbone_ssl.pt"

ssl_model.train()
for ep in range(1, SSL_EPOCHS+1):
    loss_sum, n = 0.0, 0
    for x in tqdm(ssl_loader, desc=f"[Exp04-SSL] ep{ep}", leave=False):
        x = x.to(DEVICE)  # (B,1,160,288)

        # two views on GPU
        x1 = ssl_aug(x)
        x2 = ssl_aug(x)

        # segformer needs 3 channels
        x1 = x1.repeat(1,3,1,1)
        x2 = x2.repeat(1,3,1,1)

        opt.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
            p1, p2, z1, z2 = ssl_model(x1, x2)
            loss = 0.5 * (neg_cos(p1, z2) + neg_cos(p2, z1))

        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

        loss_sum += float(loss.item()) * x.size(0)
        n += x.size(0)

    ep_loss = loss_sum / max(1, n)
    print(f"[Exp04-SSL] ep{ep:02d}/{SSL_EPOCHS} loss={ep_loss:.4f}")

    if ep_loss < best_ssl:
        best_ssl = ep_loss
        torch.save({"model": ssl_model.state_dict()}, ssl_ckpt)
        # save ONLY backbone weights for later init
        torch.save(ssl_model.backbone.state_dict(), ssl_backbone)

print("[Exp04-SSL] best loss:", best_ssl)
print("saved ssl_ckpt:", ssl_ckpt)
print("saved ssl_backbone:", ssl_backbone)


  scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=="cuda"))
  with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
                                                                  

[Exp04-SSL] ep01/10 loss=-0.8257


                                                                  

[Exp04-SSL] ep02/10 loss=-0.9305


                                                                  

[Exp04-SSL] ep03/10 loss=-0.7892


                                                                  

[Exp04-SSL] ep04/10 loss=-0.7829


                                                                  

[Exp04-SSL] ep05/10 loss=-0.7901


                                                                  

[Exp04-SSL] ep06/10 loss=-0.7763


                                                                  

[Exp04-SSL] ep07/10 loss=-0.7881


                                                                  

[Exp04-SSL] ep08/10 loss=-0.7936


                                                                  

[Exp04-SSL] ep09/10 loss=-0.7974


                                                                   

[Exp04-SSL] ep10/10 loss=-0.8028
[Exp04-SSL] best loss: -0.9305095878938368
saved ssl_ckpt: exp_outputs\Exp04_SSL_SegFormer_Semi\ssl_pretrain\ssl_best.pt
saved ssl_backbone: exp_outputs\Exp04_SSL_SegFormer_Semi\ssl_pretrain\segformer_backbone_ssl.pt




In [8]:
# Cell 6 — Supervised finetune SegFormer (init backbone from SSL) on labeled data
from transformers import SegformerForSemanticSegmentation

# split by well
VAL_WELLS = {5}
train_split = train_labeled_df[~train_labeled_df["well"].isin(VAL_WELLS)].reset_index(drop=True)
val_split   = train_labeled_df[train_labeled_df["well"].isin(VAL_WELLS)].reset_index(drop=True)

class LabeledDatasetSup(Dataset):
    def __init__(self, df: pd.DataFrame, train_mode: bool, seed=123):
        self.df = df.reset_index(drop=True)
        self.train_mode = train_mode
        self.rng = np.random.RandomState(seed)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        row = self.df.iloc[idx]
        name = row["name"]
        w = int(row["w"])

        x = load_x(Path(row["path"]))
        x = pad_x_to_wpad(x)

        y_raw = restore_mask_from_row(y_df.loc[name].values)
        y = pad_mask_to_wpad(y_raw, w=w)

        if self.train_mode and self.rng.rand() < 0.5:
            x = np.flip(x, axis=1).copy()
            y = np.flip(y, axis=1).copy()

        x_t = torch.from_numpy(x).unsqueeze(0)  # (1,160,288)
        y_t = torch.from_numpy(y).long()        # (160,288)
        meta = {"name": name, "orig_w": w}
        return x_t, y_t, meta

train_loader = DataLoader(LabeledDatasetSup(train_split, True), batch_size=8, shuffle=True, num_workers=0, pin_memory=(DEVICE=="cuda"))
val_loader   = DataLoader(LabeledDatasetSup(val_split,   False), batch_size=8, shuffle=False, num_workers=0, pin_memory=(DEVICE=="cuda"))

# losses
ce_weights = torch.tensor([1.0, 3.0, 4.0], dtype=torch.float32).to(DEVICE)
ce = nn.CrossEntropyLoss(weight=ce_weights, ignore_index=IGNORE_INDEX)

def soft_dice_loss(logits, target, smooth=1.0):
    probs = torch.softmax(logits, dim=1)
    valid = (target != IGNORE_INDEX).unsqueeze(1)
    t = target.clone()
    t[t == IGNORE_INDEX] = 0
    onehot = F.one_hot(t, num_classes=NUM_CLASSES).permute(0,3,1,2).float()
    probs = probs * valid
    onehot = onehot * valid
    inter = (probs * onehot).sum((0,2,3))
    denom = (probs + onehot).sum((0,2,3))
    dice = (2*inter + smooth) / (denom + smooth)
    return 1.0 - dice.mean()

def combo_loss(logits, y, dice_w=0.5):
    return (1-dice_w)*ce(logits, y) + dice_w*soft_dice_loss(logits, y)

def upsample_logits(logits, target_hw):
    return F.interpolate(logits, size=target_hw, mode="bilinear", align_corners=False)

def mean_iou(pred, target):
    valid = (target != IGNORE_INDEX)
    ious = []
    for c in range(NUM_CLASSES):
        p = (pred == c) & valid
        t = (target == c) & valid
        inter = (p & t).sum().float()
        union = (p | t).sum().float()
        ious.append(torch.tensor(1.0, device=pred.device) if union.item()==0 else inter/union)
    return torch.stack(ious).mean()

# build model
model = SegformerForSemanticSegmentation.from_pretrained(
    BACKBONE, num_labels=NUM_CLASSES, ignore_mismatched_sizes=True
).to(DEVICE)

# load SSL backbone
ssl_backbone = SSL_DIR / "segformer_backbone_ssl.pt"
bb_state = torch.load(ssl_backbone, map_location=DEVICE)
model.segformer.load_state_dict(bb_state, strict=False)
print("Loaded SSL backbone into model.segformer:", ssl_backbone)

# train
SUP_EPOCHS = 10
SUP_LR = 6e-5
opt = torch.optim.AdamW(model.parameters(), lr=SUP_LR, weight_decay=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=="cuda"))

best_path = SUP_DIR / "best_state_dict.pt"
best_miou = -1.0

for ep in range(1, SUP_EPOCHS+1):
    model.train()
    tr_loss, n = 0.0, 0
    for x, y, meta in tqdm(train_loader, desc=f"[Exp04-SUP] train ep{ep}", leave=False):
        x = x.to(DEVICE)            # (B,1,160,288)
        y = y.to(DEVICE)            # (B,160,288)
        x3 = x.repeat(1,3,1,1)      # (B,3,160,288)

        opt.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
            logits = model(pixel_values=x3).logits
            logits = upsample_logits(logits, y.shape[-2:])
            loss = combo_loss(logits, y, dice_w=0.5)

        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

        tr_loss += float(loss.item()) * x.size(0)
        n += x.size(0)
    tr_loss /= max(1, n)

    model.eval()
    miou_sum, n = 0.0, 0
    with torch.no_grad():
        for x, y, meta in tqdm(val_loader, desc=f"[Exp04-SUP] val ep{ep}", leave=False):
            x = x.to(DEVICE)
            y = y.to(DEVICE)
            x3 = x.repeat(1,3,1,1)
            logits = model(pixel_values=x3).logits
            logits = upsample_logits(logits, y.shape[-2:])
            pred = torch.argmax(logits, dim=1)
            miou_sum += float(mean_iou(pred, y).item()) * x.size(0)
            n += x.size(0)
    val_miou = miou_sum / max(1, n)

    print(f"[Exp04-SUP] ep{ep:02d}/{SUP_EPOCHS} train_loss={tr_loss:.4f} val_mIoU={val_miou:.4f}")

    if val_miou > best_miou:
        best_miou = val_miou
        torch.save(model.state_dict(), best_path)

print("[Exp04-SUP] BEST val mIoU:", best_miou, "saved:", best_path)


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b2-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([150, 768, 1, 1]) in the checkpoint and torch.Size([3, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  bb_state = torch.load(ssl_backbone, map_location=DEVICE)
  scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=="cuda"))


Loaded SSL backbone into model.segformer: exp_outputs\Exp04_SSL_SegFormer_Semi\ssl_pretrain\segformer_backbone_ssl.pt


  with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
                                                                        

[Exp04-SUP] ep01/10 train_loss=0.2109 val_mIoU=0.7823


                                                                        

[Exp04-SUP] ep02/10 train_loss=0.1044 val_mIoU=0.7821


                                                                        

[Exp04-SUP] ep03/10 train_loss=0.0945 val_mIoU=0.7847


                                                                        

[Exp04-SUP] ep04/10 train_loss=0.0903 val_mIoU=0.7879


                                                                        

[Exp04-SUP] ep05/10 train_loss=0.0872 val_mIoU=0.7951


                                                                        

[Exp04-SUP] ep06/10 train_loss=0.0838 val_mIoU=0.7824


                                                                        

[Exp04-SUP] ep07/10 train_loss=0.0813 val_mIoU=0.7956


                                                                        

[Exp04-SUP] ep08/10 train_loss=0.0793 val_mIoU=0.7907


                                                                        

[Exp04-SUP] ep09/10 train_loss=0.0770 val_mIoU=0.7948


                                                                         

[Exp04-SUP] ep10/10 train_loss=0.0749 val_mIoU=0.8003
[Exp04-SUP] BEST val mIoU: 0.8002571794721816 saved: exp_outputs\Exp04_SSL_SegFormer_Semi\supervised_finetune\best_state_dict.pt


In [9]:
# Cell 7 — Semi-supervised (EMA Teacher) starting from Exp04 supervised best
# unlabeled loader (reuse unlab_df)
class UnlabeledDataset(Dataset):
    def __init__(self, df: pd.DataFrame):
        self.df = df.reset_index(drop=True)
    def __len__(self): return len(self.df)
    def __getitem__(self, idx: int):
        row = self.df.iloc[idx]
        w = int(row["w"])
        x = load_x(Path(row["path"]))
        x = pad_x_to_wpad(x)
        x_t = torch.from_numpy(x).unsqueeze(0)
        valid = torch.from_numpy(make_valid_mask(w))
        return x_t, valid

unlab_loader = DataLoader(UnlabeledDataset(unlab_df), batch_size=8, shuffle=True, num_workers=0, pin_memory=(DEVICE=="cuda"))

def weak_aug(x: torch.Tensor) -> torch.Tensor:
    B = x.size(0)
    a = torch.empty((B,1,1,1), device=x.device).uniform_(0.95, 1.05)
    b = torch.empty((B,1,1,1), device=x.device).uniform_(-0.03, 0.03)
    return torch.clamp(x * a + b, 0.0, 1.0)

def strong_aug(x: torch.Tensor) -> torch.Tensor:
    B, _, Hh, Ww = x.shape
    a = torch.empty((B,1,1,1), device=x.device).uniform_(0.85, 1.15)
    b = torch.empty((B,1,1,1), device=x.device).uniform_(-0.08, 0.08)
    out = torch.clamp(x * a + b, 0.0, 1.0)
    sigma = torch.empty((B,1,1,1), device=x.device).uniform_(0.0, 0.06)
    out = torch.clamp(out + torch.randn_like(out) * sigma, 0.0, 1.0)
    for i in range(B):
        if torch.rand((), device=x.device).item() < 0.5:
            ch = int(torch.randint(low=10, high=50, size=(1,), device=x.device).item())
            cw = int(torch.randint(low=10, high=80, size=(1,), device=x.device).item())
            y0 = int(torch.randint(low=0, high=Hh-ch+1, size=(1,), device=x.device).item())
            x0 = int(torch.randint(low=0, high=Ww-cw+1, size=(1,), device=x.device).item())
            out[i, :, y0:y0+ch, x0:x0+cw] = 0.0
    return out

@torch.no_grad()
def ema_update(teacher, student, alpha: float):
    for t_p, s_p in zip(teacher.parameters(), student.parameters()):
        t_p.data.mul_(alpha).add_(s_p.data, alpha=1.0 - alpha)
    for t_b, s_b in zip(teacher.buffers(), student.buffers()):
        t_b.copy_(s_b)

def cycle(loader):
    while True:
        for b in loader:
            yield b

# init student/teacher from supervised best
student = SegformerForSemanticSegmentation.from_pretrained(
    BACKBONE, num_labels=NUM_CLASSES, ignore_mismatched_sizes=True
).to(DEVICE)
teacher = SegformerForSemanticSegmentation.from_pretrained(
    BACKBONE, num_labels=NUM_CLASSES, ignore_mismatched_sizes=True
).to(DEVICE)

sup_state = torch.load(best_path, map_location=DEVICE)
student.load_state_dict(sup_state)
teacher.load_state_dict(sup_state)
teacher.eval()
for p in teacher.parameters():
    p.requires_grad_(False)

# unlabeled CE loss (ignore_index)
ce_u = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

def rampup(epoch: int, ramp_epochs=5):
    return min(1.0, float(epoch+1)/float(ramp_epochs))

SEMI_EPOCHS = 10
LR = 6e-5
opt = torch.optim.AdamW(student.parameters(), lr=LR, weight_decay=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=="cuda"))

TAU = 0.95
EMA_ALPHA = 0.996
LAMBDA_U = 1.0
RAMP_E = 5

semi_best = SEMI_DIR / "best_state_dict.pt"
best_miou = -1.0

unlab_iter = cycle(unlab_loader)

for ep in range(1, SEMI_EPOCHS+1):
    lam_u = LAMBDA_U * rampup(ep-1, RAMP_E)
    student.train()
    teacher.eval()

    for x_l, y_l, meta in tqdm(train_loader, desc=f"[Exp04-SEMI] train ep{ep} (lam_u={lam_u:.2f})", leave=False):
        x_u, valid_u = next(unlab_iter)

        x_l = x_l.to(DEVICE)              # (B,1,160,288)
        y_l = y_l.to(DEVICE)              # (B,160,288)
        x_u = x_u.to(DEVICE)              # (B,1,160,288)
        valid_u = valid_u.to(DEVICE)      # (B,160,288)

        x_l3 = x_l.repeat(1,3,1,1)

        x_u_w = weak_aug(x_u).repeat(1,3,1,1)
        x_u_s = strong_aug(x_u).repeat(1,3,1,1)

        opt.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
            # labeled
            logits_l = student(pixel_values=x_l3).logits
            logits_l = upsample_logits(logits_l, y_l.shape[-2:])
            loss_l = combo_loss(logits_l, y_l, dice_w=0.5)

            # teacher pseudo
            with torch.no_grad():
                logits_t = teacher(pixel_values=x_u_w).logits
                logits_t = upsample_logits(logits_t, (H, W_PAD))
                probs_t = torch.softmax(logits_t, dim=1)
                conf, pseudo = torch.max(probs_t, dim=1)  # (B,160,288)
                mask = (conf >= TAU) & valid_u
                pseudo_pl = pseudo.clone()
                pseudo_pl[~mask] = IGNORE_INDEX

            # unlabeled loss
            if lam_u > 0:
                logits_u = student(pixel_values=x_u_s).logits
                logits_u = upsample_logits(logits_u, (H, W_PAD))
                loss_u = ce_u(logits_u, pseudo_pl)
            else:
                loss_u = torch.tensor(0.0, device=DEVICE)

            loss = loss_l + lam_u * loss_u

        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

        ema_update(teacher, student, EMA_ALPHA)

    # val
    student.eval()
    miou_sum, n = 0.0, 0
    with torch.no_grad():
        for x, y, meta in tqdm(val_loader, desc=f"[Exp04-SEMI] val ep{ep}", leave=False):
            x = x.to(DEVICE)
            y = y.to(DEVICE)
            x3 = x.repeat(1,3,1,1)
            logits = student(pixel_values=x3).logits
            logits = upsample_logits(logits, y.shape[-2:])
            pred = torch.argmax(logits, dim=1)
            miou_sum += float(mean_iou(pred, y).item()) * x.size(0)
            n += x.size(0)
    val_miou = miou_sum / max(1, n)
    print(f"[Exp04-SEMI] ep{ep:02d}/{SEMI_EPOCHS} val_mIoU={val_miou:.4f}")

    if val_miou > best_miou:
        best_miou = val_miou
        torch.save(student.state_dict(), semi_best)

print("[Exp04-SEMI] BEST val mIoU:", best_miou, "saved:", semi_best)


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b2-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([150, 768, 1, 1]) in the checkpoint and torch.Size([3, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b2-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([150, 768, 1, 1]) in the checkpoint and torch.Size([3, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([150]) in 

[Exp04-SEMI] ep01/10 val_mIoU=0.8008


                                                                                      

[Exp04-SEMI] ep02/10 val_mIoU=0.7994


                                                                                      

[Exp04-SEMI] ep03/10 val_mIoU=0.8046


                                                                                      

[Exp04-SEMI] ep04/10 val_mIoU=0.8065


                                                                                      

[Exp04-SEMI] ep05/10 val_mIoU=0.7993


                                                                                      

[Exp04-SEMI] ep06/10 val_mIoU=0.7985


                                                                                      

[Exp04-SEMI] ep07/10 val_mIoU=0.7940


                                                                                      

[Exp04-SEMI] ep08/10 val_mIoU=0.7984


                                                                                      

[Exp04-SEMI] ep09/10 val_mIoU=0.7915


                                                                                       

[Exp04-SEMI] ep10/10 val_mIoU=0.7981
[Exp04-SEMI] BEST val mIoU: 0.806527747048272 saved: exp_outputs\Exp04_SSL_SegFormer_Semi\semi_train\best_state_dict.pt




In [10]:
# Cell 8 — Predict test (by sample order) using Exp04 semi best, save npy + submission CSV
sample = pd.read_csv(SAMPLE_SUB)
name_col = sample.columns[0]
ordered_names_raw = sample[name_col].astype(str).tolist()

def norm_name(s: str) -> str:
    s = str(s).strip()
    if s.lower().endswith(".npy"):
        s = s[:-4]
    return s

ordered_names = [norm_name(n) for n in ordered_names_raw]
print("sample rows:", len(ordered_names), "name_col:", name_col)

test_files = list_npy_files(X_TEST_DIR)
test_index = {p.stem: p for p in test_files}
test_index.update({p.stem.lower(): p for p in test_files})

# load best semi student
student = SegformerForSemanticSegmentation.from_pretrained(
    BACKBONE, num_labels=NUM_CLASSES, ignore_mismatched_sizes=True
).to(DEVICE)
student.load_state_dict(torch.load(semi_best, map_location=DEVICE))
student.eval()
print("Loaded Exp04 semi best:", semi_best)

pred_dir = OUT_DIR / "test_predictions"
pred_dir.mkdir(parents=True, exist_ok=True)
for p in pred_dir.glob("*.npy"):
    p.unlink()

with torch.no_grad():
    for name in tqdm(ordered_names, desc="[Exp04] predict test", leave=False):
        key = name if name in test_index else name.lower()
        if key not in test_index:
            hits = list(X_TEST_DIR.rglob(f"{name}.npy")) + list(X_TEST_DIR.rglob(f"{name}.NPY"))
            if len(hits) == 0:
                raise FileNotFoundError(f"X_test missing: {name}.npy")
            x_path = hits[0]
        else:
            x_path = test_index[key]

        x = load_x(x_path)
        w = x.shape[1]
        x_pad = pad_x_to_wpad(x)
        x_t = torch.from_numpy(x_pad).unsqueeze(0).unsqueeze(0).to(DEVICE)  # (1,1,160,288)
        x_t = x_t.repeat(1,3,1,1)

        logits = student(pixel_values=x_t).logits
        logits = upsample_logits(logits, (H, W_PAD))
        pred = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy().astype(np.int64)
        pred = pred[:, :w]
        np.save(pred_dir / f"{name}.npy", pred)

print("saved npy predictions to:", pred_dir)

# build submission CSV (exact sample format)
size_labels = 272
flat_len = H * size_labels

pred_map = {}
for p in pred_dir.glob("*.npy"):
    nm = p.stem
    pred = np.load(p)
    if pred.shape[1] != size_labels:
        aux = -1 + np.zeros(flat_len, dtype=np.int64)
        aux[0:H*H] = pred.flatten()
    else:
        aux = pred.flatten().astype(np.int64)
    pred_map[nm] = aux

missing = [n for n in ordered_names if n not in pred_map]
assert len(missing) == 0, f"missing predictions: {missing[:10]}"

data = np.stack([pred_map[n] for n in ordered_names], axis=0)
col_names = [str(i) for i in range(flat_len)]
sub_df = pd.DataFrame(data, columns=col_names)
sub_df.insert(0, name_col, ordered_names_raw)

out_csv = OUT_DIR / "y_test_submission_MATCH_SAMPLE.csv"
sub_df.to_csv(out_csv, index=False)
print("Saved submission:", out_csv, "shape:", sub_df.shape)


sample rows: 972 name_col: Unnamed: 0


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b2-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([150, 768, 1, 1]) in the checkpoint and torch.Size([3, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  student.load_state_dict(torch.load(semi_best, map_location=DEVICE))


Loaded Exp04 semi best: exp_outputs\Exp04_SSL_SegFormer_Semi\semi_train\best_state_dict.pt


                                                                       

saved npy predictions to: exp_outputs\Exp04_SSL_SegFormer_Semi\test_predictions
Saved submission: exp_outputs\Exp04_SSL_SegFormer_Semi\y_test_submission_MATCH_SAMPLE.csv shape: (972, 43521)
