In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
# 1) Imports & Paths
# ============================================================
import os, zipfile, random, shutil, urllib.request, json, tarfile
from pathlib import Path
from typing import Dict, Tuple
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split, Subset
import torchvision.transforms.functional as TF
from torchvision.transforms import InterpolationMode
from torchvision.models.segmentation import deeplabv3_resnet101, DeepLabV3_ResNet101_Weights

from pycocotools.coco import COCO
from pycocotools import mask as mask_utils

In [4]:
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

WORK = Path("/kaggle/working")
DATA_ROOT = WORK / "data"
COCO_ROOT = DATA_ROOT / "coco"
ADE_ROOT  = DATA_ROOT / "ADEChallengeData2016"
ADE_SUB   = DATA_ROOT / "ADE20K_subset"   # small validation subset for speed
DATA_ROOT.mkdir(parents=True, exist_ok=True)

Device: cuda


In [5]:
# ============================================================
# 2) Download datasets (COCO val2017 + annotations + ADE20K full), then subset ADE
# ============================================================
COCO_VAL_ZIP = "http://images.cocodataset.org/zips/val2017.zip"
COCO_ANN_ZIP = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"
ADE_FULL_ZIP = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"

ZIPS_DIR = DATA_ROOT / "zips"
ZIPS_DIR.mkdir(parents=True, exist_ok=True)

def _download(url: str, out_path: Path):
    out_path.parent.mkdir(parents=True, exist_ok=True)
    if out_path.exists():
        print(f"[skip] {out_path.name} already exists")
        return
    print(f"[download] {url} -> {out_path}")
    urllib.request.urlretrieve(url, str(out_path))

def _unzip(zip_path: Path, to_dir: Path):
    print(f"[unzip] {zip_path.name} -> {to_dir}")
    with zipfile.ZipFile(zip_path, 'r') as z:
        z.extractall(to_dir)

# --- COCO ---
_download(COCO_VAL_ZIP, ZIPS_DIR / "val2017.zip")
_unzip(ZIPS_DIR / "val2017.zip", COCO_ROOT)

_download(COCO_ANN_ZIP, ZIPS_DIR / "annotations_trainval2017.zip")
_unzip(ZIPS_DIR / "annotations_trainval2017.zip", COCO_ROOT)

# Ensure standard layout: coco/images/val2017 & coco/annotations/instances_val2017.json
(COCO_ROOT / "images").mkdir(exist_ok=True)
if not (COCO_ROOT / "images" / "val2017").exists() and (COCO_ROOT / "val2017").exists():
    shutil.move(str(COCO_ROOT / "val2017"), str(COCO_ROOT / "images" / "val2017"))

assert (COCO_ROOT / "images" / "val2017").exists(), "COCO val2017 images not found"
assert (COCO_ROOT / "annotations" / "instances_val2017.json").exists(), "COCO instances_val2017.json not found"

# --- ADE20K ---
_download(ADE_FULL_ZIP, ZIPS_DIR / "ADEChallengeData2016.zip")
if not ADE_ROOT.exists():
    _unzip(ZIPS_DIR / "ADEChallengeData2016.zip", DATA_ROOT)

def build_ade_subset(src_root: Path, dst_root: Path, n_samples: int = 150):
    if dst_root.exists():
        print(f"[skip] ADE subset exists at {dst_root}")
        return
    img_src = src_root / "images" / "validation"
    ann_src = src_root / "annotations" / "validation"
    assert img_src.exists() and ann_src.exists(), "ADE20K validation split not found"

    dst_img = dst_root / "images" / "validation"
    dst_ann = dst_root / "annotations" / "validation"
    dst_img.mkdir(parents=True, exist_ok=True)
    dst_ann.mkdir(parents=True, exist_ok=True)

    img_paths = sorted(list(img_src.glob("*.jpg")))
    pairs = [(ip, ann_src / (ip.stem + ".png")) for ip in img_paths]
    pairs = [p for p in pairs if p[1].exists()]

    random.seed(0)
    random.shuffle(pairs)
    pairs = pairs[:n_samples]

    for ip, mp in pairs:
        shutil.copy2(ip, dst_img / ip.name)
        shutil.copy2(mp, dst_ann / mp.name)

    info_file = src_root / "objectInfo150.txt"
    if info_file.exists():
        shutil.copy2(info_file, dst_root / "objectInfo150.txt")
    print(f"[done] ADE subset with {len(pairs)} samples at {dst_root}")

build_ade_subset(ADE_ROOT, ADE_SUB, n_samples=150)
assert (ADE_SUB / "images" / "validation").exists()
assert (ADE_SUB / "annotations" / "validation").exists()

[download] http://images.cocodataset.org/zips/val2017.zip -> /kaggle/working/data/zips/val2017.zip
[unzip] val2017.zip -> /kaggle/working/data/coco
[download] http://images.cocodataset.org/annotations/annotations_trainval2017.zip -> /kaggle/working/data/zips/annotations_trainval2017.zip
[unzip] annotations_trainval2017.zip -> /kaggle/working/data/coco
[download] http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip -> /kaggle/working/data/zips/ADEChallengeData2016.zip
[unzip] ADEChallengeData2016.zip -> /kaggle/working/data
[done] ADE subset with 150 samples at /kaggle/working/data/ADE20K_subset


In [6]:
# ============================================================
# Build unified dataset -> export -> compute weights -> train DeepLabV3 (Kaggle)
# ============================================================

# ---------- 3) Unified classes + mappings ----------
UNIFIED_CLASSES = ['background','wall','floor','ceiling','window','door','seating','table','cabinet']
U2I = {n:i for i,n in enumerate(UNIFIED_CLASSES)}

COCO_TO_UNIFIED_BY_NAME = {
    'chair': 'seating',
    'couch': 'seating',
    'bench': 'seating',
    'dining table': 'table',
    'bed': 'seating',  # boosts positives
}

ADE_RULES = {
    'wall':    ('wall','partition','wall panel'),
    'floor':   ('floor','tiling','carpet','floorboard'),
    'ceiling': ('ceiling','attic ceiling'),
    'window':  ('window','windowpane','bay window','skylight','window frame'),
    'door':    ('door','doorway','sliding door'),
    'seating': ('sofa','couch','chair','seat','bench','stool','armchair','settee'),
    'table':   ('table','desk','dining table','coffee table','workbench'),
    'cabinet': ('cabinet','cupboard','wardrobe','closet','bookcase','bookshelf','shelf','shelving','drawer'),
}

# ---------- 4) Datasets ----------
class COCOSemanticFromInstances(Dataset):
    def __init__(self, coco_root: Path, split: str="val", target_size: Tuple[int,int]=(512,512)):
        assert split == "val"
        self.img_dir = coco_root / "images" / "val2017"
        ann_file = coco_root / "annotations" / "instances_val2017.json"
        self.coco = COCO(str(ann_file))
        self.img_ids = self.coco.getImgIds()
        self.target_size = target_size

        cats = self.coco.loadCats(self.coco.getCatIds())
        name_to_id = {c['name']: c['id'] for c in cats}
        self.cid_to_unified = {name_to_id[n]: U2I[u]
                               for n,u in COCO_TO_UNIFIED_BY_NAME.items()
                               if n in name_to_id}
        self.unified_priority = {U2I['seating']:20, U2I['cabinet']:15, U2I['table']:10}

    @staticmethod
    def _ann_to_mask(ann, h, w):
        seg = ann.get("segmentation", None)
        if seg is None: return None
        if isinstance(seg, list):
            rles = mask_utils.frPyObjects(seg, h, w)
            rle  = mask_utils.merge(rles)
        elif isinstance(seg, dict):
            rle  = mask_utils.frPyObjects(seg, h, w) if isinstance(seg.get("counts",None), list) else seg
        else:
            return None
        m = mask_utils.decode(rle)
        if m is None: return None
        if m.ndim == 3: m = np.any(m, axis=2)
        return m.astype(np.uint8)

    def __len__(self): return len(self.img_ids)

    def __getitem__(self, idx: int):
        img_id = self.img_ids[idx]
        info = self.coco.loadImgs([img_id])[0]
        img_path = self.img_dir / info["file_name"]

        image = Image.open(img_path).convert("RGB")
        w, h = image.size
        sem = np.zeros((h, w), dtype=np.uint8)

        ann_ids = self.coco.getAnnIds(imgIds=[img_id], iscrowd=None)
        anns = [a for a in self.coco.loadAnns(ann_ids) if a['category_id'] in self.cid_to_unified]
        anns_sorted = sorted(anns, key=lambda a: self.unified_priority.get(self.cid_to_unified[a['category_id']],0))

        for a in anns_sorted:
            uni = self.cid_to_unified[a['category_id']]
            m = self._ann_to_mask(a, h, w)
            if m is not None:
                sem[m.astype(bool)] = uni

        image = TF.resize(image, self.target_size, interpolation=InterpolationMode.BILINEAR)
        mask_img = Image.fromarray(sem.astype(np.uint8))
        mask_img = TF.resize(mask_img, self.target_size, interpolation=InterpolationMode.NEAREST)

        image_t = TF.to_tensor(image)
        mask_t  = torch.from_numpy(np.array(mask_img, dtype=np.uint8))
        return {"image": image_t, "mask": mask_t, "source":"coco", "path": str(img_path)}

class ADE20KUnifiedSeg(Dataset):
    def __init__(self, ade_root: Path, split: str="validation", target_size: Tuple[int,int]=(512,512)):
        self.img_dir = ade_root / "images" / split
        self.ann_dir = ade_root / "annotations" / split
        self.target_size = target_size

        imgs = sorted(self.img_dir.glob("*.jpg"))
        anns = [self.ann_dir / (p.stem + ".png") for p in imgs]
        pairs = [(i,a) for i,a in zip(imgs, anns) if a.exists()]
        assert pairs, "No ADE20K samples found"
        self.img_paths, self.ann_paths = map(list, zip(*pairs))
        self.id2uni = self._build_id_to_unified(ade_root / "objectInfo150.txt")

    def _build_id_to_unified(self, info_file: Path) -> np.ndarray:
        print(f"[ADE] id->unified from: {info_file}")
        max_id=150; id2uni = np.zeros((max_id+1,), dtype=np.uint8)
        names: Dict[int,str] = {}
        if info_file.exists():
            with open(info_file, "r", encoding="utf-8", errors="ignore") as f:
                for line in f:
                    parts = line.strip().split()
                    if not parts: continue
                    idx_pos = next((i for i,t in enumerate(parts) if t.isdigit()), None)
                    if idx_pos is None: continue
                    cid = int(parts[idx_pos]); name = " ".join(parts[idx_pos+1:]).lower()
                    names[cid] = name
        else:
            print("[WARN] objectInfo150.txt missing; all ADE -> background")
        def match_unified(name: str)->int:
            lname = name.lower()
            for uni, subs in ADE_RULES.items():
                for s in subs:
                    if s in lname: return U2I[uni]
            return U2I['background']
        for cid,cname in names.items():
            if 0 <= cid < len(id2uni): id2uni[cid] = match_unified(cname)
        id2uni[0]=U2I['background']
        return id2uni

    def __len__(self): return len(self.img_paths)

    def __getitem__(self, idx:int):
        ip = self.img_paths[idx]; mp = self.ann_paths[idx]
        image = Image.open(ip).convert("RGB")
        mask  = Image.open(mp)
        m_np = np.array(mask, dtype=np.int64)
        m_np = self.id2uni[np.clip(m_np, 0, len(self.id2uni)-1)]
        image = TF.resize(image, self.target_size, interpolation=InterpolationMode.BILINEAR)
        mask_img = Image.fromarray(m_np.astype(np.uint8))
        mask_img = TF.resize(mask_img, self.target_size, interpolation=InterpolationMode.NEAREST)
        image_t = TF.to_tensor(image)
        mask_t  = torch.from_numpy(np.array(mask_img, dtype=np.uint8))
        return {"image": image_t, "mask": mask_t, "source":"ade20k", "path": str(ip)}

def seg_collate(batch):
    images = torch.stack([b["image"] for b in batch], 0)
    masks  = torch.stack([b["mask"]  for b in batch], 0)
    return images, masks

# ---------- 5) Build combined dataset -> EXPORT to ./unified_dataset ----------
coco_ds = COCOSemanticFromInstances(COCO_ROOT, split="val", target_size=(512,512))
ade_ds  = ADE20KUnifiedSeg(ADE_SUB, split="validation", target_size=(512,512))
combined = ConcatDataset([coco_ds, ade_ds])

export_loader = DataLoader(combined, batch_size=8, shuffle=False, num_workers=2,
                           pin_memory=True, collate_fn=seg_collate)

OUT_ROOT = Path("./unified_dataset")
IMG_DIR  = OUT_ROOT / "images"
MSK_DIR  = OUT_ROOT / "masks"
IMG_DIR.mkdir(parents=True, exist_ok=True)
MSK_DIR.mkdir(parents=True, exist_ok=True)

start_idx = len(list(MSK_DIR.glob("*.png"))) + 1
counter = start_idx
print(f"[export] Writing unified PNGs to {OUT_ROOT} ...")
for images, masks in export_loader:
    images = images.cpu(); masks = masks.cpu()
    B = images.shape[0]
    for i in range(B):
        img_pil = TF.to_pil_image(images[i].clamp(0,1))
        m_np = masks[i].numpy().astype(np.uint8)
        m_pil = Image.fromarray(m_np)
        stem = f"{counter:05d}.png"
        img_pil.save(IMG_DIR / stem)
        m_pil.save(MSK_DIR / stem)
        counter += 1
print(f"[export] Saved {counter - start_idx} samples to ./unified_dataset.")

# ---------- 6) Class weights (compute once, save) ----------
stats_dir = OUT_ROOT / "stats"
stats_dir.mkdir(parents=True, exist_ok=True)
ce_weights_path = stats_dir / "class_weights_norm_inv_mean1.pt"

def compute_inverse_pixel_weights(mask_dir: Path, num_classes=9):
    counts = np.zeros((num_classes,), dtype=np.int64)
    for mp in sorted(mask_dir.glob("*.png")):
        m = np.array(Image.open(mp), dtype=np.uint8)
        binc = np.bincount(m.ravel(), minlength=max(num_classes, 256))
        counts += binc[:num_classes]
    with np.errstate(divide='ignore', invalid='ignore'):
        inv = np.where(counts > 0, 1.0 / counts, 0.0)
    s = inv.sum()
    w = inv / s if s > 0 else np.ones((num_classes,), dtype=np.float64) / num_classes
    w_mean1 = w / w.mean()
    return torch.tensor(w_mean1, dtype=torch.float32), counts

if ce_weights_path.exists():
    ce_weights = torch.load(ce_weights_path, map_location="cpu").float()
    print(f"[weights] Loaded: {ce_weights_path}")
else:
    ce_weights, counts = compute_inverse_pixel_weights(MSK_DIR, num_classes=9)
    torch.save(ce_weights, ce_weights_path)
    np.save(stats_dir / "pixel_counts.npy", counts)
    print(f"[weights] Computed & saved. Pixel counts: {counts.tolist()}")

# ---------- 7) Train/Val split from ./unified_dataset ----------
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]
IMAGE_SIZE    = None  # already 512x512

class UnifiedSegFromDisk(Dataset):
    def __init__(self, img_dir: Path, msk_dir: Path, size: int | None = IMAGE_SIZE):
        self.img_dir, self.msk_dir = Path(img_dir), Path(msk_dir)
        self.size = size
        self.mask_paths = sorted(self.msk_dir.glob("*.png"))
        self.pairs = [(self.img_dir / mp.name, mp) for mp in self.mask_paths if (self.img_dir / mp.name).exists()]
        assert self.pairs, "No matched image-mask pairs."
    def __len__(self): return len(self.pairs)
    def __getitem__(self, idx: int):
        ip, mp = self.pairs[idx]
        img = Image.open(ip).convert("RGB")
        msk = Image.open(mp)
        if self.size is not None:
            img = TF.resize(img, [self.size, self.size], interpolation=InterpolationMode.BILINEAR)
            msk = TF.resize(msk, [self.size, self.size], interpolation=InterpolationMode.NEAREST)
        img_t = TF.to_tensor(img)
        img_t = TF.normalize(img_t, IMAGENET_MEAN, IMAGENET_STD)
        m_t = torch.from_numpy(np.array(msk, dtype=np.uint8))
        return {"image": img_t, "mask": m_t, "path": str(ip)}

def seg_collate2(batch):
    images = torch.stack([b["image"] for b in batch], 0)
    masks  = torch.stack([b["mask"]  for b in batch], 0)
    paths  = [b["path"] for b in batch]
    return images, masks, paths

full_ds = UnifiedSegFromDisk(IMG_DIR, MSK_DIR, size=IMAGE_SIZE)
val_ratio = 0.2
val_len = int(len(full_ds) * val_ratio)
train_len = len(full_ds) - val_len
torch.manual_seed(SEED)
train_ds, val_ds = random_split(full_ds, [train_len, val_len])
print(f"[split] total={len(full_ds)} | train={len(train_ds)} | val={len(val_ds)}")

BATCH_SIZE  = 8   # bump to 8/12 if GPU memory allows; lower to 4 if OOM
NUM_WORKERS = min(2, os.cpu_count() or 2)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=True, collate_fn=seg_collate2)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True, collate_fn=seg_collate2)

# ---------- 8) Model, loss, optimizer, scheduler (EPOCHS=25) ----------
NUM_CLASSES = 9
try:
    weights_enum = DeepLabV3_ResNet101_Weights.DEFAULT
    model = deeplabv3_resnet101(weights=weights_enum)
    print("[model] ImageNet-pretrained backbone loaded.")
except Exception as e:
    print(f"[model] Pretrained load failed ({e}), using random init.")
    model = deeplabv3_resnet101(weights=None)

model.classifier[4] = nn.Conv2d(256, NUM_CLASSES, kernel_size=1)
model = model.to(device)

ce_weights = ce_weights.to(device)
criterion = nn.CrossEntropyLoss(weight=ce_weights)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)

EPOCHS    = 25
warmup_ep = max(1, min(2, EPOCHS // 5))
warmup    = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, total_iters=warmup_ep)
cosine    = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max(1, EPOCHS - warmup_ep))
scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, [warmup, cosine], milestones=[warmup_ep])

scaler = torch.amp.GradScaler('cuda', enabled=(device.type == "cuda"))

@torch.no_grad()
def compute_miou(model, loader, num_classes: int) -> float:
    model.eval()
    hist = torch.zeros((num_classes, num_classes), device=device, dtype=torch.int64)
    for images, masks, _ in loader:
        images = images.to(device, non_blocking=True)
        masks  = masks.long().to(device, non_blocking=True)
        preds  = model(images)["out"].argmax(1)
        k = (masks >= 0) & (masks < num_classes)
        labels = num_classes * masks[k] + preds[k]
        bins = torch.bincount(labels, minlength=num_classes**2)
        hist += bins.reshape(num_classes, num_classes)
    tp = torch.diag(hist).float()
    fp = (hist.sum(0) - tp)
    fn = (hist.sum(1) - tp)
    denom = (tp + fp + fn)
    iou = torch.zeros_like(tp)
    valid = denom > 0
    iou[valid] = tp[valid] / denom[valid]
    return iou[valid].mean().item() if valid.any() else 0.0

def train_one_epoch(model, loader, optimizer, scaler):
    model.train()
    running, n = 0.0, 0
    for images, masks, _ in loader:
        images = images.to(device, non_blocking=True)
        masks  = masks.long().to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=(device.type == "cuda")):
            out  = model(images)["out"]
            loss = criterion(out, masks)
        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()
        running += loss.item() * images.size(0)
        n += images.size(0)
    return running / max(n, 1)

@torch.no_grad()
def validate(model, loader):
    model.eval()
    running, n = 0.0, 0
    for images, masks, _ in loader:
        images = images.to(device, non_blocking=True)
        masks  = masks.long().to(device, non_blocking=True)
        with torch.amp.autocast('cuda', enabled=(device.type == "cuda")):
            out  = model(images)["out"]
            loss = criterion(out, masks)
        running += loss.item() * images.size(0)
        n += images.size(0)
    val_loss = running / max(n, 1)
    val_miou = compute_miou(model, loader, NUM_CLASSES)
    return val_loss, val_miou

BEST_PATH = WORK / "best_model.pth"
best_miou = -1.0

print("\n[train] Starting (25 epochs)…")
for epoch in range(1, EPOCHS+1):
    train_loss = train_one_epoch(model, train_loader, optimizer, scaler)
    val_loss, val_miou = validate(model, val_loader)
    scheduler.step()
    if val_miou > best_miou:
        best_miou = val_miou
        torch.save({"model_state_dict": model.state_dict(), "num_classes": NUM_CLASSES}, BEST_PATH)
    print(f"Epoch {epoch:02d}/{EPOCHS} | "
          f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | "
          f"val_mIoU={val_miou:.4f} | best_mIoU={best_miou:.4f}")

print(f"\n[done] Best model saved to: {BEST_PATH}")



loading annotations into memory...
Done (t=0.65s)
creating index...
index created!
[ADE] id->unified from: /kaggle/working/data/ADE20K_subset/objectInfo150.txt
[export] Writing unified PNGs to unified_dataset ...
[export] Saved 5150 samples to ./unified_dataset.
[weights] Computed & saved. Pixel counts: [1263531292, 6780223, 2705095, 1759656, 672274, 365909, 35541703, 38041483, 643965]
[split] total=5150 | train=4120 | val=1030


Downloading: "https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth" to /root/.cache/torch/hub/checkpoints/deeplabv3_resnet101_coco-586e9e4e.pth
100%|██████████| 233M/233M [00:01<00:00, 222MB/s] 


[model] ImageNet-pretrained backbone loaded.

[train] Starting (25 epochs)…
Epoch 01/25 | train_loss=1.4150 | val_loss=0.9964 | val_mIoU=0.1302 | best_mIoU=0.1302




Epoch 02/25 | train_loss=0.8607 | val_loss=0.6939 | val_mIoU=0.2182 | best_mIoU=0.2182
Epoch 03/25 | train_loss=0.9362 | val_loss=0.9823 | val_mIoU=0.1522 | best_mIoU=0.2182
Epoch 04/25 | train_loss=0.8944 | val_loss=0.9439 | val_mIoU=0.1929 | best_mIoU=0.2182
Epoch 05/25 | train_loss=0.9475 | val_loss=1.2452 | val_mIoU=0.1376 | best_mIoU=0.2182
Epoch 06/25 | train_loss=1.0478 | val_loss=1.0600 | val_mIoU=0.1766 | best_mIoU=0.2182
Epoch 07/25 | train_loss=0.9028 | val_loss=0.9852 | val_mIoU=0.1888 | best_mIoU=0.2182
Epoch 08/25 | train_loss=0.7550 | val_loss=0.9449 | val_mIoU=0.2024 | best_mIoU=0.2182
Epoch 09/25 | train_loss=0.6417 | val_loss=1.0317 | val_mIoU=0.2429 | best_mIoU=0.2429
Epoch 10/25 | train_loss=0.7666 | val_loss=1.1892 | val_mIoU=0.2002 | best_mIoU=0.2429
Epoch 11/25 | train_loss=0.6201 | val_loss=0.9784 | val_mIoU=0.1742 | best_mIoU=0.2429
Epoch 12/25 | train_loss=0.4839 | val_loss=1.1393 | val_mIoU=0.2618 | best_mIoU=0.2618
Epoch 13/25 | train_loss=0.4982 | val_loss=

In [10]:
# ---------- 9) Visualization: save a few val preds (fixed loader) ----------
PALETTE = np.array([
    [0, 0, 0],       # 0 background
    [128, 0, 0],     # 1 wall
    [0, 128, 0],     # 2 floor
    [128, 128, 0],   # 3 ceiling
    [0, 0, 128],     # 4 window
    [128, 0, 128],   # 5 door
    [0, 128, 128],   # 6 seating
    [128, 128, 128], # 7 table
    [64, 0, 0],      # 8 cabinet
], dtype=np.uint8)

def colorize_mask(mask_np: np.ndarray) -> np.ndarray:
    return PALETTE[np.clip(mask_np, 0, len(PALETTE)-1)]

def denorm(x: torch.Tensor) -> torch.Tensor:
    mean = torch.tensor([0.485,0.456,0.406], device=x.device).view(-1,1,1)
    std  = torch.tensor([0.229,0.224,0.225], device=x.device).view(-1,1,1)
    return (x * std + mean).clamp(0,1)

@torch.no_grad()
def visualize_predictions(model_path=BEST_PATH, num_samples=3, out_dir=WORK / "vis"):
    out_dir.mkdir(parents=True, exist_ok=True)
    ckpt = torch.load(model_path, map_location=device)

    # Build a clean model skeleton WITHOUT pretrained weights & WITHOUT aux head
    # (prevents accidental downloads and avoids needing aux keys).
    try:
        vis_model = deeplabv3_resnet101(weights=None, weights_backbone=None, aux_loss=False).to(device)
    except TypeError:
        # Older torchvision: no weights_backbone kw
        vis_model = deeplabv3_resnet101(weights=None, aux_loss=False).to(device)

    # Match the trained classifier output channels
    vis_model.classifier[4] = nn.Conv2d(256, NUM_CLASSES, kernel_size=1).to(device)

    # Tolerate aux keys present in the checkpoint
    missing, unexpected = vis_model.load_state_dict(ckpt["model_state_dict"], strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if unexpected:
        # Expect aux_classifier.* to show up here, which is fine
        print("  unexpected keys (ignored):", [k for k in unexpected if k.startswith('aux_classifier')][:4], "...")

    vis_model.eval()

    # Save a few visualizations from the val split
    for i in range(num_samples):
        idx = i if i < len(val_ds) else 0
        s = val_ds[idx]
        img_t = s["image"].unsqueeze(0).to(device)
        gt    = s["mask"].cpu().numpy().astype(np.uint8)

        pr    = vis_model(img_t)["out"].argmax(1)[0].cpu().numpy().astype(np.uint8)
        img_disp = TF.to_pil_image(denorm(s["image"]))

        fig, axes = plt.subplots(1,3,figsize=(12,4))
        axes[0].imshow(img_disp);            axes[0].set_title("Image");    axes[0].axis("off")
        axes[1].imshow(colorize_mask(gt));   axes[1].set_title("GT Mask");  axes[1].axis("off")
        axes[2].imshow(colorize_mask(pr));   axes[2].set_title("Pred Mask");axes[2].axis("off")
        plt.tight_layout()
        out_path = out_dir / f"val_vis_{i:02d}.png"
        plt.savefig(out_path, dpi=120); plt.close(fig)
        print(f"[vis] saved {out_path}")

visualize_predictions(num_samples=3)

[load] missing=0 unexpected=8
  unexpected keys (ignored): ['aux_classifier.0.weight', 'aux_classifier.1.weight', 'aux_classifier.1.bias', 'aux_classifier.1.running_mean'] ...
[vis] saved /kaggle/working/vis/val_vis_00.png
[vis] saved /kaggle/working/vis/val_vis_01.png
[vis] saved /kaggle/working/vis/val_vis_02.png


In [11]:
from pathlib import Path
import zipfile

BEST_PATH = Path("/kaggle/working/best_model.pth")
VIS_DIR   = Path("/kaggle/working/vis")
ZIP_PATH  = Path("/kaggle/working/deeplabv3_unified_best.zip")

with zipfile.ZipFile(ZIP_PATH, "w", zipfile.ZIP_DEFLATED) as z:
    z.write(BEST_PATH, arcname="best_model.pth")
    if VIS_DIR.exists():
        for p in sorted(VIS_DIR.glob("*.png")):
            z.write(p, arcname=f"vis/{p.name}")

from IPython.display import FileLink
FileLink(str(ZIP_PATH))  # click to download the zip

In [12]:
import os, json, shutil, subprocess
from pathlib import Path

EXPORT_DIR = Path("/kaggle/working/export_model")
EXPORT_DIR.mkdir(exist_ok=True)

shutil.copy("/kaggle/working/best_model.pth", EXPORT_DIR / "best_model.pth")
meta = {
  "title": "deeplabv3-unified-best",
  "id": "<your-kaggle-username>/deeplabv3-unified-best",
  "licenses": [{"name": "CC0-1.0"}]
}
with open(EXPORT_DIR / "dataset-metadata.json", "w") as f:
    json.dump(meta, f, indent=2)

!kaggle datasets create -p /kaggle/working/export_model -r zip

Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 4, in <module>
    from kaggle.cli import main
  File "/usr/local/lib/python3.11/dist-packages/kaggle/__init__.py", line 6, in <module>
    api.authenticate()
  File "/usr/local/lib/python3.11/dist-packages/kaggle/api/kaggle_api_extended.py", line 434, in authenticate
    raise IOError('Could not find {}. Make sure it\'s located in'
OSError: Could not find kaggle.json. Make sure it's located in /root/.config/kaggle. Or use the environment method. See setup instructions at https://github.com/Kaggle/kaggle-api/


In [13]:
Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 4, in <module>
    from kaggle.cli import main
  File "/usr/local/lib/python3.11/dist-packages/kaggle/__init__.py", line 6, in <module>
    api.authenticate()
  File "/usr/local/lib/python3.11/dist-packages/kaggle/api/kaggle_api_extended.py", line 434, in authenticate
    raise IOError('Could not find {}. Make sure it\'s located in'
OSError: Could not find kaggle.json. Make sure it's located in /root/.config/kaggle. Or use the environment method. See setup instructions at https://github.com/Kaggle/kaggle-api/

SyntaxError: unterminated string literal (detected at line 8) (1584782344.py, line 8)

In [14]:
from pathlib import Path
import zipfile
from IPython.display import FileLink

BEST_PATH = Path("/kaggle/working/best_model.pth")
VIS_DIR   = Path("/kaggle/working/vis")
ZIP_PATH  = Path("/kaggle/working/artifacts.zip")

with zipfile.ZipFile(ZIP_PATH, "w", zipfile.ZIP_DEFLATED) as z:
    z.write(BEST_PATH, arcname="best_model.pth")
    if VIS_DIR.exists():
        for p in sorted(VIS_DIR.glob("*.png")):
            z.write(p, arcname=f"vis/{p.name}")

print(f"Zipped -> {ZIP_PATH}")
display(FileLink(str(ZIP_PATH)))  # ← click to download the zip


Zipped -> /kaggle/working/artifacts.zip


In [15]:
from pathlib import Path
from IPython.display import FileLink, FileLinks

BEST_PATH = Path("/kaggle/working/best_model.pth")
VIS_DIR   = Path("/kaggle/working/vis")

# Model link
print(f"Model: {BEST_PATH}  |  size={BEST_PATH.stat().st_size/1e6:.1f} MB")
display(FileLink(str(BEST_PATH)))  # ← click to download

# All visualization PNGs as links
if VIS_DIR.exists():
    print(f"Visualizations in: {VIS_DIR}")
    display(FileLinks(str(VIS_DIR)))  # ← click any file to download
else:
    print("No /kaggle/working/vis directory found.")

Model: /kaggle/working/best_model.pth  |  size=244.6 MB


Visualizations in: /kaggle/working/vis


In [16]:
from pathlib import Path
from IPython.display import FileLink, FileLinks
import os

# Go to the directory Kaggle exposes as "Outputs"
os.chdir("/kaggle/working")

# Sanity check
for p in ["artifacts.zip", "best_model.pth", "vis"]:
    print(p, "->", Path(p).exists())

# Click these links to download
display(FileLink("artifacts.zip"))   # zip with model + images (if you created it)
display(FileLink("best_model.pth"))  # raw model, if you want just this

# List everything in the folder with clickable links
display(FileLinks("."))


artifacts.zip -> True
best_model.pth -> True
vis -> True


In [17]:
from pathlib import Path
import zipfile
from IPython.display import FileLink
import os

os.chdir("/kaggle/working")
BEST_PATH = Path("best_model.pth")
VIS_DIR   = Path("vis")
ZIP_PATH  = Path("artifacts.zip")

with zipfile.ZipFile(ZIP_PATH, "w", zipfile.ZIP_DEFLATED) as z:
    if BEST_PATH.exists():
        z.write(BEST_PATH, arcname="best_model.pth")
    if VIS_DIR.exists():
        for p in sorted(VIS_DIR.glob("*.png")):
            z.write(p, arcname=f"vis/{p.name}")

print("Zipped ->", ZIP_PATH.resolve())
display(FileLink("artifacts.zip"))  # click to download

Zipped -> /kaggle/working/artifacts.zip


In [20]:
import torch
import numpy as np

@torch.no_grad()
def evaluate_segmentation_metrics(
    model,
    loader,
    num_classes: int,
    device: torch.device,
    ignore_index: int | None = None,
    class_names: list[str] | None = None,
):
    """
    Computes Pixel Acc, Mean Class Acc, mIoU, FWIoU and per-class stats.
    """
    model.eval()
    hist = torch.zeros((num_classes, num_classes), device=device, dtype=torch.int64)

    total_labeled = 0
    total_correct = 0

    for images, masks, _ in loader:
        images = images.to(device, non_blocking=True)
        masks  = masks.long().to(device, non_blocking=True)   # (B,H,W)

        logits = model(images)["out"]                         # (B,C,H,W)
        preds  = torch.argmax(logits, dim=1)                  # (B,H,W)

        # Valid pixels (optionally drop ignore_index like 255)
        if ignore_index is not None:
            valid = masks != ignore_index
        else:
            valid = (masks >= 0) & (masks < num_classes)

        total_labeled += valid.sum().item()
        total_correct += (preds[valid] == masks[valid]).sum().item()

        # Update confusion matrix
        labels = num_classes * masks[valid] + preds[valid]
        hist += torch.bincount(labels, minlength=num_classes**2).reshape(num_classes, num_classes)

    # Derive metrics from confusion matrix
    diag   = hist.diag().float()          # TP per class
    gt_sum = hist.sum(1).float()          # GT pixels per class
    pr_sum = hist.sum(0).float()          # Predicted pixels per class
    union  = gt_sum + pr_sum - diag

    per_class_acc = torch.where(gt_sum > 0, diag / gt_sum, torch.zeros_like(diag))
    mean_class_acc = per_class_acc[gt_sum > 0].mean().item() if (gt_sum > 0).any() else 0.0

    per_class_iou = torch.where(union > 0, diag / union, torch.zeros_like(diag))
    miou = per_class_iou[union > 0].mean().item() if (union > 0).any() else 0.0

    pixel_acc = total_correct / max(total_labeled, 1)

    # Frequency Weighted IoU
    weights = gt_sum / max(gt_sum.sum().item(), 1.0)
    fw_iou = (weights * per_class_iou).sum().item()

    # Pretty print
    print(f"Pixel Accuracy        : {pixel_acc:.4f}")
    print(f"Mean Class Accuracy   : {mean_class_acc:.4f}")
    print(f"mIoU                  : {miou:.4f}")
    print(f"Frequency-Weighted IoU: {fw_iou:.4f}")

    if class_names:
        print("\nPer-class (Acc, IoU):")
        for i, name in enumerate(class_names):
            print(f"{i:2d} {name:>10s} | acc={per_class_acc[i].item():.3f}  iou={per_class_iou[i].item():.3f}")

    return {
        "pixel_acc": pixel_acc,
        "mean_class_acc": mean_class_acc,
        "miou": miou,
        "fw_iou": fw_iou,
        "per_class_acc": per_class_acc.detach().cpu().numpy(),
        "per_class_iou": per_class_iou.detach().cpu().numpy(),
        "confusion": hist.detach().cpu().numpy(),
    }

# ---- Run on validation set ----
metrics = evaluate_segmentation_metrics(
    model,            # your trained model (already on device)
    val_loader,       # your validation DataLoader
    NUM_CLASSES,      # 9
    device,
    ignore_index=None,            # or 255 if you used an ignore label
    class_names=UNIFIED_CLASSES,  # optional pretty names
)

Pixel Accuracy        : 0.9402
Mean Class Accuracy   : 0.3786
mIoU                  : 0.2726
Frequency-Weighted IoU: 0.9013

Per-class (Acc, IoU):
 0 background | acc=0.976  iou=0.941
 1       wall | acc=0.350  iou=0.153
 2      floor | acc=0.613  iou=0.301
 3    ceiling | acc=0.451  iou=0.376
 4     window | acc=0.320  iou=0.164
 5       door | acc=0.000  iou=0.000
 6    seating | acc=0.405  iou=0.299
 7      table | acc=0.290  iou=0.217
 8    cabinet | acc=0.002  iou=0.001


In [21]:
import numpy as np

# after the call you already ran:
# metrics = evaluate_segmentation_metrics(model, val_loader, NUM_CLASSES, device,
#                                         ignore_index=None, class_names=UNIFIED_CLASSES)

fg_idx = np.arange(1, NUM_CLASSES)             # classes 1..8
fg_acc  = metrics["per_class_acc"][fg_idx]
fg_iou  = metrics["per_class_iou"][fg_idx]

print(f"Foreground Mean Class Accuracy: {fg_acc.mean():.4f}")
print(f"Foreground mIoU:                {fg_iou.mean():.4f}")

Foreground Mean Class Accuracy: 0.3039
Foreground mIoU:                0.1890


In [22]:
import torch

@torch.no_grad()
def evaluate_on_nonempty(model, loader, num_classes, device):
    model.eval()
    total_correct = 0
    total_labeled = 0
    hist = torch.zeros((num_classes, num_classes), device=device, dtype=torch.int64)

    for images, masks, _ in loader:
        # keep only batches with at least one non-empty mask
        nz = (masks.view(masks.size(0), -1).max(dim=1).values > 0)
        if not nz.any():
            continue

        images = images[nz].to(device, non_blocking=True)
        masks  = masks[nz].long().to(device, non_blocking=True)

        logits = model(images)["out"]
        preds  = logits.argmax(1)
        valid  = (masks >= 0) & (masks < num_classes)

        total_labeled += valid.sum().item()
        total_correct += (preds[valid] == masks[valid]).sum().item()

        labels = num_classes * masks[valid] + preds[valid]
        hist += torch.bincount(labels, minlength=num_classes**2).reshape(num_classes, num_classes)

    diag   = hist.diag().float()
    gt_sum = hist.sum(1).float()
    pr_sum = hist.sum(0).float()
    union  = gt_sum + pr_sum - diag

    per_class_acc = torch.where(gt_sum > 0, diag / gt_sum, torch.zeros_like(diag))
    per_class_iou = torch.where(union > 0, diag / union, torch.zeros_like(diag))

    pixel_acc = total_correct / max(total_labeled, 1)
    miou = per_class_iou[union > 0].mean().item() if (union > 0).any() else 0.0

    return pixel_acc, miou, per_class_acc.cpu().numpy(), per_class_iou.cpu().numpy()

pix_acc_nz, miou_nz, acc_c_nz, iou_c_nz = evaluate_on_nonempty(model, val_loader, NUM_CLASSES, device)
print(f"Non-empty Pixel Acc: {pix_acc_nz:.4f} | Non-empty mIoU: {miou_nz:.4f}")

Non-empty Pixel Acc: 0.8042 | Non-empty mIoU: 0.2815
