In [1]:
# -*- coding: utf-8 -*-
"""
DPT (Dense Prediction Transformer) - Full runnable code (NO cv2)
Using segmentation-models-pytorch (smp) built-in DPT.

Why this works:
- DPT uses a ViT-like backbone, which usually requires a fixed input size (e.g., 224x224).
- We follow the same safe pipeline as before:
    (160, w) -> pad to (160,272) -> resize to (224,224) -> model
    model output (224,224) -> resize back to (160,272) -> crop to raw_w
- Each row of submission.csv must be a flattened 160*272 mask, padded with -1.

Data:
- Train images: Desktop/deep_datachallenge/X_train_uDRk9z9/images (well1-6)
- Train labels: Desktop/deep_datachallenge/Y_train_T9NrBYo.csv
- Test images:  Desktop/deep_datachallenge/X_test_xNbnvIa/images (well7-11)

Split:
- Train: well1-5
- Val:   well6
"""

import re
from pathlib import Path

import numpy as np
import pandas as pd

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset

import segmentation_models_pytorch as smp


# =========================
# 0) Config
# =========================
# Root folder that holds the challenge data on the local machine.
DATA_ROOT = Path(r"C:\Users\lenovo\Desktop\deep_datachallenge")

# Image folders (scanned for *.npy files) and the training label table.
TRAIN_IMG_DIR = DATA_ROOT.joinpath("X_train_uDRk9z9", "images")
TEST_IMG_DIR = DATA_ROOT.joinpath("X_test_xNbnvIa", "images")
Y_TRAIN_CSV = DATA_ROOT.joinpath("Y_train_T9NrBYo.csv")

# Submission grid (fixed by the challenge): height, then width.
H_SUB = 160
W_SUB = 272

# Model input grid (ViT/DPT often expects a fixed size): height, then width.
H_MODEL = 224
W_MODEL = 224

# Number of output channels of the model, and the label value that marks
# padded pixels (excluded from the loss, used as filler in the submission).
NUM_CLASSES = 3
IGNORE_INDEX = -1

# Optimisation hyper-parameters.
BATCH_SIZE = 4  # 4060 (8GB): start with 2~4
LR = 1e-4
WEIGHT_DECAY = 1e-4
EPOCHS = 20

# Run on the GPU whenever PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"


# =========================
# 1) Utils
# =========================
def parse_well_id(name: str) -> int:
    """Return the well number embedded in a sample name.

    The name is searched for the pattern ``well_<digits>_``; for example
    ``'well_6_section_...'`` yields ``6``.

    Args:
        name: Sample name (typically a file stem).

    Returns:
        The well number as an int, or ``-1`` when the pattern is not found.
    """
    match = re.search(r"well_(\d+)_", name)
    if match is None:
        return -1
    return int(match.group(1))


def minmax_norm(x: np.ndarray) -> np.ndarray:
    """Rescale an array to the range [0, 1] as float32.

    NaN and +/-Inf entries are replaced by 0.0 *before* the minimum and
    maximum are taken, so those replaced zeros take part in the range.

    Args:
        x: Input array of any numeric dtype.

    Returns:
        A float32 array with the same shape as ``x``. When the value range
        is smaller than 1e-6 (an essentially constant input) an all-zero
        array is returned instead of dividing by a tiny number.
    """
    cleaned = np.nan_to_num(
        x.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0
    )
    lo = float(cleaned.min())
    hi = float(cleaned.max())
    span = hi - lo
    if span < 1e-6:
        return np.zeros_like(cleaned, dtype=np.float32)
    return (cleaned - lo) / span


def pad_to_160x272(img: np.ndarray) -> np.ndarray:
    """Place a (H_SUB, w) image on a zero-filled (H_SUB, W_SUB) canvas.

    The image is copied into the left part of the canvas; the columns to its
    right stay 0. Columns beyond W_SUB, if any, are dropped.

    Args:
        img: 2-D array whose first dimension must equal H_SUB.

    Returns:
        A new (H_SUB, W_SUB) array with the same dtype as ``img``.

    Raises:
        AssertionError: If the image height is not H_SUB.
    """
    h, w = img.shape
    assert h == H_SUB, f"Expected height {H_SUB}, got {h}"
    keep = min(w, W_SUB)  # number of source columns that fit on the canvas
    canvas = np.zeros((H_SUB, W_SUB), dtype=img.dtype)
    canvas[:, :keep] = img[:, :keep]
    return canvas


def decode_mask_from_csv_row(row: np.ndarray) -> np.ndarray:
    """Rebuild a 2-D label mask from one flattened CSV row.

    Every entry equal to IGNORE_INDEX is treated as padding and dropped; the
    remaining values are reshaped to H_SUB rows.

    Args:
        row: 1-D array holding a flattened mask plus IGNORE_INDEX padding.

    Returns:
        An int64 array of shape (H_SUB, w), where ``w`` is the number of
        remaining values divided by H_SUB.

    Raises:
        AssertionError: If the number of remaining values is not a multiple
            of H_SUB.
    """
    labels = row[row != IGNORE_INDEX]
    n_valid = len(labels)
    assert n_valid % H_SUB == 0, f"Valid mask length {n_valid} not divisible by {H_SUB}"
    width = n_valid // H_SUB
    return labels.reshape(H_SUB, width).astype(np.int64)


def pad_mask_to_160x272(mask: np.ndarray) -> np.ndarray:
    """Place a (H_SUB, w) mask on a (H_SUB, W_SUB) canvas of IGNORE_INDEX.

    The mask fills the left part of the canvas; the columns to its right keep
    the IGNORE_INDEX fill value. Columns beyond W_SUB, if any, are dropped.

    Args:
        mask: 2-D label array whose first dimension must equal H_SUB.

    Returns:
        A new int64 array of shape (H_SUB, W_SUB).

    Raises:
        AssertionError: If the mask height is not H_SUB.
    """
    h, w = mask.shape
    assert h == H_SUB
    keep = min(w, W_SUB)  # number of source columns that fit on the canvas
    canvas = np.full((H_SUB, W_SUB), IGNORE_INDEX, dtype=np.int64)
    canvas[:, :keep] = mask[:, :keep]
    return canvas


def resize_img_np_to_model(img_160x272: np.ndarray) -> np.ndarray:
    """Bilinearly resize a 2-D image to the model grid (H_MODEL, W_MODEL).

    Args:
        img_160x272: 2-D image array (the padded submission-size image).

    Returns:
        A float32 array of shape (H_MODEL, W_MODEL).
    """
    # F.interpolate works on (N, C, H, W), so add two leading singleton axes.
    batch = torch.from_numpy(img_160x272)[None, None].float()
    resized = F.interpolate(
        batch, size=(H_MODEL, W_MODEL), mode="bilinear", align_corners=False
    )
    return resized[0, 0].numpy()


def resize_mask_np_to_model(mask_160x272: np.ndarray) -> np.ndarray:
    """Resize a 2-D label mask to the model grid (H_MODEL, W_MODEL).

    Nearest-neighbour interpolation is used so that no new label values are
    invented between classes.

    Args:
        mask_160x272: 2-D label array (the padded submission-size mask).

    Returns:
        An int64 array of shape (H_MODEL, W_MODEL).
    """
    # The labels take a round trip through float for F.interpolate, which
    # works on (N, C, H, W); the result is cast back to integers afterwards.
    batch = torch.from_numpy(mask_160x272)[None, None].float()
    resized = F.interpolate(batch, size=(H_MODEL, W_MODEL), mode="nearest")
    return resized[0, 0].long().numpy()


def resize_pred_np_to_sub(pred_224x224: np.ndarray) -> np.ndarray:
    """Resize a 2-D label prediction to the submission grid (H_SUB, W_SUB).

    Nearest-neighbour interpolation is used, so the output holds only values
    that are present in the input.

    Args:
        pred_224x224: 2-D array of predicted class indices on the model grid.

    Returns:
        An int64 array of shape (H_SUB, W_SUB).
    """
    # F.interpolate works on float (N, C, H, W) tensors.
    batch = torch.from_numpy(pred_224x224)[None, None].float()
    resized = F.interpolate(batch, size=(H_SUB, W_SUB), mode="nearest")
    return resized[0, 0].long().numpy()


# =========================
# 2) Dataset
# =========================
class WellSegDataset(Dataset):
    """Dataset over the ``*.npy`` images of a folder, with optional labels.

    Each sample is min-max normalised, right-padded to the submission grid
    and resized to the model grid. When a label CSV is supplied, the matching
    row (looked up by file stem) is decoded, padded and resized the same way.
    """

    def __init__(self, images_dir: Path, y_csv_path: Path = None):
        """Index the image files and, if given, load the label table.

        Args:
            images_dir: Folder scanned (non-recursively) for ``*.npy`` files.
            y_csv_path: CSV of flattened masks whose first column is used as
                the index; ``None`` builds an unlabeled dataset.
        """
        self.images_dir = images_dir
        # Sorted so that sample order is reproducible between runs.
        self.paths = sorted(images_dir.glob("*.npy"))
        self.names = [path.stem for path in self.paths]

        self.has_label = y_csv_path is not None
        if self.has_label:
            self.y_df = pd.read_csv(y_csv_path, index_col=0)
        else:
            self.y_df = None

    def __len__(self):
        """Return the number of image files found."""
        return len(self.paths)

    def __getitem__(self, idx: int):
        """Load and preprocess sample ``idx``.

        Returns:
            A dict with ``"name"`` (file stem), ``"image"`` (float tensor of
            shape (1, H_MODEL, W_MODEL)), ``"raw_w"`` (width of the file as
            stored on disk) and, for labeled datasets only, ``"mask"`` (long
            tensor of shape (H_MODEL, W_MODEL)).
        """
        sample_name = self.names[idx]
        raw = np.load(self.paths[idx])  # (160,160) or (160,272)
        # Remember the on-disk width so predictions can be cropped back to it.
        raw_w = int(raw.shape[1])

        padded_img = pad_to_160x272(minmax_norm(raw))
        model_img = resize_img_np_to_model(padded_img)
        image = torch.from_numpy(model_img).unsqueeze(0).float()  # (1,224,224)

        if self.has_label:
            flat_row = self.y_df.loc[sample_name].values.astype(np.int64)
            decoded = decode_mask_from_csv_row(flat_row)      # (160,w)
            padded_mask = pad_mask_to_160x272(decoded)        # (160,272)
            model_mask = resize_mask_np_to_model(padded_mask)  # (224,224)
            target = torch.from_numpy(model_mask).long()
            return {"name": sample_name, "image": image, "mask": target, "raw_w": raw_w}

        return {"name": sample_name, "image": image, "raw_w": raw_w}


# =========================
# 3) Model: DPT (smp)
# =========================
def build_dpt(num_classes: int) -> torch.nn.Module:
    """Build the DPT segmentation model from segmentation-models-pytorch.

    The model takes single-channel input and has no final activation, so it
    outputs raw logits.

    Notes from the original author (not verified here):
    - Encoders come from timm; "tu-vit_base_patch16_224" is used, and
      "vit_base_patch16_224" was suggested as an alternative to try.
    - If downloading the pretrained weights fails, set ``encoder_weights``
      to ``None``.
    - Passing ``dynamic_img_size=True`` may allow variable input sizes if the
      encoder supports it.

    Args:
        num_classes: Number of output channels (segmentation classes).

    Returns:
        The constructed ``smp.DPT`` model.
    """
    dpt_kwargs = {
        "encoder_name": "tu-vit_base_patch16_224",
        "encoder_weights": "imagenet",
        "in_channels": 1,
        "classes": num_classes,
        "activation": None,
    }
    return smp.DPT(**dpt_kwargs)


# =========================
# 4) Train / Val
# =========================
def train_one_epoch(model, loader, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Each batch is a dict with an ``"image"`` and a ``"mask"`` tensor. The
    loss is cross-entropy that skips targets equal to IGNORE_INDEX.

    Args:
        model: Network mapping an image batch to per-class logits.
        loader: DataLoader yielding the batch dicts described above.
        optimizer: Optimizer stepped once per batch.

    Returns:
        The sum of (batch loss * batch size) divided by the dataset length.
    """
    model.train()
    running = 0.0

    for batch in loader:
        images = batch["image"].to(DEVICE)   # (B,1,224,224)
        targets = batch["mask"].to(DEVICE)   # (B,224,224)

        loss = F.cross_entropy(model(images), targets, ignore_index=IGNORE_INDEX)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch is not over-counted.
        running += float(loss.item()) * images.shape[0]

    return running / len(loader.dataset)


@torch.no_grad()
def eval_one_epoch(model, loader):
    """Compute the mean loss over ``loader`` without updating the model.

    Gradient tracking is disabled and the model is put in eval mode. The loss
    is cross-entropy that skips targets equal to IGNORE_INDEX.

    Args:
        model: Network mapping an image batch to per-class logits.
        loader: DataLoader yielding dicts with ``"image"`` and ``"mask"``.

    Returns:
        The sum of (batch loss * batch size) divided by the dataset length.
    """
    model.eval()
    running = 0.0

    for batch in loader:
        images = batch["image"].to(DEVICE)
        targets = batch["mask"].to(DEVICE)
        loss = F.cross_entropy(model(images), targets, ignore_index=IGNORE_INDEX)
        # Weight by batch size so a smaller final batch is not over-counted.
        running += float(loss.item()) * images.shape[0]

    return running / len(loader.dataset)


# =========================
# 5) Inference & submission
# =========================
@torch.no_grad()
def predict_and_make_submission(model, test_images_dir: Path, out_csv: Path):
    """Predict a mask for every test image and write the submission CSV.

    For each image the arg-max class map is resized from the model grid to
    the submission grid, cropped to the image's original width, flattened and
    written into a row of length H_SUB * W_SUB whose unused tail is filled
    with IGNORE_INDEX.

    Args:
        model: Trained network; it is switched to eval mode here.
        test_images_dir: Folder with the test ``*.npy`` images.
        out_csv: Destination path of the CSV (one row per image name).
    """
    model.eval()

    # One image per batch, because every sample carries its own crop width.
    loader = DataLoader(
        WellSegDataset(test_images_dir, y_csv_path=None),
        batch_size=1,
        shuffle=False,
        num_workers=0,
    )

    rows = {}
    for batch in loader:
        sample_name = batch["name"][0]
        width = int(batch["raw_w"][0])

        logits = model(batch["image"].to(DEVICE))  # (1,C,224,224)
        labels_224 = logits.argmax(dim=1)[0].cpu().numpy().astype(np.int64)  # (224,224)

        # Back to the submission grid, then drop the padded columns.
        labels_sub = resize_pred_np_to_sub(labels_224)[:, :width]  # (160,raw_w)

        row = np.full((H_SUB * W_SUB,), IGNORE_INDEX, dtype=np.int64)
        row[: H_SUB * width] = labels_sub.flatten()
        rows[sample_name] = row

    # The dict maps name -> column, so transpose to get one row per image.
    pd.DataFrame(rows, dtype="int64").T.to_csv(out_csv)
    print(f"[OK] submission saved to: {out_csv}")


# =========================
# 6) Main
# =========================
def main():
    """Train DPT on wells 1-5, validate on well 6, then write a submission.

    Steps:
      1. Build the labeled dataset and split it by well id (well 6 is the
         validation set, everything else is training data).
      2. Train for EPOCHS epochs and save the weights to ``best_dpt.pth``
         whenever the validation loss improves.
      3. Reload the best weights saved during *this* run and write
         ``submission.csv`` for the test images.

    Raises:
        RuntimeError: If the training or the validation split is empty.
    """
    print("DEVICE:", DEVICE)
    print("Train:", TRAIN_IMG_DIR)
    print("Test :", TEST_IMG_DIR)

    ds_all = WellSegDataset(TRAIN_IMG_DIR, Y_TRAIN_CSV)

    train_idx, val_idx = [], []
    for i, n in enumerate(ds_all.names):
        if parse_well_id(n) == 6:
            val_idx.append(i)
        else:
            train_idx.append(i)

    # Fail early with a readable message; otherwise an empty split only shows
    # up later as an obscure sampler error or a division by zero.
    if not train_idx or not val_idx:
        raise RuntimeError(
            f"Empty data split (train={len(train_idx)}, val={len(val_idx)}); "
            f"check the contents of {TRAIN_IMG_DIR}"
        )

    train_ds = Subset(ds_all, train_idx)  # well1-5
    val_ds   = Subset(ds_all, val_idx)    # well6

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    print(f"Train samples: {len(train_ds)} | Val samples: {len(val_ds)}")

    model = build_dpt(NUM_CLASSES).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)

    best_val = float("inf")
    best_path = DATA_ROOT / "best_dpt.pth"
    # Tracks whether best_path was written by this run. A file left over from
    # an earlier run must not be mistaken for this run's best model.
    saved_this_run = False

    for epoch in range(1, EPOCHS + 1):
        tr = train_one_epoch(model, train_loader, optimizer)
        va = eval_one_epoch(model, val_loader)
        print(f"Epoch {epoch:02d}/{EPOCHS} | train_loss={tr:.4f} | val_loss={va:.4f}")

        # A NaN validation loss compares False here, so it never counts as
        # an improvement.
        if va < best_val:
            best_val = va
            torch.save(model.state_dict(), best_path)
            saved_this_run = True
            print("  -> best saved:", best_path)

    # test inference
    if saved_this_run:
        model.load_state_dict(torch.load(best_path, map_location=DEVICE, weights_only=True))
    else:
        # No epoch improved on the initial best (e.g. the loss was NaN every
        # time), so keep the weights currently in memory.
        print("[WARN] no checkpoint saved in this run; using final-epoch weights")
    out_csv = DATA_ROOT / "submission.csv"
    predict_and_make_submission(model, TEST_IMG_DIR, out_csv)


if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


DEVICE: cuda
Train: C:\Users\lenovo\Desktop\deep_datachallenge\X_train_uDRk9z9\images
Test : C:\Users\lenovo\Desktop\deep_datachallenge\X_test_xNbnvIa\images
Train samples: 2790 | Val samples: 1620


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Epoch 01/20 | train_loss=0.1310 | val_loss=0.0810
  -> best saved: C:\Users\lenovo\Desktop\deep_datachallenge\best_dpt.pth
Epoch 02/20 | train_loss=0.0776 | val_loss=0.0825
Epoch 03/20 | train_loss=0.0669 | val_loss=0.0937
Epoch 04/20 | train_loss=0.0643 | val_loss=0.0849
Epoch 05/20 | train_loss=0.0606 | val_loss=0.1009
Epoch 06/20 | train_loss=0.0598 | val_loss=0.0824
Epoch 07/20 | train_loss=0.0532 | val_loss=0.0823
Epoch 08/20 | train_loss=0.0509 | val_loss=0.0885
Epoch 09/20 | train_loss=0.0520 | val_loss=0.1033
Epoch 10/20 | train_loss=0.0472 | val_loss=0.0876
Epoch 11/20 | train_loss=0.0444 | val_loss=0.0891
Epoch 12/20 | train_loss=0.0443 | val_loss=0.0897
Epoch 13/20 | train_loss=0.0406 | val_loss=0.0911
Epoch 14/20 | train_loss=0.0420 | val_loss=0.0902
Epoch 15/20 | train_loss=0.0395 | val_loss=0.0965
Epoch 16/20 | train_loss=0.0346 | val_loss=0.0980
Epoch 17/20 | train_loss=0.0315 | val_loss=0.1069
Epoch 18/20 | train_loss=0.0291 | val_loss=0.1004
Epoch 19/20 | train_loss=0.