In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import os

print(os.listdir("/kaggle/input"))


['crackforest-dataset-private']


In [4]:
print(os.listdir("/kaggle/input/crackforest-dataset-private"))


['README.md', 'groundTruth', '.git', 'seg', 'image']


In [6]:
import os

root = "/kaggle/input/crackforest-dataset-private"

print("Root content:", os.listdir(root))
print("Images:", len(os.listdir(os.path.join(root, "image"))))
print("GT mats:", len(os.listdir(os.path.join(root, "groundTruth"))))
print("SEG files:", len(os.listdir(os.path.join(root, "seg"))))


Root content: ['README.md', 'groundTruth', '.git', 'seg', 'image']
Images: 156
GT mats: 118
SEG files: 118


In [2]:
# CrackForest crack segmentation (Kaggle single-cell version)
# Pipeline:
# 1) Load CrackForest dataset from /kaggle/input
# 2) Convert .mat ground truth to binary PNG masks
# 3) Train a small U-Net with BCE (pos_weight) + Dice loss
# 4) Save best checkpoint by val IoU
# 5) Visualize a few validation predictions (image / GT / prob / overlay)

import os
import random
import time
import numpy as np

from PIL import Image
from scipy.io import loadmat

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Albumentations is optional, but Kaggle usually has it
try:
    import albumentations as A
    HAS_ALB = True
except Exception:
    HAS_ALB = False

# -----------------------------
# Basic config
# -----------------------------
SEED = 42
H, W = 320, 480        # CrackForest images are typically 320x480
BATCH = 6              # If you get OOM, set 4 or 2
EPOCHS = 20
LR = 1e-3
NUM_WORKERS = 2
THRESH = 0.35          # For visualization (lower -> more red pixels)

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# -----------------------------
# Locate dataset
# -----------------------------
ROOT = "/kaggle/input/crackforest-dataset-private"
assert os.path.isdir(ROOT), f"Dataset folder not found: {ROOT}"

img_dir = os.path.join(ROOT, "image")
gt_dir  = os.path.join(ROOT, "groundTruth")

assert os.path.isdir(img_dir), f"Missing folder: {img_dir}"
assert os.path.isdir(gt_dir),  f"Missing folder: {gt_dir}"

print("CrackForest root:", ROOT)

# -----------------------------
# Convert .mat to binary mask PNG
# -----------------------------
mask_dir = "/kaggle/working/masks_png"
os.makedirs(mask_dir, exist_ok=True)

def mat_to_binary_mask(mat_path: str) -> np.ndarray | None:
    """
    CrackForest .mat has key: groundTruth (structured array)
    We read groundTruth['Segmentation'] which is a (H,W) uint8 array.
    In this dataset, seg values are usually {1,2}. We treat '2' as crack.
    Output is 0/255 uint8.
    """
    data = loadmat(mat_path)
    if "groundTruth" not in data:
        return None

    gt = data["groundTruth"]
    if gt.size == 0:
        return None

    try:
        seg = gt["Segmentation"][0, 0]
    except Exception:
        return None

    seg = np.squeeze(np.array(seg))
    if seg.ndim != 2:
        return None

    mask = (seg == 2).astype(np.uint8) * 255
    return mask

mats = sorted([f for f in os.listdir(gt_dir) if f.lower().endswith(".mat")])
converted = 0
for fn in mats:
    sid = os.path.splitext(fn)[0]
    out_path = os.path.join(mask_dir, f"{sid}.png")
    if os.path.exists(out_path):
        continue
    mask = mat_to_binary_mask(os.path.join(gt_dir, fn))
    if mask is None:
        continue
    Image.fromarray(mask).save(out_path)
    converted += 1

print("Masks ready in:", mask_dir, "| newly converted:", converted)

# -----------------------------
# Pair images with masks
# -----------------------------
img_files = sorted([f for f in os.listdir(img_dir) if f.lower().endswith((".jpg", ".jpeg", ".png"))])
img_ids = [os.path.splitext(f)[0] for f in img_files]
mask_ids = set(os.path.splitext(f)[0] for f in os.listdir(mask_dir) if f.lower().endswith(".png"))

pairs = []
missing = 0
for fname in img_files:
    sid = os.path.splitext(fname)[0]
    if sid in mask_ids:
        pairs.append((sid, os.path.join(img_dir, fname), os.path.join(mask_dir, f"{sid}.png")))
    else:
        missing += 1

print(f"Found images: {len(img_files)}, paired: {len(pairs)}, missing masks: {missing}")
assert len(pairs) > 0, "No paired samples found."

random.shuffle(pairs)
n_val = max(1, int(0.2 * len(pairs)))
val_pairs = pairs[:n_val]
train_pairs = pairs[n_val:]
print("Train:", len(train_pairs), "Val:", len(val_pairs))

# -----------------------------
# Augmentations (important: keep final size fixed)
# Avoid transforms that swap H/W (e.g., RandomRotate90)
# -----------------------------
if HAS_ALB:
    train_tf = A.Compose([
        A.RandomBrightnessContrast(p=0.3),
        A.GaussianBlur(blur_limit=(3, 5), p=0.15),
        A.GaussNoise(std_range=(0.02, 0.08), p=0.25),
        A.Affine(scale=(0.95, 1.05),
                 translate_percent=(0.0, 0.05),
                 rotate=(-10, 10),
                 shear=(-5, 5),
                 p=0.7),
        A.HorizontalFlip(p=0.5),
        A.Resize(H, W),
    ])
    val_tf = A.Compose([A.Resize(H, W)])
else:
    train_tf = None
    val_tf = None

# -----------------------------
# Dataset / Dataloader
# -----------------------------
class CrackDataset(Dataset):
    def __init__(self, pairs, tf=None):
        self.pairs = pairs
        self.tf = tf

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        sid, img_path, mask_path = self.pairs[idx]

        img = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path).convert("L"))
        mask = (mask > 127).astype(np.uint8)  # 0/1

        if self.tf is not None:
            aug = self.tf(image=img, mask=mask)
            img, mask = aug["image"], aug["mask"]

        # Safety: enforce fixed size even if no albumentations
        if img.shape[0] != H or img.shape[1] != W:
            img = np.array(Image.fromarray(img).resize((W, H), resample=Image.BILINEAR))
        if mask.shape[0] != H or mask.shape[1] != W:
            m = Image.fromarray((mask * 255).astype(np.uint8)).resize((W, H), resample=Image.NEAREST)
            mask = (np.array(m) > 127).astype(np.uint8)

        img_t = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0
        mask_t = torch.from_numpy(mask).unsqueeze(0).float()
        return img_t, mask_t, sid

train_ds = CrackDataset(train_pairs, tf=train_tf)
val_ds   = CrackDataset(val_pairs,   tf=val_tf)

train_loader = DataLoader(
    train_ds, batch_size=BATCH, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=(device.type == "cuda"),
    drop_last=True
)
val_loader = DataLoader(
    val_ds, batch_size=BATCH, shuffle=False,
    num_workers=NUM_WORKERS, pin_memory=(device.type == "cuda"),
    drop_last=False
)

# Quick sanity check (shape must be consistent)
xb, yb, sids = next(iter(train_loader))
print("Batch imgs:", xb.shape, "Batch masks:", yb.shape, "mask unique:", torch.unique(yb))

# -----------------------------
# Estimate pos_weight for BCE (cracks are sparse)
# -----------------------------
def estimate_pos_weight(pairs, sample_k=40):
    sample = random.sample(pairs, k=min(sample_k, len(pairs)))
    pos, neg = 0, 0
    for sid, _, mpath in sample:
        m = np.array(Image.open(mpath).convert("L")) > 127
        pos += int(m.sum())
        neg += int(m.size - m.sum())
    pos = max(pos, 1)
    # Clip to avoid extreme values
    return float(np.clip(neg / pos, 1.0, 50.0))

pos_w = estimate_pos_weight(train_pairs)
print("Estimated pos_weight:", round(pos_w, 2))

# -----------------------------
# U-Net model
# -----------------------------
class DoubleConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return self.net(x)

class UNet(nn.Module):
    def __init__(self, in_ch=3, out_ch=1, base=32):
        super().__init__()
        self.enc1 = DoubleConv(in_ch, base)
        self.pool1 = nn.MaxPool2d(2)
        self.enc2 = DoubleConv(base, base * 2)
        self.pool2 = nn.MaxPool2d(2)
        self.enc3 = DoubleConv(base * 2, base * 4)
        self.pool3 = nn.MaxPool2d(2)
        self.enc4 = DoubleConv(base * 4, base * 8)
        self.pool4 = nn.MaxPool2d(2)

        self.mid = DoubleConv(base * 8, base * 16)

        self.up4 = nn.ConvTranspose2d(base * 16, base * 8, 2, stride=2)
        self.dec4 = DoubleConv(base * 16, base * 8)
        self.up3 = nn.ConvTranspose2d(base * 8, base * 4, 2, stride=2)
        self.dec3 = DoubleConv(base * 8, base * 4)
        self.up2 = nn.ConvTranspose2d(base * 4, base * 2, 2, stride=2)
        self.dec2 = DoubleConv(base * 4, base * 2)
        self.up1 = nn.ConvTranspose2d(base * 2, base, 2, stride=2)
        self.dec1 = DoubleConv(base * 2, base)

        self.out = nn.Conv2d(base, out_ch, 1)

    def forward(self, x):
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool1(e1))
        e3 = self.enc3(self.pool2(e2))
        e4 = self.enc4(self.pool3(e3))
        m  = self.mid(self.pool4(e4))

        d4 = self.up4(m)
        d4 = self.dec4(torch.cat([d4, e4], dim=1))
        d3 = self.up3(d4)
        d3 = self.dec3(torch.cat([d3, e3], dim=1))
        d2 = self.up2(d3)
        d2 = self.dec2(torch.cat([d2, e2], dim=1))
        d1 = self.up1(d2)
        d1 = self.dec1(torch.cat([d1, e1], dim=1))
        return self.out(d1)  # logits

model = UNet(base=32).to(device)

# -----------------------------
# Loss + metrics
# -----------------------------
bce = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_w], device=device))

def dice_loss_from_logits(logits, targets, eps=1e-6):
    probs = torch.sigmoid(logits)
    inter = (probs * targets).sum(dim=(2, 3))
    den = (probs + targets).sum(dim=(2, 3)) + eps
    dice = (2 * inter + eps) / den
    return 1 - dice.mean()

def total_loss(logits, targets):
    return bce(logits, targets) + dice_loss_from_logits(logits, targets)

@torch.no_grad()
def compute_iou_dice(logits, targets, thr=0.5, eps=1e-6):
    probs = torch.sigmoid(logits)
    preds = (probs > thr).float()
    inter = (preds * targets).sum(dim=(2, 3))
    union = (preds + targets - preds * targets).sum(dim=(2, 3)) + eps
    iou = (inter / union).mean().item()

    den = (preds + targets).sum(dim=(2, 3)) + eps
    dice = (2 * inter / den).mean().item()
    return iou, dice

# -----------------------------
# Train loop (with AMP on CUDA)
# -----------------------------
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scaler = torch.amp.GradScaler('cuda', enabled=(device.type == "cuda"))

@torch.no_grad()
def evaluate():
    model.eval()
    losses, ious, dices = [], [], []
    for x, y, _ in val_loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)
        logits = model(x)
        loss = total_loss(logits, y).item()
        iou, dice = compute_iou_dice(logits, y, thr=0.5)
        losses.append(loss)
        ious.append(iou)
        dices.append(dice)
    return float(np.mean(losses)), float(np.mean(ious)), float(np.mean(dices))

def train_one_epoch():
    model.train()
    losses = []
    for x, y, _ in train_loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        with torch.autocast(device_type='cuda', enabled=(device.type == "cuda")):
            logits = model(x)
            loss = total_loss(logits, y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        losses.append(loss.item())
    return float(np.mean(losses))

best_iou = -1.0
save_path = "/kaggle/working/best_unet_crackforest.pt"

start = time.time()
for epoch in range(1, EPOCHS + 1):
    tr_loss = train_one_epoch()
    va_loss, va_iou, va_dice = evaluate()

    if va_iou > best_iou:
        best_iou = va_iou
        torch.save(model.state_dict(), save_path)

    print(f"Epoch {epoch:02d}/{EPOCHS} | "
          f"train_loss={tr_loss:.4f} | val_loss={va_loss:.4f} | "
          f"val_IoU={va_iou:.4f} | val_Dice={va_dice:.4f} | best_IoU={best_iou:.4f}")

print("Done. Best checkpoint saved to:", save_path)
print("Total time (min):", (time.time() - start) / 60)

# -----------------------------
# Visualization (save to /kaggle/working for download)
# -----------------------------
import matplotlib.pyplot as plt

@torch.no_grad()
def save_examples(num=5, thr=THRESH):
    model.eval()
    model.load_state_dict(torch.load(save_path, map_location=device))

    # Take a few samples from val set (deterministic enough)
    pick = list(range(min(num, len(val_ds))))

    for i in pick:
        x, y, sid = val_ds[i]
        x_in = x.unsqueeze(0).to(device)
        logits = model(x_in)[0, 0]
        prob = torch.sigmoid(logits).detach().cpu().numpy()

        img = (x.permute(1, 2, 0).numpy() * 255).astype(np.uint8)
        gt = y[0].numpy()

        pred = (prob > thr).astype(np.uint8)

        overlay = img.copy()
        overlay[pred == 1] = (overlay[pred == 1] * 0.4 + np.array([255, 0, 0]) * 0.6).astype(np.uint8)

        # Save a 1x4 panel image
        fig = plt.figure(figsize=(14, 4))
        ax1 = fig.add_subplot(1, 4, 1); ax1.imshow(img); ax1.set_title(f"Image {sid}"); ax1.axis("off")
        ax2 = fig.add_subplot(1, 4, 2); ax2.imshow(gt, cmap="gray"); ax2.set_title("GT"); ax2.axis("off")
        ax3 = fig.add_subplot(1, 4, 3); ax3.imshow(prob, cmap="magma"); ax3.set_title("Prob"); ax3.axis("off")
        ax4 = fig.add_subplot(1, 4, 4); ax4.imshow(overlay); ax4.set_title(f"Overlay (thr={thr})"); ax4.axis("off")
        fig.tight_layout()

        out_path = f"/kaggle/working/example_{sid}.png"
        fig.savefig(out_path, dpi=150)
        plt.close(fig)

        print("Saved:", out_path)

save_examples(num=5, thr=THRESH)


Device: cuda
CrackForest root: /kaggle/input/crackforest-dataset-private
Masks ready in: /kaggle/working/masks_png | newly converted: 0
Found images: 155, paired: 118, missing masks: 37
Train: 95 Val: 23
Batch imgs: torch.Size([6, 3, 320, 480]) Batch masks: torch.Size([6, 1, 320, 480]) mask unique: tensor([0., 1.])
Estimated pos_weight: 50.0
Epoch 01/20 | train_loss=1.8543 | val_loss=2.2781 | val_IoU=0.0000 | val_Dice=0.0000 | best_IoU=0.0000
Epoch 02/20 | train_loss=1.5905 | val_loss=1.7550 | val_IoU=0.3327 | val_Dice=0.4801 | best_IoU=0.3327
Epoch 03/20 | train_loss=1.4941 | val_loss=1.6256 | val_IoU=0.2485 | val_Dice=0.3889 | best_IoU=0.3327
Epoch 04/20 | train_loss=1.4251 | val_loss=1.3788 | val_IoU=0.2268 | val_Dice=0.3576 | best_IoU=0.3327
Epoch 05/20 | train_loss=1.3932 | val_loss=1.3848 | val_IoU=0.3003 | val_Dice=0.4503 | best_IoU=0.3327
Epoch 06/20 | train_loss=1.3574 | val_loss=1.3747 | val_IoU=0.3257 | val_Dice=0.4810 | best_IoU=0.3327
Epoch 07/20 | train_loss=1.2685 | val_