# Kaggle notebook: replicating the repo and running training

In [None]:
import torch

# Ask torch once whether a CUDA device is usable and reuse the answer below.
cuda_ready = torch.cuda.is_available()
print("CUDA available:", cuda_ready)
if cuda_ready:
    # Index 0 is the first GPU visible to this session.
    print("GPU:", torch.cuda.get_device_name(0))

## Cloning the repo from GitHub into `/kaggle/working`

In [None]:
from pathlib import Path

REPO_URL = "https://github.com/jonny0349/kaggle-sci-image-forgery-seg.git"
REPO_DIR = Path("/kaggle/working/kaggle-sci-image-forgery-seg")

# Make this cell safe to re-run: `git clone` refuses to write into an existing,
# non-empty directory, and the patch cells below are meant to be applied to
# pristine sources. When the clone already exists we only restore the tracked
# files (undoing earlier patches); untracked files such as configs/kaggle.yaml
# or training outputs are left alone.
if (REPO_DIR / ".git").is_dir():
    !git -C {REPO_DIR} checkout -- .
else:
    !git clone {REPO_URL} {REPO_DIR}

## Installing ONLY non-torch deps to avoid breaking CUDA torch

This prevents the `CUDA available: False` + torch wheel mess we ran into before.


In [None]:
# `%pip` (instead of `!pip`) installs into the environment of the running kernel.
# torch is deliberately NOT listed here so the preinstalled CUDA build is kept.
%pip install -U --no-cache-dir segmentation-models-pytorch albumentations timm ruamel.yaml

## Create Kaggle config from baseline and patch paths + GPU settings

In [None]:
from pathlib import Path

from ruamel.yaml import YAML

REPO = Path("/kaggle/working/kaggle-sci-image-forgery-seg")
BASE = REPO / "configs" / "baseline.yaml"
KCFG = REPO / "configs" / "kaggle.yaml"

DATASET_ROOT = "/kaggle/input/recodai-luc-scientific-image-forgery-detection"

# kaggle.yaml starts life as a verbatim copy of the baseline config.
KCFG.write_text(BASE.read_text())

yaml = YAML()
with KCFG.open("r") as src:
    cfg = yaml.load(src)

# Core Kaggle changes: run on the GPU and read from the attached dataset.
cfg["project"]["device"] = "cuda"
cfg["data"]["root"] = DATASET_ROOT

# Outputs must be writable on Kaggle. Only redirect when the baseline already
# declares an `outputs.root` entry; otherwise the config is left as-is.
outputs_cfg = cfg["outputs"] if "outputs" in cfg else None
if isinstance(outputs_cfg, dict) and "root" in outputs_cfg:
    outputs_cfg["root"] = "/kaggle/working/outputs"

# Reasonable first real run on a T4.
for train_key, train_value in (("epochs", 10), ("batch_size", 8), ("num_workers", 2)):
    cfg["train"][train_key] = train_value
cfg["val"]["interval_epochs"] = 1

with KCFG.open("w") as dst:
    yaml.dump(cfg, dst)

print("Wrote:", KCFG)
print("data.root =", cfg["data"]["root"])

## Patch train.py to avoid TensorBoard crash

In [None]:
from pathlib import Path
import re

p = Path("/kaggle/working/kaggle-sci-image-forgery-seg/src/train.py")
s = p.read_text()

GUARD = "if writer is not None:"


def _wrap_import(match):
    """Wrap the matched SummaryWriter import in try/except at its own indentation."""
    pad = match.group(1)
    return (
        f"{pad}try:\n"
        f"{pad}    from torch.utils.tensorboard import SummaryWriter  # type: ignore\n"
        f"{pad}except Exception:\n"
        f"{pad}    SummaryWriter = None"
    )


# 1) Ensure the SummaryWriter import is wrapped safely.
#    - `[ \t]*` (not `\s*`) keeps the match on one line and lets us preserve the
#      import's indentation instead of forcing the replacement to column 0.
#    - Skip entirely when a fallback already exists, so re-running the cell does
#      not wrap the already-wrapped import a second time (which is invalid Python).
if "SummaryWriter = None" not in s:
    s = re.sub(
        r"^([ \t]*)from torch\.utils\.tensorboard import SummaryWriter.*$",
        _wrap_import,
        s,
        flags=re.MULTILINE,
    )

# 2) Replace writer creation with a safe conditional.
#    The negative lookahead leaves lines alone that already carry the conditional.
s = re.sub(
    r'writer = SummaryWriter\(log_dir=paths\["logs"\]\)(?! if SummaryWriter is not None)',
    'writer = SummaryWriter(log_dir=paths["logs"]) if SummaryWriter is not None else None',
    s,
)

# 3) Guard add_scalar statements (only if not already guarded).
#    Only lines that *start* with the call are wrapped: commented-out calls are
#    skipped (wrapping a comment would leave an empty `if` body), and a call whose
#    previous code line is already the guard is left untouched.
guarded = []
prev_code = ""
for line in s.splitlines():
    code = line.strip()
    if code.startswith("writer.add_scalar(") and not prev_code.startswith(GUARD):
        pad = line[: len(line) - len(line.lstrip())]
        guarded.append(f"{pad}{GUARD}")
        guarded.append(f"{pad}    {code}")
    else:
        guarded.append(line)
    if code:
        prev_code = code

# splitlines()/join drops the final newline; put it back if the file had one.
s2 = "\n".join(guarded) + ("\n" if s.endswith("\n") else "")

p.write_text(s2)
print("Patched train.py: SummaryWriter now truly optional")

In [None]:
from pathlib import Path
import re

p = Path("/kaggle/working/kaggle-sci-image-forgery-seg/src/train.py")
s = p.read_text()


def _comment_out_add_scalar(source: str) -> str:
    """Neutralize every statement that starts with ``writer.add_scalar(``.

    The first line of the call becomes ``pass  # <call>`` so the result stays
    valid even when the call is the only statement inside an ``if`` block
    (e.g. after the previous patch cell added ``if writer is not None:``).
    Continuation lines of a multi-line call are commented out as well; the end
    of the call is found by naive parenthesis counting (parentheses inside
    string literals are not treated specially).
    """
    patched = []
    open_parens = 0  # > 0 while we are inside a multi-line add_scalar call
    for line in source.splitlines():
        code = line.lstrip()
        pad = line[: len(line) - len(code)]
        if open_parens > 0:
            patched.append(f"{pad}# {code}")
            open_parens += code.count("(") - code.count(")")
        elif code.startswith("writer.add_scalar("):
            patched.append(f"{pad}pass  # {code}")
            open_parens = code.count("(") - code.count(")")
        else:
            patched.append(line)
    return "\n".join(patched) + ("\n" if source.endswith("\n") else "")


# 1) Remove/neutralize the TensorBoard import (no try/except at all).
#    The import's own indentation is captured and reused: if the import sits
#    inside a `try:` block (as it does after the previous patch cell), a
#    column-0 replacement would leave the `try:` without a body.
s = re.sub(
    r"^([ \t]*)from torch\.utils\.tensorboard import SummaryWriter.*$",
    r"\1SummaryWriter = None  # TensorBoard disabled on Kaggle (protobuf/tensorboard mismatch)",
    s,
    flags=re.MULTILINE
)

# 2) Force writer to None (handles the common exact line). Anything that
#    followed the call on the same line ends up behind the trailing comment.
s = s.replace(
    'writer = SummaryWriter(log_dir=paths["logs"])',
    'writer = None  # TensorBoard disabled on Kaggle'
)

# 3) Comment out any writer.add_scalar statements without breaking the syntax.
s = _comment_out_add_scalar(s)

p.write_text(s)
print("TensorBoard disabled safely in train.py")

Patching the YAML again so it points to the right folders in the input dataset. The config had `train_images: train/images`, but that folder does not exist on Kaggle, so we point it at the actual folder, `train_images/` (and likewise `train_masks/`).

In [None]:
from pathlib import Path

from ruamel.yaml import YAML

cfg_path = Path("/kaggle/working/kaggle-sci-image-forgery-seg/configs/kaggle.yaml")
yaml = YAML()
with cfg_path.open("r") as src:
    cfg = yaml.load(src)


def set_if_exists(d, key, val):
    """Assign ``d[key] = val`` only when ``d`` is a dict that already has ``key``."""
    if not isinstance(d, dict) or key not in d:
        return
    d[key] = val


# (section, key, value) triples covering the common conventions across configs.
# A missing section or key is skipped silently by set_if_exists.
DIR_OVERRIDES = (
    ("data", "train_images", "train_images"),
    ("data", "train_masks", "train_masks"),
    ("data", "val_images", "train_images"),   # temporary: reuse train as val if no val split
    ("data", "val_masks", "train_masks"),
    ("train", "images_dir", "train_images"),
    ("train", "masks_dir", "train_masks"),
    ("val", "images_dir", "train_images"),    # temporary
    ("val", "masks_dir", "train_masks"),
)
for section, key, value in DIR_OVERRIDES:
    set_if_exists(cfg.get(section, {}), key, value)

with cfg_path.open("w") as dst:
    yaml.dump(cfg, dst)

print("Patched kaggle.yaml to use train_images/train_masks")

It's still pointing to the wrong path, so we patch `data.py` to see if that fixes the issue.

In [None]:
from pathlib import Path
import re

path = Path("/kaggle/working/kaggle-sci-image-forgery-seg/src/data.py")
txt = path.read_text()

# Source of the helper injected into data.py. It imports `os` locally so it
# works no matter where in the module it ends up.
RESOLVER = '''
def _resolve_kaggle_dir(p: str) -> str:
    """Return an existing directory path by trying Kaggle-specific fallbacks.
    This keeps local configs working while adapting to Kaggle datasets that use
    train_images/train_masks instead of train/images, etc.
    """
    import os

    if p is None:
        return p
    p = str(p)

    # If it already exists, nothing to resolve
    if os.path.isdir(p):
        return p

    # Common Kaggle layout: train_images / train_masks at dataset root
    candidates = [
        p.replace("/train/images", "/train_images"),
        p.replace("/train/masks",  "/train_masks"),
        p.replace("/val/images",   "/train_images"),
        p.replace("/val/masks",    "/train_masks"),
        p.replace("train/images",  "train_images"),
        p.replace("train/masks",   "train_masks"),
        p.replace("val/images",    "train_images"),
        p.replace("val/masks",     "train_masks"),
    ]

    for c in candidates:
        if c != p and os.path.isdir(c):
            return c

    # Last resort: walk up to 4 ancestor directories and look for the matching
    # Kaggle folder next to them.
    if "images" in p:
        wanted = "train_images"
    elif "masks" in p:
        wanted = "train_masks"
    else:
        return p
    root = p
    for _ in range(4):
        root = os.path.dirname(root)
        fallback = os.path.join(root, wanted)
        if os.path.isdir(fallback):
            return fallback

    return p
'''

# Inject the resolver helper exactly once.
# Preferred position: right after the first standalone `import os` line.
# Matching the whole line (and only the first one) avoids corrupting e.g.
# `import os.path` or injecting the helper several times. If there is no such
# line, the helper is appended to the end of the module instead.
if "_resolve_kaggle_dir" not in txt:
    anchor = re.search(r"^import os[ \t]*$", txt, flags=re.MULTILINE)
    if anchor:
        txt = txt[: anchor.end()] + RESOLVER + txt[anchor.end():]
    else:
        txt = txt.rstrip("\n") + "\n\n" + RESOLVER

# Make the dataset's __init__ resolve its directories before collecting ids:
# find the first `self.images_dir = ...` line that is followed by
# `self.masks_dir = ...` and append the two resolve calls right after them,
# using the same indentation as the assignments themselves.
INIT_ASSIGNMENTS = re.compile(
    r"^([ \t]*)self\.images_dir\s*=\s*.*\n\s*self\.masks_dir\s*=\s*.*\n",
    flags=re.MULTILINE,
)


def _append_resolve_calls(match):
    """Return the matched assignments followed by the two resolver calls."""
    pad = match.group(1)
    return (
        match.group(0)
        + f"{pad}self.images_dir = _resolve_kaggle_dir(self.images_dir)\n"
        + f"{pad}self.masks_dir  = _resolve_kaggle_dir(self.masks_dir)\n"
    )


if "self.images_dir = _resolve_kaggle_dir(self.images_dir)" not in txt:
    txt, n_injected = INIT_ASSIGNMENTS.subn(_append_resolve_calls, txt, count=1)
    if n_injected == 0:
        # Do not claim success silently: without these calls the helper is unused.
        print("WARNING: could not find the self.images_dir / self.masks_dir "
              "assignments in data.py; resolver calls were NOT injected")

path.write_text(txt)
print("Patched data.py with Kaggle folder fallback resolver")

That fixed it, but we found another issue. Our `_collect_ids` cannot find the image/mask pairs when the images live in subfolders, which is the case for the Recod.ai/LUC dataset (`train_images/authentic`, `train_images/forged`). Because it does not search inside these subfolders, training fails with an error saying that no pairs were found. To fix this, we replace `_collect_ids` in `data.py` with a version that searches the image folder recursively.

In [None]:
from pathlib import Path
import textwrap

p = Path("/kaggle/working/kaggle-sci-image-forgery-seg/src/data.py")
lines = p.read_text().splitlines()

# Replacement source for _collect_ids, written at column 0 and re-indented
# below to match the original definition.
# NOTE(review): when the same stem exists in several subfolders, the first path
# in sorted order wins (setdefault) — confirm that is the image the mask
# belongs to.
COLLECT_IDS_TEMPLATE = r'''def _collect_ids(self) -> list[str]:
    """Collect matching (image, mask) pairs.

    Supports Kaggle layout where images are nested:
        train_images/authentic/<stem>.<ext>
        train_images/forged/<stem>.<ext>
    while masks are flat:
        train_masks/<stem>.npy
    """
    import os
    from glob import glob

    img_dir = self.images_dir
    msk_dir = self.masks_dir

    if not os.path.isdir(img_dir):
        raise RuntimeError(f"Images directory not found: {img_dir}")
    if not os.path.isdir(msk_dir):
        raise RuntimeError(f"Masks directory not found: {msk_dir}")

    exts = ('.png', '.jpg', '.jpeg', '.tif', '.tiff', '.bmp')
    img_paths = []
    for ext in exts:
        img_paths.extend(glob(os.path.join(img_dir, '**', f'*{ext}'), recursive=True))

    stem_to_img = {}
    for ip in sorted(img_paths):
        stem = os.path.splitext(os.path.basename(ip))[0]
        stem_to_img.setdefault(stem, ip)

    mask_paths = glob(os.path.join(msk_dir, '*.npy'))
    stem_to_msk = {os.path.splitext(os.path.basename(mp))[0]: mp for mp in mask_paths}

    ids = sorted(set(stem_to_img.keys()) & set(stem_to_msk.keys()))

    if len(ids) == 0:
        raise RuntimeError(
            f"No (image, mask) pairs found under {img_dir} and {msk_dir}.\n"
            f"Found images: {len(stem_to_img)} (sample={list(stem_to_img)[:10]})\n"
            f"Found masks : {len(stem_to_msk)} (sample={list(stem_to_msk)[:10]})\n"
            f"Expected matching stems between images and masks."
        )

    self._stem_to_img = stem_to_img
    self._stem_to_msk = stem_to_msk
    return ids
'''


def _indent_width(text: str) -> int:
    """Number of leading whitespace characters of ``text``."""
    return len(text) - len(text.lstrip())


# Find start of _collect_ids
start = next(
    (i for i, line in enumerate(lines) if line.lstrip().startswith("def _collect_ids(")),
    None,
)
if start is None:
    raise RuntimeError("Could not find def _collect_ids(")

# Find the end of the function by indentation. The function ends at the first
# code line that either dedents below the `def`, or sits at the same level and
# opens a new definition. Decorators ("@") count as the start of the next
# definition so they are not deleted together with the old function body.
base_indent = _indent_width(lines[start])
NEXT_BLOCK_STARTERS = ("def ", "async def ", "class ", "@")
end = len(lines)
for j in range(start + 1, len(lines)):
    code = lines[j].strip()
    if not code or code.startswith("#"):
        continue
    width = _indent_width(lines[j])
    if width < base_indent or (width == base_indent and code.startswith(NEXT_BLOCK_STARTERS)):
        end = j
        break

# Hand back trailing blank lines, and comments at or below the def's level, to
# whatever follows the function instead of deleting them.
while end - 1 > start:
    tail = lines[end - 1]
    tail_code = tail.strip()
    if tail_code and not (tail_code.startswith("#") and _indent_width(tail) <= base_indent):
        break
    end -= 1

# Reuse the def line's actual leading whitespace (works for tabs as well).
indent = lines[start][:base_indent]
new_fn = textwrap.indent(COLLECT_IDS_TEMPLATE, indent).splitlines()

patched = lines[:start] + new_fn + lines[end:]
p.write_text("\n".join(patched) + "\n")

print(f"Patched _collect_ids() from line {start+1} to {end}")

Patching `__getitem__` as well so it uses the resolved image paths.

In [None]:
from pathlib import Path

p = Path("/kaggle/working/kaggle-sci-image-forgery-seg/src/data.py")
lines = p.read_text().splitlines()

MARKER = "# --- Kaggle nested layout support (force correct path right before read) ---"

# Statements injected right before the first cv2.imread call.
# NOTE(review): this only has an effect if that call reads from a variable
# named `img_path`, and it assumes data.py exposes `self.ids`, an `idx`
# argument and a module-level `os` import — confirm against data.py.
# The glob fallback is sorted so the chosen file is deterministic and matches
# the "first path in sorted order" rule used when building `_stem_to_img`.
INJECTED = (
    MARKER,
    "stem = self.ids[idx]",
    "img_path = getattr(self, '_stem_to_img', {}).get(stem)",
    "if img_path is None:",
    "    from glob import glob",
    "    cands = sorted(glob(os.path.join(self.images_dir, '**', f'{stem}.*'), recursive=True))",
    "    img_path = cands[0] if cands else os.path.join(self.images_dir, f'{stem}.png')",
)

if any(MARKER in line for line in lines):
    # Re-running the cell must not stack a second copy of the injected block.
    print("Nested image resolution already injected; data.py left unchanged")
else:
    # Find the first real (non-comment) cv2.imread line
    target_idx = next(
        (
            i
            for i, line in enumerate(lines)
            if "cv2.imread" in line and not line.lstrip().startswith("#")
        ),
        None,
    )
    if target_idx is None:
        raise RuntimeError("Could not find cv2.imread in data.py")

    # Reuse the target line's actual leading whitespace for the injected code.
    target = lines[target_idx]
    indent = target[: len(target) - len(target.lstrip())]

    inject = [indent + stmt for stmt in INJECTED]
    lines2 = lines[:target_idx] + inject + lines[target_idx:]
    p.write_text("\n".join(lines2) + "\n")

    print("Correctly injected nested image resolution (escaped f-strings)")

## Run training

In [None]:
# Run training from inside the cloned repo (so `src.train` resolves as a module
# and `configs/kaggle.yaml` as a relative path), using the Kaggle config
# written by the earlier cells.
!cd /kaggle/working/kaggle-sci-image-forgery-seg && python -m src.train --cfg configs/kaggle.yaml

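Training crashes at the very end: we disabled TensorBoard (`writer = None`), but the script still calls `writer.close()` unconditionally. We patch `src/train.py` so it finishes cleanly. On a fresh Restart & Run All, run this cell before the training cell above; otherwise the crash reappears.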

In [None]:
from pathlib import Path

p = Path("/kaggle/working/kaggle-sci-image-forgery-seg/src/train.py")
s = p.read_text()

GUARD = "if writer is not None:"

# Replace each `writer.close()` statement with a guarded close.
# - The guard reuses the statement's own indentation, so the patch is valid at
#   any nesting depth (a fixed 8-space body is only right for one depth).
# - Only lines that start with `writer.close()` are touched, so e.g.
#   `self.writer.close()` is left alone.
# - A close whose previous code line is already the guard is skipped, which
#   makes the cell safe to re-run.
patched = []
prev_code = ""
for line in s.splitlines():
    code = line.strip()
    if code.startswith("writer.close()") and not prev_code.startswith(GUARD):
        pad = line[: len(line) - len(line.lstrip())]
        patched.append(f"{pad}{GUARD}")
        patched.append(f"{pad}    {code}")
    else:
        patched.append(line)
    if code:
        prev_code = code

# splitlines()/join drops the final newline; put it back if the file had one.
s = "\n".join(patched) + ("\n" if s.endswith("\n") else "")

p.write_text(s)
print("Patched writer.close() guard")

Running inference on a small batch to check whether the model learned something.

In [None]:
# Run src.infer from inside the cloned repo on the forged training images,
# using the Kaggle config and a 0.5 threshold; predictions (with overlays) go
# to /kaggle/working/outputs/preds/forged_demo.
# NOTE(review): --checkpoint points at outputs/ inside the cloned repo, while
# the config cell may redirect `outputs.root` to /kaggle/working/outputs —
# confirm where training actually wrote best_dice.pt.
!cd /kaggle/working/kaggle-sci-image-forgery-seg && python -m src.infer \
  --cfg configs/kaggle.yaml \
  --checkpoint /kaggle/working/kaggle-sci-image-forgery-seg/outputs/checkpoints/best_dice.pt \
  --input_dir /kaggle/input/recodai-luc-scientific-image-forgery-detection/train_images/forged \
  --output_dir /kaggle/working/outputs/preds/forged_demo \
  --thr 0.5 \
  --save_overlay