In [1]:
# =========================
# BLOCK 1: Runtime + Storage
# =========================

import os
from pathlib import Path

# Colab only: attach Google Drive so the dataset and all run artifacts live on persistent storage.
from google.colab import drive
drive.mount('/content/drive')

# Root folder of the competition data on Drive; every later path is built from it.
BASE_DIR = Path("/content/drive/MyDrive/CSIRO")

# Files and folders the later cells read from BASE_DIR; stop early if any is absent.
expected = [
    BASE_DIR / entry
    for entry in ("train.csv", "test.csv", "sample_submission.csv", "train", "test")
]
missing = [str(path) for path in expected if not path.exists()]
print("BASE_DIR:", BASE_DIR)
if not missing:
    print("✅ Dataset paths look good.")
else:
    print("❌ Missing these paths:")
    for m in missing:
        print("  -", m)
    raise FileNotFoundError("Fix missing dataset paths in Google Drive.")

# Per-run output folder on Drive: checkpoints, out-of-fold predictions, test
# predictions, submissions and logs are all written underneath it.
RUN_DIR = BASE_DIR / "runs" / "run_001"

DIRS_TO_CREATE = [
    RUN_DIR / "checkpoints" / model_dir
    for model_dir in ("modelA_dinov2", "modelB_siglip")
] + [
    RUN_DIR / leaf
    for leaf in ("oof", "preds_test", "submissions", "logs")
]

# mkdir is idempotent here (exist_ok=True), so re-running the cell is harmless.
for d in DIRS_TO_CREATE:
    d.mkdir(parents=True, exist_ok=True)

print("✅ Run directory created at:", RUN_DIR)
print("Folders:")
for d in DIRS_TO_CREATE:
    print(" -", d)

# Report whether PyTorch can see a CUDA device (the notebook expects an A100).
import torch

cuda_ok = torch.cuda.is_available()
print("\nTorch CUDA available:", cuda_ok)
if not cuda_ok:
    print("❌ No GPU detected. Go to Runtime → Change runtime type → GPU.")
else:
    print("GPU name:", torch.cuda.get_device_name(0))


# Best effort only: dump nvidia-smi when the binary exists; any failure is reported, not raised.
try:
    import subprocess
    out = subprocess.check_output(["nvidia-smi"], text=True)
    print("\n===== nvidia-smi =====", out, sep="\n")
except Exception as e:
    print("nvidia-smi not available:", e)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
BASE_DIR: /content/drive/MyDrive/CSIRO
✅ Dataset paths look good.
✅ Run directory created at: /content/drive/MyDrive/CSIRO/runs/run_001
Folders:
 - /content/drive/MyDrive/CSIRO/runs/run_001/checkpoints/modelA_dinov2
 - /content/drive/MyDrive/CSIRO/runs/run_001/checkpoints/modelB_siglip
 - /content/drive/MyDrive/CSIRO/runs/run_001/oof
 - /content/drive/MyDrive/CSIRO/runs/run_001/preds_test
 - /content/drive/MyDrive/CSIRO/runs/run_001/submissions
 - /content/drive/MyDrive/CSIRO/runs/run_001/logs

Torch CUDA available: True
GPU name: NVIDIA A100-SXM4-40GB

===== nvidia-smi =====
Sun Dec 28 20:56:31 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----

In [2]:
# BLOCK 2 Load + verify dataset quickly
import os, numpy as np, pandas as pd
from pathlib import Path

# This cell needs the paths defined by Block 1.
assert "BASE_DIR" in globals() and "RUN_DIR" in globals(), "Run Block 1 first."

# Locations of the three competition CSVs under BASE_DIR.
TRAIN_CSV = BASE_DIR/"train.csv"
TEST_CSV = BASE_DIR/"test.csv"
SUB_CSV = BASE_DIR/"sample_submission.csv"

# train.csv / test.csv are in long format: one row per (image, target_name) pair.
train_long = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)
sub_df = pd.read_csv(SUB_CSV)

# The five targets and the per-target weights used by the weighted R² metric below.
TARGETS = ["Dry_Green_g","Dry_Dead_g","Dry_Clover_g","GDM_g","Dry_Total_g"]
WEIGHTS = {"Dry_Green_g":0.1,"Dry_Dead_g":0.1,"Dry_Clover_g":0.1,"GDM_g":0.2,"Dry_Total_g":0.5}

# Required columns: report the first missing one, train.csv before test.csv.
absent_train = [c for c in ["sample_id","image_path","target_name","target"] if c not in train_long.columns]
if absent_train:
    c = absent_train[0]
    raise ValueError(f"train.csv missing {c}")
absent_test = [c for c in ["sample_id","image_path","target_name"] if c not in test_df.columns]
if absent_test:
    c = absent_test[0]
    raise ValueError(f"test.csv missing {c}")

# sample_id is composite ("<image id>__<target name>"); the part before "__" identifies the image.
for frame in (train_long, test_df):
    frame["image_id"] = frame["sample_id"].astype(str).str.split("__").str[0]

train_target_names = train_long["target_name"].unique()
test_target_names = test_df["target_name"].unique()

print("Shapes:", train_long.shape, test_df.shape, sub_df.shape)
print("Train targets:", sorted(train_target_names))
print("Test targets :", sorted(test_target_names))

# Warn about target names that are not in TARGETS.
extra_tr = set(train_target_names).difference(TARGETS)
extra_te = set(test_target_names).difference(TARGETS)
if extra_tr:
    print("⚠️ Unexpected train target_name:", extra_tr)
if extra_te:
    print("⚠️ Unexpected test target_name :", extra_te)

# Long-format sanity: number of images and the median count of distinct targets per image.
targets_per_image = train_long.groupby("image_id")["target_name"].nunique()
print("Unique images:", train_long["image_id"].nunique(),
      "| median targets/image:", int(targets_per_image.median()))

# Path sanity: check that up to 20 randomly sampled image paths exist under BASE_DIR.
np.random.seed(1947)
valid_paths = train_long["image_path"].dropna()
# Bound the sample size by the number of NON-NULL paths; bounding it by len(train_long)
# makes .sample() raise when fewer than 20 paths survive dropna().
paths = valid_paths.sample(min(20, len(valid_paths)), random_state=1947).tolist()
missing = [p for p in paths if not (BASE_DIR/p).exists()]
print("Missing sampled image paths:", len(missing))
if missing:
    print("Example missing:", missing[0], "->", BASE_DIR/missing[0])

# Quick target ranges: count / min / median / max per target, listed in TARGETS order.
stats = train_long.groupby("target_name")["target"].agg(["count","min","median","max"]).reindex(TARGETS)
print(stats)


Shapes: (1785, 10) (5, 4) (5, 2)
Train targets: ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']
Test targets : ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']
Unique images: 357 | median targets/image: 5
Missing sampled image paths: 0
              count   min   median       max
target_name                                 
Dry_Green_g     357  0.00  20.8000  157.9836
Dry_Dead_g      357  0.00   7.9809   83.8407
Dry_Clover_g    357  0.00   1.4235   71.7865
GDM_g           357  1.04  27.1082  157.9836
Dry_Total_g     357  1.04  40.3000  185.7000


In [3]:
# BLOCK 3  Pivot long->wide (1 row per image_id) + keep metadata
import pandas as pd, numpy as np

assert "train_long" in globals(), "Run Block 2 first."

# Per-image metadata to carry into the wide frame; only columns present in train.csv are kept.
META_COLS = [
    c
    for c in ["image_id","image_path","Sampling_Date","State","Species","Pre_GSHH_NDVI","Height_Ave_cm"]
    if c in train_long.columns
]

# One metadata row per image (first occurrence wins).
meta = (
    train_long[META_COLS]
    .drop_duplicates("image_id")
    .reset_index(drop=True)
)

# One column per target_name, one row per image; duplicate (image, target) pairs keep the first value.
wide = (
    train_long
    .pivot_table(index="image_id", columns="target_name", values="target", aggfunc="first")
    .reset_index()
)
train_wide = meta.merge(wide, on="image_id", how="inner")

# Make sure every expected target column exists, filling absent ones with NaN.
TARGETS = ["Dry_Green_g","Dry_Dead_g","Dry_Clover_g","GDM_g","Dry_Total_g"]
absent_targets = [t for t in TARGETS if t not in train_wide.columns]
for t in absent_targets:
    train_wide[t] = np.nan

# The model learns the three components; Total and GDM are derived from them
# (Total = Green + Dead + Clover, GDM = Green + Clover) and kept here for evaluation only.
green, dead, clover = (train_wide[col] for col in ("Dry_Green_g", "Dry_Dead_g", "Dry_Clover_g"))
train_wide["Der_Dry_Total_g"] = green + dead + clover
train_wide["Der_GDM_g"] = green + clover

# Month of sampling, used later to build CV groups; unparseable dates become month 0.
if "Sampling_Date" in train_wide.columns:
    d = pd.to_datetime(train_wide["Sampling_Date"], errors="coerce")
    train_wide["month"] = d.dt.month.fillna(0).astype(int)

print("train_wide shape:", train_wide.shape)
component_na_rate = train_wide[["Dry_Green_g","Dry_Dead_g","Dry_Clover_g"]].isna().mean()
print("Missing rate (components):", component_na_rate.to_dict())

# How closely do the provided Total / GDM columns match the sums of their components?
err_total = (train_wide["Dry_Total_g"] - train_wide["Der_Dry_Total_g"]).abs()
err_gdm   = (train_wide["GDM_g"] - train_wide["Der_GDM_g"]).abs()
print("Mean abs constraint error |Total|:", float(err_total.mean(skipna=True)))
print("Mean abs constraint error |GDM|  :", float(err_gdm.mean(skipna=True)))


train_wide shape: (357, 15)
Missing rate (components): {'Dry_Green_g': 0.0, 'Dry_Dead_g': 0.0, 'Dry_Clover_g': 0.0}
Mean abs constraint error |Total|: 0.0008784313725504181
Mean abs constraint error |GDM|  : 1.484593837576958e-05


In [4]:
# BLOCK 4 5-fold split + weighted R² (global)
import numpy as np
from sklearn.model_selection import GroupKFold

assert "train_wide" in globals(), "Run Block 3 first."

# Per-target weights of the weighted R² metric.
WEIGHTS = {"Dry_Green_g":0.1,"Dry_Dead_g":0.1,"Dry_Clover_g":0.1,"GDM_g":0.2,"Dry_Total_g":0.5}

# Group by State+month when both exist and month actually varies; otherwise fall back to
# one group per image. The decision is computed once so the report below cannot disagree with it.
use_state_month = (
    ("State" in train_wide.columns)
    and ("month" in train_wide.columns)
    and (train_wide["month"].nunique() > 1)
)
if use_state_month:
    train_wide["group_key"] = train_wide["State"].astype(str) + "_" + train_wide["month"].astype(str)
else:
    train_wide["group_key"] = train_wide["image_id"].astype(str)

gkf = GroupKFold(n_splits=5)
train_wide["fold"] = -1
fold_col = train_wide.columns.get_loc("fold")
for f, (_, va) in enumerate(gkf.split(train_wide, groups=train_wide["group_key"])):
    # split() yields POSITIONAL indices, so assign positionally; label-based .loc is only
    # correct while the frame happens to carry a default RangeIndex.
    train_wide.iloc[va, fold_col] = f

print("Fold counts:", train_wide["fold"].value_counts().sort_index().to_dict())
print("Using group_key:", "State_month" if use_state_month else "image_id")

def weighted_r2_long(y_true_long, y_pred_long, w_long):
    """Weighted R² over flat (long-format) arrays.

    Args:
        y_true_long: ground-truth values (array-like, converted to float).
        y_pred_long: predictions, same length as ``y_true_long``.
        w_long: per-element weights; values below 1e-12 are raised to 1e-12.

    Returns:
        ``1 - SS_res / SS_tot`` where both sums are weighted and the baseline is the
        weighted mean of the truth. ``SS_tot`` is floored at 1e-12 to avoid division by zero.
    """
    truth = np.asarray(y_true_long, float)
    pred = np.asarray(y_pred_long, float)
    weights = np.clip(np.asarray(w_long, float), 1e-12, None)

    weighted_mean = np.sum(weights * truth) / np.sum(weights)
    residual_ss = np.sum(weights * (truth - pred) ** 2)
    total_ss = np.sum(weights * (truth - weighted_mean) ** 2)
    return 1.0 - residual_ss / max(total_ss, 1e-12)

def wide_to_long_for_metric(df_wide, pred_prefix="pred_"):
    """Flatten a wide frame into (y_true, y_pred, weights) arrays for the weighted R².

    The frame must hold the five true target columns and, for the three components,
    prediction columns named ``pred_prefix + <target>``. Predicted Dry_Total_g and GDM_g
    are derived from the component predictions (Green+Dead+Clover and Green+Clover).

    Returns:
        Three 1-D arrays of length ``5 * len(df_wide)``, concatenated in the order
        Green, Dead, Clover, Total, GDM; weights come from the global ``WEIGHTS``.
    """
    components = ("Dry_Green_g", "Dry_Dead_g", "Dry_Clover_g")
    n_rows = len(df_wide)
    comp_pred = {name: df_wide[pred_prefix + name].values for name in components}

    truth_parts = [df_wide[name].values for name in components]
    pred_parts = [comp_pred[name] for name in components]
    weight_parts = [np.full(n_rows, WEIGHTS[name]) for name in components]

    # Derived total: sum of all three predicted components.
    truth_parts.append(df_wide["Dry_Total_g"].values)
    pred_parts.append(comp_pred["Dry_Green_g"] + comp_pred["Dry_Dead_g"] + comp_pred["Dry_Clover_g"])
    weight_parts.append(np.full(n_rows, WEIGHTS["Dry_Total_g"]))

    # Derived GDM: predicted green plus predicted clover.
    truth_parts.append(df_wide["GDM_g"].values)
    pred_parts.append(comp_pred["Dry_Green_g"] + comp_pred["Dry_Clover_g"])
    weight_parts.append(np.full(n_rows, WEIGHTS["GDM_g"]))

    return np.concatenate(truth_parts), np.concatenate(pred_parts), np.concatenate(weight_parts)


Fold counts: {0: 74, 1: 66, 2: 75, 3: 67, 4: 75}
Using group_key: State_month


In [8]:
# PATCH v2: remove RandomResizedCrop (albumentations API mismatch), use stable crop pipeline
import cv2, torch, numpy as np, albumentations as A
from torch.utils.data import Dataset
from albumentations.pytorch import ToTensorV2

# Record the installed albumentations version alongside the run output.
print("albumentations version:", A.__version__)
# Side length (pixels) of the square model input; get_tfms reads this global when it is called.
IMG_SIZE = 384
# Component targets regressed by the model, in the order PastureDS packs them into the target tensor.
Y_COLS = ["Dry_Green_g","Dry_Dead_g","Dry_Clover_g"]

def get_tfms(train=True):
    """Build the albumentations pipeline for training or evaluation.

    Args:
        train: when True, return the augmenting pipeline (random scale, reflect-pad to at
            least IMG_SIZE, random IMG_SIZE crop, flips, 90° rotation, brightness/contrast);
            otherwise a plain resize to IMG_SIZE x IMG_SIZE.

    Returns:
        An ``A.Compose`` that ends with ``A.Normalize()`` (library defaults) and ``ToTensorV2()``.
        ``IMG_SIZE`` is read from the module globals at call time.
    """
    to_tensor_tail = [A.Normalize(), ToTensorV2()]
    if not train:
        return A.Compose([A.Resize(height=IMG_SIZE, width=IMG_SIZE), *to_tensor_tail])

    augmentations = [
        A.RandomScale(scale_limit=0.25, p=0.8),
        # Padding guarantees the random crop below always fits, even after down-scaling.
        A.PadIfNeeded(min_height=IMG_SIZE, min_width=IMG_SIZE,
                      border_mode=cv2.BORDER_REFLECT_101, p=1.0),
        A.RandomCrop(height=IMG_SIZE, width=IMG_SIZE, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.2),
        A.RandomRotate90(p=0.2),
        A.RandomBrightnessContrast(p=0.25),
    ]
    return A.Compose(augmentations + to_tensor_tail)

class PastureDS(Dataset):
    """Training/validation dataset: one item per row of a wide (one-row-per-image) frame.

    Each item is ``(image_tensor, target_tensor, image_id)`` where the target tensor holds
    the ``Y_COLS`` values of the row as float32.
    """

    def __init__(self, df, train=True):
        """Store a positionally re-indexed copy of ``df`` and build the transform pipeline.

        Args:
            df: frame with ``image_path``, ``image_id`` and the ``Y_COLS`` columns.
            train: forwarded to ``get_tfms`` to choose augmenting vs. plain-resize transforms.
        """
        self.df = df.reset_index(drop=True)
        self.tfms = get_tfms(train)

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, i):
        """Load, transform and return item ``i``.

        Raises:
            FileNotFoundError: if the image cannot be read from ``BASE_DIR / image_path``.
        """
        r = self.df.iloc[i]
        fp = str(BASE_DIR / r["image_path"])
        img = cv2.imread(fp)
        # cv2.imread signals failure by returning None instead of raising; surface the
        # offending path here rather than letting cvtColor fail with an opaque assertion.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {fp}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR channel order
        x = self.tfms(image=img)["image"]
        y = torch.tensor(r[Y_COLS].values.astype(np.float32))
        return x, y, r["image_id"]

# Smoke test: build a training dataset from 4 rows sampled with a fixed seed, fetch the
# first item, and print the image tensor shape, the target values and the image id.
ds = PastureDS(train_wide.sample(4, random_state=1947), train=True)
x, y, iid = ds[0]
print("x:", tuple(x.shape), "y:", y.tolist(), "image_id:", iid)


albumentations version: 2.0.8
x: (3, 384, 384) y: [6.599999904632568, 0.4000000059604645, 0.5] image_id: ID146920896


In [9]:
# BLOCK 6 (<=50 lines): install timm + loaders + 3-output model + derive helper
!pip -q install timm

import os, torch, torch.nn as nn, numpy as np, timm
from torch.utils.data import DataLoader

# This cell needs the wide training frame and the dataset class from earlier cells.
assert "train_wide" in globals() and "PastureDS" in globals(), "Run Blocks 1–5 first."
# Use the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Batch size and DataLoader worker count (all CPUs reported by the OS, or 2 if unknown).
BATCH = 16; WORKERS = os.cpu_count() or 2

def make_loaders(fold):
    """Create the train/validation DataLoaders for one cross-validation fold.

    Args:
        fold: fold id; rows of ``train_wide`` with this ``fold`` value form the validation set
            and all other rows form the training set.

    Returns:
        ``(dl_tr, dl_va, va)`` — shuffled training loader with augmentation, unshuffled
        validation loader without augmentation, and the validation frame (index reset) in
        the same row order the validation loader iterates.
    """
    is_val = train_wide.fold == fold
    va = train_wide[is_val].reset_index(drop=True)
    tr = train_wide[train_wide.fold != fold].reset_index(drop=True)

    shared = dict(batch_size=BATCH, num_workers=WORKERS, pin_memory=True)
    dl_tr = DataLoader(PastureDS(tr, True), shuffle=True, **shared)
    dl_va = DataLoader(PastureDS(va, False), shuffle=False, **shared)
    return dl_tr, dl_va, va

class ImgReg3(nn.Module):
    """Image regressor: a timm backbone followed by a small MLP head with 3 outputs.

    The backbone is created with ``num_classes=0`` and ``global_pool="avg"``, so it returns
    pooled features of width ``backbone.num_features``. The head is
    LayerNorm -> Dropout -> Linear(d, 256) -> GELU -> Dropout -> Linear(256, 3).
    """

    def __init__(self, backbone, pretrained=True, drop=0.2):
        """
        Args:
            backbone: timm model name.
            pretrained: whether timm should load pretrained weights.
            drop: dropout probability used at both dropout layers of the head.
        """
        super().__init__()
        self.backbone = timm.create_model(
            backbone, pretrained=pretrained, num_classes=0, global_pool="avg"
        )
        feat_dim = self.backbone.num_features
        # Layer order is part of the checkpoint format (state_dict keys are head.<index>.*).
        head_layers = [
            nn.LayerNorm(feat_dim),
            nn.Dropout(drop),
            nn.Linear(feat_dim, 256),
            nn.GELU(),
            nn.Dropout(drop),
            nn.Linear(256, 3),
        ]
        self.head = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the 3 head outputs for a batch of images."""
        features = self.backbone(x)
        return self.head(features)

@torch.no_grad()
def derive5(pred3):
    """Expand 3 component predictions into all 5 targets.

    Args:
        pred3: 2-D array/tensor whose columns 0, 1, 2 are Green, Dead, Clover.

    Returns:
        Dict keyed by target name; Dry_Total_g = Green + Dead + Clover and
        GDM_g = Green + Clover. Runs with gradient tracking disabled.
    """
    green = pred3[:, 0]
    dead = pred3[:, 1]
    clover = pred3[:, 2]
    derived = {"Dry_Green_g": green, "Dry_Dead_g": dead, "Dry_Clover_g": clover}
    derived["Dry_Total_g"] = green + dead + clover
    derived["GDM_g"] = green + clover
    return derived

# Smoke test: build the fold-0 loaders and pull a single training batch to confirm that
# image decoding, augmentation and batching work end to end.
print("device:", device, "| batch:", BATCH, "| workers:", WORKERS)
dl_tr, dl_va, va_df = make_loaders(0)
xb, yb, _ = next(iter(dl_tr))
print("batch x:", tuple(xb.shape), "batch y:", tuple(yb.shape))


device: cuda | batch: 16 | workers: 12
batch x: (16, 3, 384, 384) batch y: (16, 3)


In [10]:
# BLOCK 7 (<=50 lines): weighted loss (incl. derived) + weighted R² eval
import numpy as np, torch

# The loss and metric below read the per-target weights defined in an earlier cell.
assert "WEIGHTS" in globals(), "WEIGHTS not found (Block 2)."

# Short alias; W is the same dict object as WEIGHTS, not a copy.
W = WEIGHTS  # alias

def weighted_loss_from_pred3(pred3, y3_true):
    """Weighted sum of per-target MSEs over the 3 components and the 2 derived targets.

    Args:
        pred3: (B, 3) predictions in the order Green, Dead, Clover.
        y3_true: (B, 3) ground truth in the same order.

    Returns:
        Scalar tensor ``sum_t W[t] * mean((pred_t - true_t)^2)`` over Green, Dead, Clover,
        Total (= Green+Dead+Clover) and GDM (= Green+Clover), with weights from the global ``W``.
    """
    pred_green, pred_dead, pred_clover = pred3[:, 0], pred3[:, 1], pred3[:, 2]
    true_green, true_dead, true_clover = y3_true[:, 0], y3_true[:, 1], y3_true[:, 2]

    # (weight key, prediction, truth); derived targets are built from the components so the
    # gradient of the Total/GDM terms flows back into all contributing outputs.
    terms = (
        ("Dry_Green_g", pred_green, true_green),
        ("Dry_Dead_g", pred_dead, true_dead),
        ("Dry_Clover_g", pred_clover, true_clover),
        ("Dry_Total_g", pred_green + pred_dead + pred_clover, true_green + true_dead + true_clover),
        ("GDM_g", pred_green + pred_clover, true_green + true_clover),
    )
    loss = 0.0
    for key, pred, true in terms:
        loss += W[key] * torch.mean((pred - true) ** 2)
    return loss

def weighted_r2(y_true, y_pred, w):
    """Weighted coefficient of determination.

    Args:
        y_true: ground-truth values (array-like, converted to float).
        y_pred: predictions, same length as ``y_true``.
        w: per-element weights; values below 1e-12 are raised to 1e-12.

    Returns:
        ``1 - SS_res / SS_tot`` with weighted sums and the weighted mean of ``y_true`` as
        baseline; ``SS_tot`` is floored at 1e-12.
    """
    truth = np.asarray(y_true, float)
    pred = np.asarray(y_pred, float)
    weights = np.clip(np.asarray(w, float), 1e-12, None)

    mean_true = (weights * truth).sum() / weights.sum()
    residual = (weights * (truth - pred) ** 2).sum()
    spread = (weights * (truth - mean_true) ** 2).sum()
    return 1.0 - residual / max(spread, 1e-12)

def eval_weighted_r2_from_pred3(y3_true, pred3):
    """Weighted R² across all 5 targets, computed from 3-component arrays.

    Args:
        y3_true: (N, 3) ground truth in the order Green, Dead, Clover.
        pred3: (N, 3) predictions in the same order.

    Returns:
        ``weighted_r2`` over the concatenation Green, Dead, Clover, Total, GDM, where Total
        and GDM are derived from the components for BOTH truth and prediction, and each
        element carries the weight of its target from the global ``W``.
    """
    def expand_to_five(arr3):
        # Order here must match `target_order` below so weights line up with values.
        g, d, c = arr3[:, 0], arr3[:, 1], arr3[:, 2]
        return np.concatenate([g, d, c, (g + d + c), (g + c)])

    target_order = ("Dry_Green_g", "Dry_Dead_g", "Dry_Clover_g", "Dry_Total_g", "GDM_g")
    n_rows = len(y3_true[:, 0])
    y = expand_to_five(y3_true)
    p = expand_to_five(pred3)
    w = np.concatenate([np.full(n_rows, W[name]) for name in target_order])
    return weighted_r2(y, p, w)

print("Weighted loss + eval ready.")


Weighted loss + eval ready.


In [16]:
# PATCH v4: use an input size that is a multiple of the ViT patch size 14 (DINOv2/SigLIP patch14).
IMG_SIZE = 392  # 14 * 28; replaces 384

# Redefinition of get_tfms for the new IMG_SIZE. The function reads IMG_SIZE when it is
# called, and datasets call it in their constructor, so loaders must be rebuilt afterwards.
def get_tfms(train=True):
    """Return the albumentations pipeline for training (augmenting) or evaluation (resize only).

    Both pipelines finish with ``A.Normalize()`` (library defaults) and ``ToTensorV2()`` and
    produce IMG_SIZE x IMG_SIZE images.
    """
    import albumentations as A
    import cv2
    from albumentations.pytorch import ToTensorV2

    side = IMG_SIZE
    if train:
        steps = [
            A.RandomScale(scale_limit=0.25, p=0.8),
            # Reflect-pad so the crop below always fits after a down-scale.
            A.PadIfNeeded(min_height=side, min_width=side, border_mode=cv2.BORDER_REFLECT_101, p=1.0),
            A.RandomCrop(height=side, width=side, p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.2),
            A.RandomRotate90(p=0.2),
            A.RandomBrightnessContrast(p=0.25),
        ]
    else:
        steps = [A.Resize(height=side, width=side)]
    steps += [A.Normalize(), ToTensorV2()]
    return A.Compose(steps)

# Recreate loaders (so the datasets pick up the new IMG_SIZE) and re-smoke-test the model.
# BACKBONE_A is not defined by any earlier cell of this notebook as saved; fall back to the
# same default the training cells use so a fresh Restart & Run All does not hit a NameError.
BACKBONE_A = globals().get("BACKBONE_A", "vit_giant_patch14_dinov2")
dl_tr, _, _ = make_loaders(0)
xb, yb, _ = next(iter(dl_tr))
# NOTE(review): `m` stays referenced (and on `device`) after this cell; consider deleting it
# before training if memory is tight.
m = ImgReg3(BACKBONE_A, pretrained=True).to(device).eval()
with torch.no_grad(), torch.amp.autocast(device_type="cuda", enabled=(device.type=="cuda")):
    out = m(xb.to(device))
print("OK -> IMG_SIZE:", IMG_SIZE, "batch:", tuple(xb.shape), "out:", tuple(out.shape))


OK -> IMG_SIZE: 392 batch: (16, 3, 392, 392) out: (16, 3)


In [17]:
# BLOCK 8 (<=50 lines): Train ModelA (DINOv2) 1 fold (freeze->partial FT) + save best + OOF
import torch, numpy as np
from tqdm import tqdm

# Fold to train in this cell and its loaders / validation frame.
FOLD = 0
dl_tr, dl_va, va_df = make_loaders(FOLD)
BACKBONE_A = globals().get("BACKBONE_A", "vit_giant_patch14_dinov2")
print("Train ModelA:", BACKBONE_A, "| IMG_SIZE:", IMG_SIZE)

model = ImgReg3(BACKBONE_A, pretrained=True).to(device)
# Checkpoints for this model/fold are written straight to the run folder.
ckpt_dir = RUN_DIR/"checkpoints"/"modelA_dinov2"/f"fold_{FOLD}"; ckpt_dir.mkdir(parents=True, exist_ok=True)
best_path = ckpt_dir/"best.pt"; best_r2 = -1e9

# The training loop below uses a global `scaler` that no cell of the saved notebook defines.
# Reuse one if the kernel already has it, otherwise create it here so the cell runs on a
# fresh kernel (scaling is a no-op when not on CUDA).
scaler = globals().get("scaler")
if scaler is None:
    scaler = torch.amp.GradScaler("cuda", enabled=(device.type=="cuda"))

def set_trainable(phase2=False, unfreeze_last=2):
    """Select which parameters of the global ``model`` receive gradients.

    The backbone is frozen and the head is made trainable. In phase 2, the last
    ``unfreeze_last`` entries of ``model.backbone.blocks`` (if the backbone has that
    attribute) are unfrozen as well.

    Args:
        phase2: enable partial fine-tuning of the backbone.
        unfreeze_last: number of trailing backbone blocks to unfreeze; values <= 0 leave the
            whole backbone frozen.
    """
    for p in model.backbone.parameters():
        p.requires_grad = False
    for p in model.head.parameters():
        p.requires_grad = True
    # Guard on > 0: blocks[-0:] is blocks[0:], which would silently unfreeze EVERY block.
    if phase2 and unfreeze_last > 0 and hasattr(model.backbone, "blocks"):
        for b in model.backbone.blocks[-unfreeze_last:]:
            for p in b.parameters():
                p.requires_grad = True

def run_val():
    """Evaluate the global ``model`` on ``dl_va``.

    Returns:
        ``(weighted_r2, predictions)`` where predictions is the (N, 3) float array of model
        outputs in loader order.
    """
    model.eval()
    pred_batches, true_batches = [], []
    use_amp = device.type == "cuda"
    with torch.no_grad():
        for x, y, _ in dl_va:
            with torch.amp.autocast("cuda", enabled=use_amp):
                batch_pred = model(x.to(device))
            # Cast back to float32 before leaving the GPU; autocast may yield half precision.
            pred_batches.append(batch_pred.float().cpu().numpy())
            true_batches.append(y.numpy())
    all_pred = np.concatenate(pred_batches)
    all_true = np.concatenate(true_batches)
    return eval_weighted_r2_from_pred3(all_true, all_pred), all_pred

def run_train(opt):
    """Train the global ``model`` for one pass over ``dl_tr`` with mixed precision.

    Args:
        opt: optimizer holding the currently trainable parameters.

    Uses the global ``scaler`` to scale the loss for the backward pass and to step the optimizer.
    """
    model.train()
    use_amp = device.type == "cuda"
    for x, y, _ in tqdm(dl_tr, leave=False):
        x = x.to(device)
        y = y.to(device)
        with torch.amp.autocast("cuda", enabled=use_amp):
            loss = weighted_loss_from_pred3(model(x), y)
        opt.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

# Phase A1: backbone frozen, train only the head (2 epochs, lr 2e-3).
set_trainable(False)
opt = torch.optim.AdamW(
    filter(lambda p:p.requires_grad, model.parameters()), lr=2e-3, weight_decay=1e-4
)
for ep in range(2):
    run_train(opt)
    r2, _ = run_val()
    print(f"A1 ep{ep+1} val_wR2={r2:.5f}")
    if r2 > best_r2:
        best_r2 = r2
        torch.save(model.state_dict(), best_path)

# Phase A2: additionally unfreeze the last 2 backbone blocks (4 epochs, lr 3e-4).
# The optimizer is rebuilt so it picks up the newly trainable parameters.
set_trainable(True, unfreeze_last=2)
opt = torch.optim.AdamW(
    filter(lambda p:p.requires_grad, model.parameters()), lr=3e-4, weight_decay=1e-4
)
for ep in range(4):
    run_train(opt)
    r2, _ = run_val()
    print(f"A2 ep{ep+1} val_wR2={r2:.5f}")
    if r2 > best_r2:
        best_r2 = r2
        torch.save(model.state_dict(), best_path)

# OOF save: reload the best checkpoint, re-predict the validation fold and store the
# predictions together with their image ids.
model.load_state_dict(torch.load(best_path, map_location=device))
r2, P = run_val()
np.savez(RUN_DIR/"oof"/f"oof_modelA_fold{FOLD}.npz", image_id=va_df["image_id"].values, pred3=P)
print("Best val_wR2:", best_r2, "| OOF wR2:", r2, "| saved:", best_path)


Train ModelA: vit_giant_patch14_dinov2 | IMG_SIZE: 392




A1 ep1 val_wR2=0.18503




A1 ep2 val_wR2=0.46867




A2 ep1 val_wR2=0.21187




A2 ep2 val_wR2=0.42753




A2 ep3 val_wR2=0.53017




A2 ep4 val_wR2=0.52143
Best val_wR2: 0.5301685070125199 | OOF wR2: 0.5301685070125199 | saved: /content/drive/MyDrive/CSIRO/runs/run_001/checkpoints/modelA_dinov2/fold_0/best.pt


In [18]:
# BLOCK 9 (<=50 lines): Train ModelB (SigLIP) 1 fold (freeze->partial FT) + save best + OOF
import torch, numpy as np
from tqdm import tqdm

# Fold to train in this cell and its loaders / validation frame.
FOLD = 0
dl_tr, dl_va, va_df = make_loaders(FOLD)

# Pick the SigLIP backbone: first match from the preference list, else the first SigLIP
# model timm knows about.
siglip = [m for m in timm.list_models() if "siglip" in m]
if not siglip:
    # Without this guard the `siglip[0]` fallback below fails with a bare IndexError.
    raise RuntimeError("No SigLIP backbones found in the installed timm version.")
pref = ["vit_so400m_patch14_siglip_384","vit_so400m_patch14_siglip","vit_base_patch16_siglip_384"]
BACKBONE_B = next((p for p in pref if p in siglip), siglip[0])
print("Train ModelB:", BACKBONE_B, "| IMG_SIZE:", IMG_SIZE)

model = ImgReg3(BACKBONE_B, pretrained=True).to(device)
ckpt_dir = RUN_DIR/"checkpoints"/"modelB_siglip"/f"fold_{FOLD}"; ckpt_dir.mkdir(parents=True, exist_ok=True)
best_path = ckpt_dir/"best.pt"; best_r2 = -1e9

# The training loop below uses a global `scaler` that no cell of the saved notebook defines.
# Reuse one if the kernel already has it, otherwise create it here so the cell runs on a
# fresh kernel (scaling is a no-op when not on CUDA).
scaler = globals().get("scaler")
if scaler is None:
    scaler = torch.amp.GradScaler("cuda", enabled=(device.type=="cuda"))

def set_trainable(phase2=False, unfreeze_last=2):
    """Select which parameters of the global ``model`` receive gradients.

    The backbone is frozen and the head is made trainable. In phase 2, the last
    ``unfreeze_last`` entries of ``model.backbone.blocks`` (if the backbone has that
    attribute) are unfrozen as well.

    Args:
        phase2: enable partial fine-tuning of the backbone.
        unfreeze_last: number of trailing backbone blocks to unfreeze; values <= 0 leave the
            whole backbone frozen.
    """
    for p in model.backbone.parameters():
        p.requires_grad = False
    for p in model.head.parameters():
        p.requires_grad = True
    # Guard on > 0: blocks[-0:] is blocks[0:], which would silently unfreeze EVERY block.
    if phase2 and unfreeze_last > 0 and hasattr(model.backbone, "blocks"):
        for b in model.backbone.blocks[-unfreeze_last:]:
            for p in b.parameters():
                p.requires_grad = True

def val():
    """Run the global ``model`` over ``dl_va`` and score it.

    Returns:
        ``(weighted_r2, predictions)``; predictions is the (N, 3) float array of outputs in
        loader order.
    """
    model.eval()
    collected_pred = []
    collected_true = []
    with torch.no_grad():
        for images, targets, _ in dl_va:
            with torch.amp.autocast("cuda", enabled=(device.type=="cuda")):
                outputs = model(images.to(device))
            collected_pred.append(outputs.float().cpu().numpy())
            collected_true.append(targets.numpy())
    pred_arr = np.concatenate(collected_pred)
    true_arr = np.concatenate(collected_true)
    score = eval_weighted_r2_from_pred3(true_arr, pred_arr)
    return score, pred_arr

def train_epoch(opt):
    """One mixed-precision training epoch of the global ``model`` over ``dl_tr``.

    Args:
        opt: optimizer for the currently trainable parameters. The global ``scaler`` handles
            loss scaling and the optimizer step.
    """
    model.train()
    for images, targets, _ in tqdm(dl_tr, leave=False):
        images, targets = images.to(device), targets.to(device)
        with torch.amp.autocast("cuda", enabled=(device.type=="cuda")):
            outputs = model(images)
            loss = weighted_loss_from_pred3(outputs, targets)
        opt.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

# Phase B1: backbone frozen, train only the head (2 epochs, lr 2e-3).
set_trainable(False)
opt = torch.optim.AdamW(
    filter(lambda p:p.requires_grad, model.parameters()), lr=2e-3, weight_decay=1e-4
)
for ep in range(2):
    train_epoch(opt)
    r2, _ = val()
    print(f"B1 ep{ep+1} val_wR2={r2:.5f}")
    if r2 > best_r2:
        best_r2 = r2
        torch.save(model.state_dict(), best_path)

# Phase B2: additionally unfreeze the last 2 backbone blocks (4 epochs, lr 3e-4).
set_trainable(True, unfreeze_last=2)
opt = torch.optim.AdamW(
    filter(lambda p:p.requires_grad, model.parameters()), lr=3e-4, weight_decay=1e-4
)
for ep in range(4):
    train_epoch(opt)
    r2, _ = val()
    print(f"B2 ep{ep+1} val_wR2={r2:.5f}")
    if r2 > best_r2:
        best_r2 = r2
        torch.save(model.state_dict(), best_path)

# Reload the best checkpoint, re-predict the validation fold, and save the OOF predictions.
model.load_state_dict(torch.load(best_path, map_location=device))
r2, P = val()
np.savez(RUN_DIR/"oof"/f"oof_modelB_fold{FOLD}.npz", image_id=va_df["image_id"].values, pred3=P)
print("Best val_wR2:", best_r2, "| OOF wR2:", r2, "| saved:", best_path)


Train ModelB: vit_so400m_patch14_siglip_384 | IMG_SIZE: 392


model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]



B1 ep1 val_wR2=0.12675




B1 ep2 val_wR2=0.18123




B2 ep1 val_wR2=0.11791




B2 ep2 val_wR2=0.32513




B2 ep3 val_wR2=-0.00008




B2 ep4 val_wR2=0.07684
Best val_wR2: 0.3251272394609881 | OOF wR2: 0.3251272394609881 | saved: /content/drive/MyDrive/CSIRO/runs/run_001/checkpoints/modelB_siglip/fold_0/best.pt


In [19]:
# BLOCK 10 (<=50 lines): OOF health checks (fold0) + ensemble weight suggestion
import numpy as np

FOLD=0
# OOF files store image ids as an object array, which is why allow_pickle is required.
# NOTE(review): only load .npz files produced by this notebook; unpickling untrusted files
# can execute arbitrary code.
a = np.load(RUN_DIR/"oof"/f"oof_modelA_fold{FOLD}.npz", allow_pickle=True)
b = np.load(RUN_DIR/"oof"/f"oof_modelB_fold{FOLD}.npz", allow_pickle=True)
id_a, pA = a["image_id"], a["pred3"]
id_b, pB = b["image_id"], b["pred3"]
assert np.all(id_a==id_b), "OOF image_id mismatch between models."

va = train_wide[train_wide.fold==FOLD].reset_index(drop=True)
# The targets come from the CURRENT train_wide; make sure its fold rows are the same images,
# in the same order, as the stored predictions (folds may have been recomputed since).
assert list(va["image_id"].astype(str)) == [str(i) for i in id_a], \
    "OOF image_id order does not match train_wide rows for this fold."
Y = va[["Dry_Green_g","Dry_Dead_g","Dry_Clover_g"]].values.astype(np.float32)

r2A = eval_weighted_r2_from_pred3(Y, pA)
r2B = eval_weighted_r2_from_pred3(Y, pB)

# Try a grid of blend weights: pred = w*A + (1-w)*B for w in {0.0, 0.1, ..., 1.0}.
ws = np.linspace(0,1,11)
r2s = []
for w in ws:
    p = w*pA + (1-w)*pB
    r2s.append(eval_weighted_r2_from_pred3(Y, p))
best_i = int(np.argmax(r2s))
print("Fold0 wR2: A=", round(r2A,5), " B=", round(r2B,5))
print("Ensemble grid best: w(A)=", float(ws[best_i]), " wR2=", float(r2s[best_i]))

# Per-target debug metrics (derived Total and GDM included).
def per_target_rmse_mae(y3, p3):
    """RMSE and MAE for each of the 5 targets.

    Args:
        y3: (N, 3) ground truth in the order Green, Dead, Clover.
        p3: (N, 3) predictions in the same order.

    Returns:
        Dict mapping target name to ``(rmse, mae)`` floats, in the order Green, Dead, Clover,
        Total, GDM. Total and GDM are derived from the components for truth and prediction.
    """
    def to_five(arr3):
        green, dead, clover = arr3[:, 0], arr3[:, 1], arr3[:, 2]
        return {
            "Dry_Green_g": green,
            "Dry_Dead_g": dead,
            "Dry_Clover_g": clover,
            "Dry_Total_g": green + dead + clover,
            "GDM_g": green + clover,
        }

    truth5, pred5 = to_five(y3), to_five(p3)
    metrics = {}
    for name, truth in truth5.items():
        err = pred5[name] - truth
        rmse = float(np.sqrt(np.mean(err**2)))
        mae = float(np.mean(np.abs(err)))
        metrics[name] = (rmse, mae)
    return metrics

# Per-target (RMSE, MAE) on the fold's validation rows for each model's OOF predictions.
dbgA = per_target_rmse_mae(Y, pA)
dbgB = per_target_rmse_mae(Y, pB)
print("\nRMSE, MAE (A):", dbgA)
print("RMSE, MAE (B):", dbgB)


Fold0 wR2: A= 0.53017  B= 0.32513
Ensemble grid best: w(A)= 1.0  wR2= 0.5301685070125199

RMSE, MAE (A): {'Dry_Green_g': (23.59312629699707, 18.4628963470459), 'Dry_Dead_g': (15.26081371307373, 11.639562606811523), 'Dry_Clover_g': (17.01992416381836, 10.674093246459961), 'Dry_Total_g': (25.13693618774414, 17.048023223876953), 'GDM_g': (19.574140548706055, 13.56136703491211)}
RMSE, MAE (B): {'Dry_Green_g': (26.01597785949707, 19.010896682739258), 'Dry_Dead_g': (16.194494247436523, 12.09109878540039), 'Dry_Clover_g': (18.8719425201416, 12.040096282958984), 'Dry_Total_g': (31.0206241607666, 21.07330894470215), 'GDM_g': (23.26336097717285, 16.551250457763672)}


In [22]:
# PATCH: Train only FOLD=4 with local checkpoint, then copy to Drive (<100 lines)
import os, shutil, torch, numpy as np
from tqdm import tqdm
from pathlib import Path

# Fold to train in this cell and its loaders / validation frame.
FOLD = 4
dl_tr, dl_va, va_df = make_loaders(FOLD)
BACKBONE_A = globals().get("BACKBONE_A", "vit_giant_patch14_dinov2")
print("Training fold", FOLD, "| backbone:", BACKBONE_A, "| IMG_SIZE:", IMG_SIZE)

# Local (VM disk) scratch paths: checkpoints are written here during training ...
LOCAL_DIR = Path(f"/content/tmp_modelA_fold{FOLD}")
LOCAL_DIR.mkdir(parents=True, exist_ok=True)
local_best = LOCAL_DIR / "best.pt"

# ... and copied to these Drive paths once training has finished.
drive_ckpt_dir = RUN_DIR/"checkpoints"/"modelA_dinov2"/f"fold_{FOLD}"
drive_ckpt_dir.mkdir(parents=True, exist_ok=True)
drive_best = drive_ckpt_dir / "best.pt"
drive_oof  = RUN_DIR/"oof"/f"oof_modelA_fold{FOLD}.npz"

model = ImgReg3(BACKBONE_A, pretrained=True).to(device)
# Save the initial weights so local_best exists even if no epoch ever improves best_r2.
torch.save(model.state_dict(), local_best)  # guarantee exists locally
best_r2 = -1e9

# The training loop below uses a global `scaler` that no cell of the saved notebook defines.
# Reuse one if the kernel already has it, otherwise create it here so the cell runs on a
# fresh kernel (scaling is a no-op when not on CUDA).
scaler = globals().get("scaler")
if scaler is None:
    scaler = torch.amp.GradScaler("cuda", enabled=(device.type=="cuda"))

def set_trainable(phase2=False, unfreeze_last=2):
    """Select which parameters of the global ``model`` receive gradients.

    The backbone is frozen and the head is made trainable. In phase 2, the last
    ``unfreeze_last`` entries of ``model.backbone.blocks`` (if the backbone has that
    attribute) are unfrozen as well.

    Args:
        phase2: enable partial fine-tuning of the backbone.
        unfreeze_last: number of trailing backbone blocks to unfreeze; values <= 0 leave the
            whole backbone frozen.
    """
    for p in model.backbone.parameters():
        p.requires_grad = False
    for p in model.head.parameters():
        p.requires_grad = True
    # Guard on > 0: blocks[-0:] is blocks[0:], which would silently unfreeze EVERY block.
    if phase2 and unfreeze_last > 0 and hasattr(model.backbone, "blocks"):
        for b in model.backbone.blocks[-unfreeze_last:]:
            for p in b.parameters():
                p.requires_grad = True

def val():
    """Score the global ``model`` on the validation loader ``dl_va``.

    Returns:
        ``(weighted_r2, predictions)``; predictions is the (N, 3) float array of outputs in
        loader order.
    """
    model.eval()
    amp_on = device.type == "cuda"
    pred_chunks, true_chunks = [], []
    with torch.no_grad():
        for x, y, _ in dl_va:
            with torch.amp.autocast("cuda", enabled=amp_on):
                out_batch = model(x.to(device))
            pred_chunks.append(out_batch.float().cpu().numpy())
            true_chunks.append(y.numpy())
    preds = np.concatenate(pred_chunks)
    truth = np.concatenate(true_chunks)
    return eval_weighted_r2_from_pred3(truth, preds), preds

def train_epoch(opt):
    """One mixed-precision training epoch of the global ``model`` over ``dl_tr``.

    Batches whose loss is not finite are skipped entirely (no backward pass, no optimizer
    step). The global ``scaler`` handles loss scaling and the optimizer step.

    Args:
        opt: optimizer for the currently trainable parameters.
    """
    model.train()
    amp_on = device.type == "cuda"
    for x, y, _ in tqdm(dl_tr, leave=False):
        x = x.to(device)
        y = y.to(device)
        with torch.amp.autocast("cuda", enabled=amp_on):
            loss = weighted_loss_from_pred3(model(x), y)
        if torch.isfinite(loss):
            opt.zero_grad(set_to_none=True)
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()

# Phase A1: backbone frozen, train only the head (2 epochs, lr 2e-3).
set_trainable(False)
opt = torch.optim.AdamW(
    filter(lambda p:p.requires_grad, model.parameters()), lr=2e-3, weight_decay=1e-4
)
for ep in range(2):
    train_epoch(opt)
    r2, _ = val()
    print(f"A1 ep{ep+1} val_wR2={r2:.5f}")
    # Only finite, strictly better scores replace the local checkpoint.
    if np.isfinite(r2) and r2 > best_r2:
        best_r2 = r2
        torch.save(model.state_dict(), local_best)

# Phase A2: additionally unfreeze the last 2 backbone blocks (4 epochs, lr 3e-4).
set_trainable(True, unfreeze_last=2)
opt = torch.optim.AdamW(
    filter(lambda p:p.requires_grad, model.parameters()), lr=3e-4, weight_decay=1e-4
)
for ep in range(4):
    train_epoch(opt)
    r2, _ = val()
    print(f"A2 ep{ep+1} val_wR2={r2:.5f}")
    if np.isfinite(r2) and r2 > best_r2:
        best_r2 = r2
        torch.save(model.state_dict(), local_best)

# Final eval with the best local checkpoint, then write the OOF predictions locally.
model.load_state_dict(torch.load(local_best, map_location=device))
r2, P = val()
local_oof = LOCAL_DIR / "oof.npz"
np.savez(local_oof, image_id=va_df["image_id"].values, pred3=P)

# Copy the finished artifacts to Drive.
# NOTE(review): copyfile writes directly onto the destination, so an interrupted copy can
# leave a partial file there; this is not an atomic replace.
for src, dst in ((local_best, drive_best), (local_oof, drive_oof)):
    shutil.copyfile(src, dst)

print("DONE fold", FOLD, "| best_wR2:", round(best_r2,5), "| final_wR2:", round(r2,5))
print("Saved to Drive:", drive_best, "and", drive_oof)


Training fold 4 | backbone: vit_giant_patch14_dinov2 | IMG_SIZE: 392




A1 ep1 val_wR2=0.14549




A1 ep2 val_wR2=0.26539




A2 ep1 val_wR2=0.47167




A2 ep2 val_wR2=0.34288




A2 ep3 val_wR2=0.16961




A2 ep4 val_wR2=0.22469
DONE fold 4 | best_wR2: 0.47167 | final_wR2: 0.47167
Saved to Drive: /content/drive/MyDrive/CSIRO/runs/run_001/checkpoints/modelA_dinov2/fold_4/best.pt and /content/drive/MyDrive/CSIRO/runs/run_001/oof/oof_modelA_fold4.npz


In [25]:
# BLOCK 12  Merge ModelA OOF (fold0-4) + overall weighted R²
import numpy as np
from pathlib import Path

oof_dir = RUN_DIR/"oof"
# image_id -> (3,) prediction, collected from the per-fold OOF files.
preds = {}
for f in range(5):
    # Close each archive once read; ids are stored as an object array, hence allow_pickle.
    with np.load(oof_dir/f"oof_modelA_fold{f}.npz", allow_pickle=True) as z:
        for iid, p in zip(z["image_id"], z["pred3"]):
            preds[str(iid)] = p

# Align predictions to the row order of train_wide.
ids = train_wide["image_id"].astype(str).values
missing_ids = [i for i in ids if i not in preds]
if missing_ids:
    # Same exception type as the bare dict lookup would raise, but with an actionable message.
    raise KeyError(
        f"{len(missing_ids)} train images have no OOF prediction (e.g. {missing_ids[0]}); "
        "check that every fold's OOF file matches the current fold assignment."
    )
P = np.stack([preds[i] for i in ids]).astype(np.float32)
Y = train_wide[["Dry_Green_g","Dry_Dead_g","Dry_Clover_g"]].values.astype(np.float32)

oof_r2 = eval_weighted_r2_from_pred3(Y, P)
print("ModelA overall OOF weighted R²:", round(float(oof_r2), 6))

# Save the merged OOF predictions for later ensembling.
np.savez(oof_dir/"oof_modelA_all.npz", image_id=ids, pred3=P)
print("Saved:", oof_dir/"oof_modelA_all.npz")


ModelA overall OOF weighted R²: 0.582206
Saved: /content/drive/MyDrive/CSIRO/runs/run_001/oof/oof_modelA_all.npz


In [26]:
# BLOCK 13 ModelA test inference (5-fold ensemble) + save preds_test_modelA
import numpy as np, torch, cv2
from torch.utils.data import Dataset, DataLoader

# Inference needs the test frame and the ModelA backbone name from earlier cells.
assert "test_df" in globals() and "BACKBONE_A" in globals(), "Need Block 2 + patches (test_df, BACKBONE_A)."

# test_df is long format (one row per image/target pair); keep one row per image for inference.
test_imgs = test_df[["image_id","image_path"]].drop_duplicates("image_id").reset_index(drop=True)

class TestDS(Dataset):
    """Inference dataset: yields ``(image_tensor, image_id)`` for each row of ``df``."""

    def __init__(self, df):
        """Store a positionally re-indexed copy of ``df`` and the evaluation transforms.

        Args:
            df: frame with ``image_path`` and ``image_id`` columns.
        """
        self.df = df.reset_index(drop=True)
        self.tfms = get_tfms(train=False)

    def __len__(self):
        """Number of images."""
        return len(self.df)

    def __getitem__(self, i):
        """Load and transform image ``i``.

        Raises:
            FileNotFoundError: if the image cannot be read from ``BASE_DIR / image_path``.
        """
        r = self.df.iloc[i]
        fp = str(BASE_DIR / r["image_path"])
        img = cv2.imread(fp)
        # cv2.imread returns None on failure instead of raising; report the path explicitly.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {fp}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR channel order
        x = self.tfms(image=img)["image"]
        return x, r["image_id"]

dl = DataLoader(TestDS(test_imgs), batch_size=BATCH, shuffle=False, num_workers=WORKERS, pin_memory=True)

# Average the predictions of every fold's best checkpoint.
fold_ids = range(5)
Psum = None
for f in fold_ids:
    ckpt = RUN_DIR/"checkpoints"/"modelA_dinov2"/f"fold_{f}"/"best.pt"
    # pretrained=False: the weights come from the checkpoint, no download needed.
    model = ImgReg3(BACKBONE_A, pretrained=False).to(device)
    model.load_state_dict(torch.load(ckpt, map_location=device)); model.eval()
    Ps = []
    with torch.no_grad():
        for x,_ in dl:
            with torch.amp.autocast("cuda", enabled=(device.type=="cuda")):
                p = model(x.to(device)).float().cpu().numpy()
            Ps.append(p)
    P = np.concatenate(Ps).astype(np.float32)
    Psum = P if Psum is None else (Psum + P)

# Divide by the number of folds actually summed rather than a hard-coded 5.0.
P3 = (Psum / float(len(fold_ids)))
P3 = np.clip(P3, 0.0, None)  # clamp negatives
# The Dead column is named p_dead, not `pd`: at cell top level `pd` would rebind the global
# pandas alias and break every later `pd.` call until pandas is re-imported.
pg, p_dead, pc = P3[:,0], P3[:,1], P3[:,2]
out = {"image_id": test_imgs["image_id"].astype(str).values,
       "Dry_Green_g":pg, "Dry_Dead_g":p_dead, "Dry_Clover_g":pc,
       "Dry_Total_g":(pg+p_dead+pc), "GDM_g":(pg+pc)}

np.savez(RUN_DIR/"preds_test"/"preds_test_modelA.npz", **out)
print("Saved:", RUN_DIR/"preds_test"/"preds_test_modelA.npz", "| images:", len(test_imgs))


Saved: /content/drive/MyDrive/CSIRO/runs/run_001/preds_test/preds_test_modelA.npz | images: 1


In [27]:
# PATCH Block 13  fix test image_id using image_path stem, then redo ModelA inference
import numpy as np, torch, cv2
from pathlib import Path
from torch.utils.data import Dataset, DataLoader

print("test_df shape:", test_df.shape)
# Identify each test image by the file name (without extension) of its image_path.
image_paths = test_df["image_path"].astype(str)
test_df["image_key"] = [Path(p).stem for p in image_paths]
# One row per distinct image for inference.
test_imgs = (
    test_df[["image_key","image_path"]]
    .drop_duplicates("image_key")
    .reset_index(drop=True)
)
print("Unique test images:", len(test_imgs))

class TestDS(Dataset):
    """Inference dataset: yields ``(image_tensor, image_key)`` for each row of ``df``."""

    def __init__(self, df):
        """Store a positionally re-indexed copy of ``df`` and the evaluation transforms.

        Args:
            df: frame with ``image_path`` and ``image_key`` columns.
        """
        self.df = df.reset_index(drop=True)
        self.tfms = get_tfms(train=False)

    def __len__(self):
        """Number of images."""
        return len(self.df)

    def __getitem__(self, i):
        """Load and transform image ``i``.

        Raises:
            FileNotFoundError: if the image cannot be read from ``BASE_DIR / image_path``.
        """
        r = self.df.iloc[i]
        fp = str(BASE_DIR/r["image_path"])
        img = cv2.imread(fp)
        # cv2.imread returns None on failure instead of raising; report the path explicitly.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {fp}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR channel order
        return self.tfms(image=img)["image"], r["image_key"]

dl = DataLoader(TestDS(test_imgs), batch_size=BATCH, shuffle=False, num_workers=WORKERS, pin_memory=True)

# Average the predictions of every fold's best checkpoint.
fold_ids = range(5)
Psum=None
for f in fold_ids:
    ckpt = RUN_DIR/"checkpoints"/"modelA_dinov2"/f"fold_{f}"/"best.pt"
    # pretrained=False: the weights come from the checkpoint, no download needed.
    m = ImgReg3(BACKBONE_A, pretrained=False).to(device)
    m.load_state_dict(torch.load(ckpt, map_location=device)); m.eval()
    Ps=[]
    with torch.no_grad():
        for x,_ in dl:
            with torch.amp.autocast("cuda", enabled=(device.type=="cuda")):
                Ps.append(m(x.to(device)).float().cpu().numpy())
    P = np.concatenate(Ps).astype(np.float32)
    Psum = P if Psum is None else (Psum + P)

# Mean over the folds actually summed, then clamp negative predictions to zero.
P3 = np.clip(Psum/float(len(fold_ids)), 0.0, None)
# The Dead column is named p_dead, not `pd`: at cell top level `pd` would rebind the global
# pandas alias and break every later `pd.` call until pandas is re-imported.
pg, p_dead, pc = P3[:,0], P3[:,1], P3[:,2]
np.savez(RUN_DIR/"preds_test"/"preds_test_modelA.npz",
         image_key=test_imgs["image_key"].values.astype(str),
         Dry_Green_g=pg, Dry_Dead_g=p_dead, Dry_Clover_g=pc,
         Dry_Total_g=(pg+p_dead+pc), GDM_g=(pg+pc))
print("Saved:", RUN_DIR/"preds_test"/"preds_test_modelA.npz")


test_df shape: (5, 4)
Unique test images: 1
Saved: /content/drive/MyDrive/CSIRO/runs/run_001/preds_test/preds_test_modelA.npz


In [28]:
# BLOCK 14  Build submission.csv from test_df + preds_test_modelA.npz
import numpy as np, pandas as pd
from pathlib import Path

# NOTE(review): allow_pickle=True is kept from the original; only load files written by this
# notebook, since unpickling untrusted data can execute arbitrary code.
pred_npz = np.load(RUN_DIR/"preds_test"/"preds_test_modelA.npz", allow_pickle=True)
keys = pred_npz["image_key"].astype(str)
# image_key -> row position in the prediction arrays.
pred_map = {k:i for i,k in enumerate(keys)}
# Read every target array ONCE: indexing an NpzFile loads the array from the archive on
# each access, which the per-row lookup below would otherwise repeat for every row.
pred_arrays = {name: pred_npz[name] for name in pred_npz.files if name != "image_key"}

# Ensure the submission frame has image_key derived from the image_path stem.
test_sub = test_df.copy()
test_sub["image_key"] = test_sub["image_path"].astype(str).apply(lambda p: Path(p).stem)

# Fail early, listing what is missing, instead of a bare KeyError from inside apply().
unknown_keys = sorted(set(test_sub["image_key"]) - set(pred_map))
unknown_targets = sorted(set(test_sub["target_name"]) - set(pred_arrays))
if unknown_keys or unknown_targets:
    raise KeyError(
        f"Predictions missing for image keys {unknown_keys[:5]} / targets {unknown_targets}"
    )

def get_pred(row):
    """Return the predicted value for one submission row (its image and target_name)."""
    i = pred_map[row["image_key"]]
    return float(pred_arrays[row["target_name"]][i])

test_sub["target"] = test_sub.apply(get_pred, axis=1)

out = test_sub[["sample_id","target"]].copy()
sub_path = RUN_DIR/"submissions"/"submission_modelA.csv"
out.to_csv(sub_path, index=False)

print("Saved submission:", sub_path)
print(out.head())


Saved submission: /content/drive/MyDrive/CSIRO/runs/run_001/submissions/submission_modelA.csv
                    sample_id     target
0  ID1001187975__Dry_Clover_g   6.400000
1    ID1001187975__Dry_Dead_g  17.512501
2   ID1001187975__Dry_Green_g  37.831249
3   ID1001187975__Dry_Total_g  61.743752
4         ID1001187975__GDM_g  44.231251
