In [1]:
# ============================================================
# CSIRO Biomass - 0.56-style training recipe
# Dual backbone: convnext_base + tf_efficientnetv2_s (local safetensors)
# 3 outputs (Green/Clover/Dead) + derived GDM/Total
# 20 epochs, KFold, TTA, ensemble
# ============================================================

TRAIN = True
DEBUG = False  # if True: 2 folds, 5 epochs

import os
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import timm

tqdm.pandas()

# ----------------------------
# Paths
# ----------------------------
LOCAL = os.environ.get("KAGGLE_KERNEL_RUN_TYPE", "") == ""
DATA_ROOT = "../input/" if LOCAL else "/kaggle/input/csiro-biomass/"
OUTPUT_DIR = "./trained_models"
os.makedirs(OUTPUT_DIR, exist_ok=True)

CONVNEXT_CKPT = "/kaggle/input/convnext-base-imagenet22k-imagenet1k-weights/pytorch/default/1/convnext_base_pretrained.safetensors"
EFFNETV2_CKPT = "/kaggle/input/effnetv2/pytorch/default/1/tf_efficientnetv2_s_pretrained.safetensors"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# ----------------------------
# Data: train wide (one row per image/sample)
# ----------------------------
train_df = pd.read_csv(f"{DATA_ROOT}/train.csv")
train_df[["sample_id_prefix", "sample_id_suffix"]] = train_df.sample_id.str.split("__", expand=True)

META_COLS = ["sample_id_prefix", "image_path", "Sampling_Date", "State", "Species", "Pre_GSHH_NDVI", "Height_Ave_cm"]
wide = (
    train_df.pivot_table(
        index=META_COLS,
        columns="target_name",
        values="target",
        aggfunc="first",
    )
    .reset_index()
)

REQ = ["Dry_Green_g", "Dry_Clover_g", "Dry_Dead_g"]
for c in REQ:
    if c not in wide.columns:
        wide[c] = 0.0

# KFold
from sklearn.model_selection import KFold
NFOLD = 5
kf = KFold(n_splits=NFOLD, shuffle=True, random_state=42)
wide["fold"] = -1
for f, (_, va_idx) in enumerate(kf.split(wide)):
    wide.loc[va_idx, "fold"] = f

print("Train rows:", len(wide))

# ----------------------------
# Metric: weighted R2 on 5 targets (3 + derived)
# ----------------------------
def weighted_r2_score(y_true: np.ndarray, y_pred: np.ndarray):
    weights = np.array([0.1, 0.1, 0.1, 0.2, 0.5], dtype=np.float32)
    r2s = []
    for i in range(5):
        yt = y_true[:, i]
        yp = y_pred[:, i]
        ss_res = np.sum((yt - yp) ** 2)
        ss_tot = np.sum((yt - np.mean(yt)) ** 2)
        r2 = 1.0 - (ss_res / ss_tot) if ss_tot > 0 else 0.0
        r2s.append(r2)
    r2s = np.array(r2s, dtype=np.float32)
    return float(np.sum(r2s * weights) / np.sum(weights)), r2s

def calc_metric(outputs3: np.ndarray, targets3: np.ndarray):
    y_true = np.column_stack([
        targets3,
        targets3[:, 0] + targets3[:, 1],                 # GDM
        targets3[:, 0] + targets3[:, 1] + targets3[:, 2]  # Total
    ])
    y_pred = np.column_stack([
        outputs3,
        outputs3[:, 0] + outputs3[:, 1],
        outputs3[:, 0] + outputs3[:, 1] + outputs3[:, 2]
    ])
    return weighted_r2_score(y_true, y_pred)

# ----------------------------
# Loss: same idea as 0.56 notebook
# ----------------------------
class BiomassLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.base = nn.SmoothL1Loss()

    def forward(self, out, y):
        lg = self.base(out[:, 0], y[:, 0])
        lc = self.base(out[:, 1], y[:, 1])
        ld = self.base(out[:, 2], y[:, 2])

        pred_gdm = out[:, 0] + out[:, 1]
        true_gdm = y[:, 0] + y[:, 1]
        l_gdm = self.base(pred_gdm, true_gdm)

        pred_total = out.sum(dim=1)
        true_total = y.sum(dim=1)
        l_total = self.base(pred_total, true_total)

        return (1.0*lg + 1.0*lc + 1.0*ld + 0.5*l_gdm + 1.0*l_total)

# ----------------------------
# Dataset / Dataloader: RESIZE(512,512) like your 0.56 code
# ----------------------------
def make_transform(img_size=512, aug=True):
    if aug:
        return transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
        ])
    else:
        return transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
        ])

class BiomassDataset(Dataset):
    def __init__(self, df, img_size=512, aug=True, train=True):
        self.df = df.reset_index(drop=True)
        self.train = train
        self.tfm = make_transform(img_size=img_size, aug=aug)
        self.img_size = img_size

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = Image.open(os.path.join(DATA_ROOT, row["image_path"])).convert("RGB")
        img = self.tfm(img)

        if self.train:
            y = torch.tensor([row["Dry_Green_g"], row["Dry_Clover_g"], row["Dry_Dead_g"]], dtype=torch.float32)
            return img, y
        else:
            return img

def make_loader(df, img_size=512, batch_size=8, shuffle=True, aug=True, train=True):
    ds = BiomassDataset(df, img_size=img_size, aug=aug, train=train)
    return DataLoader(ds, batch_size=batch_size, shuffle=shuffle, num_workers=2, pin_memory=True)

# ----------------------------
# Safetensors load into timm model
# ----------------------------
def load_safetensors(path: str):
    import safetensors.torch as st
    sd = st.load_file(path)
    # strip common wrappers if present
    if all(k.startswith("model.") for k in sd.keys()):
        sd = {k.replace("model.", "", 1): v for k,v in sd.items()}
    if all(k.startswith("module.") for k in sd.keys()):
        sd = {k.replace("module.", "", 1): v for k,v in sd.items()}
    return sd

def strip_head(sd: dict):
    drop = ("head.", "fc.", "classifier.")
    return {k:v for k,v in sd.items() if not k.startswith(drop)}

def init_pretrained(model, ckpt_path):
    if not ckpt_path or not Path(ckpt_path).exists():
        print("[init] ckpt missing:", ckpt_path)
        return
    sd = strip_head(load_safetensors(ckpt_path))
    missing, unexpected = model.load_state_dict(sd, strict=False)
    print(f"[init] loaded: {Path(ckpt_path).name} | missing={len(missing)} unexpected={len(unexpected)}")

def get_model(model_name, ckpt_path):
    m = timm.create_model(model_name, pretrained=False, num_classes=3)
    init_pretrained(m, ckpt_path)
    return m

# ----------------------------
# Train / Validate (torch.amp, grad accum)
# ----------------------------
def train_epoch(model, loader, criterion, optimizer, scaler, accum=4):
    model.train()
    total = 0.0
    optimizer.zero_grad(set_to_none=True)

    for step, (x, y) in enumerate(loader):
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        with torch.amp.autocast(device_type="cuda", enabled=(device.type=="cuda")):
            out = model(x)
            loss = criterion(out, y) / accum

        scaler.scale(loss).backward()

        if (step + 1) % accum == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad(set_to_none=True)

        total += float(loss.item()) * accum

    if len(loader) % accum != 0:
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad(set_to_none=True)

    return total / len(loader)

@torch.no_grad()
def validate(model, loader, criterion):
    model.eval()
    total = 0.0
    outs, trgs = [], []

    for x, y in loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        with torch.amp.autocast(device_type="cuda", enabled=(device.type=="cuda")):
            out = model(x)
            loss = criterion(out, y)

        total += float(loss.item())
        outs.append(out.detach().cpu())
        trgs.append(y.detach().cpu())

    outs = torch.cat(outs).numpy()
    trgs = torch.cat(trgs).numpy()
    w_r2, r2s = calc_metric(outs, trgs)
    return total/len(loader), w_r2, r2s

def train_fold(df, fold, model_name, ckpt_path, img_size=512, real_bs=8, accum=4, lr=1e-3,
               epochs=20, patience=5, tag="model"):
    tr_df = df[df.fold != fold].copy()
    va_df = df[df.fold == fold].copy()

    tr_loader = make_loader(tr_df, img_size=img_size, batch_size=real_bs, shuffle=True, aug=True, train=True)
    va_loader = make_loader(va_df, img_size=img_size, batch_size=real_bs, shuffle=False, aug=False, train=True)

    model = get_model(model_name, ckpt_path).to(device)
    criterion = BiomassLoss()
    optimizer = AdamW(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.5, patience=max(1,patience//2))
    scaler = torch.amp.GradScaler("cuda", enabled=(device.type=="cuda"))

    best = -1e9
    bad = 0

    for ep in range(epochs):
        tr_loss = train_epoch(model, tr_loader, criterion, optimizer, scaler, accum=accum)
        va_loss, wR2, r2s = validate(model, va_loader, criterion)
        scheduler.step(wR2)

        # print EVERY epoch (no confusion)
        print(f"{tag} | fold {fold} | ep {ep:03d}/{epochs} | tr={tr_loss:.4f} va={va_loss:.4f} "
              f"wR2={wR2:.4f} lr={optimizer.param_groups[0]['lr']:.2e} "
              f"r2=[{', '.join([f'{x:.3f}' for x in r2s])}]")

        if wR2 > best:
            best = wR2
            bad = 0
            torch.save(model.state_dict(), f"{OUTPUT_DIR}/{tag}_best_fold{fold}.pth")
        else:
            bad += 1

        if bad >= patience:
            print(f"Early stop {tag} fold {fold} | best wR2={best:.4f}")
            break

    print(f"Done {tag} fold {fold} | best wR2={best:.4f}")
    return best

# ----------------------------
# Train both backbones
# ----------------------------
SPECS = [
    {"tag":"convnext_base", "name":"convnext_base", "ckpt":CONVNEXT_CKPT, "img":512, "bs":6, "accum":4, "lr":3e-4},
    {"tag":"effnetv2_s",    "name":"tf_efficientnetv2_s", "ckpt":EFFNETV2_CKPT, "img":512, "bs":8, "accum":4, "lr":1e-3},
]

EPOCHS = 5 if DEBUG else 20
RUN_FOLDS = 2 if DEBUG else NFOLD

if TRAIN:
    for spec in SPECS:
        print("\n" + "="*80)
        print(f"Training {spec['tag']} | img={spec['img']} | lr={spec['lr']}")
        scores = []
        for f in range(RUN_FOLDS):
            scores.append(train_fold(
                wide, f,
                model_name=spec["name"],
                ckpt_path=spec["ckpt"],
                img_size=spec["img"],
                real_bs=spec["bs"],
                accum=spec["accum"],
                lr=spec["lr"],
                epochs=EPOCHS,
                patience=5,
                tag=spec["tag"]
            ))
        print(spec["tag"], "CV avg:", float(np.mean(scores)))

# ----------------------------
# Test wide
# ----------------------------
test_df = pd.read_csv(f"{DATA_ROOT}/test.csv")
test_df[["sample_id_prefix", "sample_id_suffix"]] = test_df.sample_id.str.split("__", expand=True)

# One row per image
test_wide = test_df[["sample_id_prefix", "image_path"]].drop_duplicates().reset_index(drop=True)

# ----------------------------
# TTA predict + fold ensemble + backbone ensemble
# ----------------------------
@torch.no_grad()
def predict_tta(model, loader):
    model.eval()
    preds = []
    for x in loader:
        x = x.to(device, non_blocking=True)
        with torch.amp.autocast(device_type="cuda", enabled=(device.type=="cuda")):
            p1 = model(x)
            p2 = model(torch.flip(x, dims=[3]))
            p3 = model(torch.flip(x, dims=[2]))
            p4 = model(torch.flip(x, dims=[2,3]))
            p = (p1+p2+p3+p4)/4.0
        preds.append(p.detach().cpu())
    p = torch.cat(preds).numpy()
    return np.clip(p, 0, None)

def predict_backbone(spec):
    # loader
    ds = BiomassDataset(test_wide, img_size=spec["img"], aug=False, train=False)
    loader = DataLoader(ds, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

    # fold models
    fold_files = sorted(Path(OUTPUT_DIR).glob(f"{spec['tag']}_best_fold*.pth"))
    if len(fold_files) == 0:
        raise FileNotFoundError(f"No fold models found for {spec['tag']} in {OUTPUT_DIR}")

    all_preds = []
    for ff in fold_files:
        model = get_model(spec["name"], spec["ckpt"]).to(device)
        model.load_state_dict(torch.load(ff, map_location="cpu"), strict=True)
        all_preds.append(predict_tta(model, loader))

    return np.mean(all_preds, axis=0)

preds_list = []
for spec in SPECS:
    preds_list.append(predict_backbone(spec))

preds3 = np.mean(preds_list, axis=0)

test_wide["Dry_Green_g"]  = preds3[:, 0]
test_wide["Dry_Clover_g"] = preds3[:, 1]
test_wide["Dry_Dead_g"]   = preds3[:, 2]

# PHYSICS CONSISTENCY (CRITICAL)
test_wide["GDM_g"]       = test_wide["Dry_Green_g"] + test_wide["Dry_Clover_g"]
test_wide["Dry_Total_g"] = test_wide["GDM_g"] + test_wide["Dry_Dead_g"]

# submission
cols = ["Dry_Clover_g","Dry_Dead_g","Dry_Green_g","Dry_Total_g","GDM_g"]
sub = test_wide.set_index("sample_id_prefix")[cols].stack().reset_index()
sub.columns = ["sample_id_prefix","target_name","target"]
sub["sample_id"] = sub["sample_id_prefix"] + "__" + sub["target_name"]
sub = sub[["sample_id","target"]]
sub.to_csv("submission.csv", index=False)

print(sub.head())
print("Saved submission.csv")



Device: cuda
Train rows: 357

Training convnext_base | img=512 | lr=0.0003
[init] loaded: convnext_base_pretrained.safetensors | missing=4 unexpected=0
convnext_base | fold 0 | ep 000/20 | tr=88.8082 va=61.8446 wR2=-0.3130 lr=3.00e-04 r2=[-0.201, -0.036, -0.083, -0.320, -0.434]
convnext_base | fold 0 | ep 001/20 | tr=63.8634 va=58.2325 wR2=-0.1555 lr=3.00e-04 r2=[-0.029, -0.126, -0.037, -0.150, -0.213]
convnext_base | fold 0 | ep 002/20 | tr=60.7742 va=53.5647 wR2=-0.0090 lr=3.00e-04 r2=[0.072, -0.118, -0.014, 0.032, -0.019]
convnext_base | fold 0 | ep 003/20 | tr=58.9980 va=51.3563 wR2=0.0433 lr=3.00e-04 r2=[0.107, 0.100, 0.012, 0.123, -0.006]
convnext_base | fold 0 | ep 004/20 | tr=54.0013 va=47.0044 wR2=0.2116 lr=3.00e-04 r2=[0.162, 0.200, 0.085, 0.279, 0.222]
convnext_base | fold 0 | ep 005/20 | tr=52.4135 va=44.5260 wR2=0.2915 lr=3.00e-04 r2=[0.263, 0.229, 0.176, 0.323, 0.320]
convnext_base | fold 0 | ep 006/20 | tr=50.1436 va=43.1924 wR2=0.3125 lr=3.00e-04 r2=[0.290, 0.213, 0.163

  has_large_values = (abs_vals > 1e6).any()
