## Step 0

In [9]:
!ls

OneDrive_1_01-02-2026.zip  sample_data


In [10]:
!unzip OneDrive_1_01-02-2026.zip

Archive:  OneDrive_1_01-02-2026.zip
 extracting: Test.zip                
 extracting: Training.zip            


In [11]:
!unzip Test.zip

Archive:  Test.zip
   creating: Test/
  inflating: Test/Test.csv           
   creating: Test/RGBImages/
  inflating: Test/RGBImages/RGB_4.png  
  inflating: Test/RGBImages/RGB_5.png  
  inflating: Test/RGBImages/RGB_13.png  
  inflating: Test/RGBImages/RGB_17.png  
  inflating: Test/RGBImages/RGB_24.png  
  inflating: Test/RGBImages/RGB_30.png  
  inflating: Test/RGBImages/RGB_31.png  
  inflating: Test/RGBImages/RGB_36.png  
  inflating: Test/RGBImages/RGB_37.png  
  inflating: Test/RGBImages/RGB_38.png  
  inflating: Test/RGBImages/RGB_41.png  
  inflating: Test/RGBImages/RGB_47.png  
  inflating: Test/RGBImages/RGB_53.png  
  inflating: Test/RGBImages/RGB_61.png  
  inflating: Test/RGBImages/RGB_62.png  
  inflating: Test/RGBImages/RGB_65.png  
  inflating: Test/RGBImages/RGB_69.png  
  inflating: Test/RGBImages/RGB_73.png  
  inflating: Test/RGBImages/RGB_75.png  
  inflating: Test/RGBImages/RGB_83.png  
  inflating: Test/RGBImages/RGB_90.png  
  inflating: Test/RGBImages/RGB_91.p

In [12]:
!unzip Training.zip

Archive:  Training.zip
   creating: Training/
   creating: Training/RGBImages/
  inflating: Training/RGBImages/RGB_1.png  
  inflating: Training/RGBImages/RGB_2.png  
  inflating: Training/RGBImages/RGB_3.png  
  inflating: Training/RGBImages/RGB_6.png  
  inflating: Training/RGBImages/RGB_8.png  
  inflating: Training/RGBImages/RGB_9.png  
  inflating: Training/RGBImages/RGB_11.png  
  inflating: Training/RGBImages/RGB_14.png  
  inflating: Training/RGBImages/RGB_15.png  
  inflating: Training/RGBImages/RGB_16.png  
  inflating: Training/RGBImages/RGB_18.png  
  inflating: Training/RGBImages/RGB_19.png  
  inflating: Training/RGBImages/RGB_20.png  
  inflating: Training/RGBImages/RGB_21.png  
  inflating: Training/RGBImages/RGB_22.png  
  inflating: Training/RGBImages/RGB_25.png  
  inflating: Training/RGBImages/RGB_27.png  
  inflating: Training/RGBImages/RGB_28.png  
  inflating: Training/RGBImages/RGB_29.png  
  inflating: Training/RGBImages/RGB_32.png  
  inflating: Training/RGBIm

## Step 1

In [13]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image

# ============
# CONFIG
# ============
# Set this to the folder that contains: Training/, Test/
# Example (if you uploaded a zip and extracted under /content/data):
DATA_ROOT = Path("/content/data")

TRAIN_DIR = DATA_ROOT / "Training"
TEST_DIR  = DATA_ROOT / "Test"

TRAIN_CSV = TRAIN_DIR / "Train.csv"
TEST_CSV  = TEST_DIR / "Test.csv"

TRAIN_RGB_DIR   = TRAIN_DIR / "RGBImages"
TRAIN_DEPTH_DIR = TRAIN_DIR / "DepthImages"

TEST_RGB_DIR    = TEST_DIR / "RGBImages"
TEST_DEPTH_DIR  = TEST_DIR / "DepthImages"

print("DATA_ROOT:", DATA_ROOT)
print("Train CSV exists:", TRAIN_CSV.exists(), TRAIN_CSV)
print("Test  CSV exists:", TEST_CSV.exists(), TEST_CSV)


DATA_ROOT: /content/data
Train CSV exists: True /content/data/Training/Train.csv
Test  CSV exists: True /content/data/Test/Test.csv


In [14]:
train_df = pd.read_csv(TRAIN_CSV)
test_df  = pd.read_csv(TEST_CSV)

print("Train shape:", train_df.shape)
print("Test  shape:", test_df.shape)
print("\nTrain columns:", list(train_df.columns))
print("Test  columns:", list(test_df.columns))

# Basic column expectations
required_train_cols = {"image_id","Height","Diameter","LeafArea","FreshWeightShoot","DryWeightShoot"}
missing_train_cols = required_train_cols - set(train_df.columns)
print("\nMissing required train columns:", missing_train_cols)

print("\nTrain head:\n", train_df.head(3))
print("\nTest head:\n", test_df.head(3))


Train shape: (231, 7)
Test  shape: (76, 2)

Train columns: ['image_id', 'Height', 'Diameter', 'LeafArea', 'FreshWeightShoot', 'Variety', 'DryWeightShoot']
Test  columns: ['image_id', 'DryWeightShoot']

Missing required train columns: set()

Train head:
    image_id  Height  Diameter  LeafArea  FreshWeightShoot   Variety  \
0        15     5.1      16.1      87.6               3.2  Aphylion   
1        16     9.6      18.2     102.4               3.8  Aphylion   
2        18     6.2      17.6      84.5               3.1  Aphylion   

   DryWeightShoot  
0            0.16  
1            0.16  
2            0.14  

Test head:
    image_id  DryWeightShoot
0        17             NaN
1        47             NaN
2        53             NaN


In [15]:
def expected_paths(image_id: int, rgb_dir: Path, depth_dir: Path):
    rgb_path = rgb_dir / f"RGB_{image_id}.png"
    depth_path = depth_dir / f"Depth_{image_id}.png"
    return rgb_path, depth_path

def check_split(df: pd.DataFrame, rgb_dir: Path, depth_dir: Path, name: str):
    missing_rgb = []
    missing_depth = []

    for image_id in df["image_id"].astype(int).tolist():
        rgb_path, depth_path = expected_paths(image_id, rgb_dir, depth_dir)
        if not rgb_path.exists():
            missing_rgb.append(str(rgb_path))
        if not depth_path.exists():
            missing_depth.append(str(depth_path))

    print(f"\n[{name}] total rows:", len(df))
    print(f"[{name}] missing RGB:", len(missing_rgb))
    print(f"[{name}] missing Depth:", len(missing_depth))

    if missing_rgb:
        print("  Example missing RGB:", missing_rgb[0])
    if missing_depth:
        print("  Example missing Depth:", missing_depth[0])

    return missing_rgb, missing_depth

train_missing_rgb, train_missing_depth = check_split(train_df, TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, "TRAIN")
test_missing_rgb,  test_missing_depth  = check_split(test_df,  TEST_RGB_DIR,  TEST_DEPTH_DIR,  "TEST")



[TRAIN] total rows: 231
[TRAIN] missing RGB: 1
[TRAIN] missing Depth: 0
  Example missing RGB: /content/data/Training/RGBImages/RGB_332.png

[TEST] total rows: 76
[TEST] missing RGB: 0
[TEST] missing Depth: 0


In [16]:
# Pick a sample id that exists in train
sample_id = int(train_df["image_id"].iloc[0])

rgb_path, depth_path = expected_paths(sample_id, TRAIN_RGB_DIR, TRAIN_DEPTH_DIR)
print("Sample image_id:", sample_id)
print("RGB path:", rgb_path)
print("Depth path:", depth_path)

rgb = Image.open(rgb_path)
depth = Image.open(depth_path)

print("\nRGB mode/size:", rgb.mode, rgb.size)
print("Depth mode/size:", depth.mode, depth.size)

# Convert depth to numpy for quick stats
depth_np = np.array(depth)
print("\nDepth dtype:", depth_np.dtype)
print("Depth min/max:", int(depth_np.min()), int(depth_np.max()))
print("Depth percentiles (50/90/95/99):",
      np.percentile(depth_np, [50,90,95,99]).astype(int))


Sample image_id: 15
RGB path: /content/data/Training/RGBImages/RGB_15.png
Depth path: /content/data/Training/DepthImages/Depth_15.png

RGB mode/size: RGB (1920, 1080)
Depth mode/size: I;16 (1920, 1080)

Depth dtype: uint16
Depth min/max: 0 25729
Depth percentiles (50/90/95/99): [1140 1324 1361 1374]


In [17]:
print("\n===== SUMMARY =====")
print("Train rows:", len(train_df), " Test rows:", len(test_df))
print("Train missing RGB:", len(train_missing_rgb), " Train missing Depth:", len(train_missing_depth))
print("Test  missing RGB:", len(test_missing_rgb),  " Test  missing Depth:", len(test_missing_depth))
print("===================\n")

if len(train_missing_rgb) == 0 and len(train_missing_depth) == 0 and len(test_missing_rgb) == 0 and len(test_missing_depth) == 0:
    print("✅ Step 1 passed: CSVs load and all images are present.")
else:
    print("⚠️ Step 1 found missing files. We'll fix paths / IDs before continuing.")



===== SUMMARY =====
Train rows: 231  Test rows: 76
Train missing RGB: 1  Train missing Depth: 0
Test  missing RGB: 0  Test  missing Depth: 0

⚠️ Step 1 found missing files. We'll fix paths / IDs before continuing.


In [18]:
missing_id = 332

# confirm it exists in Train.csv
print("Rows in Train.csv with image_id=332:")
display(train_df[train_df["image_id"] == missing_id])

# check if any file contains "332" in the RGB directory
matches = sorted([p.name for p in TRAIN_RGB_DIR.glob("*332*")])
print("\nFiles in Training/RGBImages containing '332':", matches)

# also check in Test RGB dir just in case
matches_test = sorted([p.name for p in TEST_RGB_DIR.glob("*332*")])
print("\nFiles in Test/RGBImages containing '332':", matches_test)

# list a few nearby IDs (to see naming pattern)
nearby = sorted([p.name for p in TRAIN_RGB_DIR.glob("RGB_33*.png")])[:20]
print("\nSome nearby RGB_33*.png files:", nearby)


Rows in Train.csv with image_id=332:


Unnamed: 0,image_id,Height,Diameter,LeafArea,FreshWeightShoot,Variety,DryWeightShoot
166,332,15.0,29.0,4153.3,232.5,Salanova,10.49



Files in Training/RGBImages containing '332': []

Files in Test/RGBImages containing '332': []

Some nearby RGB_33*.png files: ['RGB_330.png', 'RGB_334.png', 'RGB_335.png', 'RGB_337.png', 'RGB_338.png']


In [19]:
# Drop rows whose RGB or Depth file is missing (train only)
def has_both_images(image_id: int, rgb_dir: Path, depth_dir: Path) -> bool:
    rgb_path = rgb_dir / f"RGB_{image_id}.png"
    depth_path = depth_dir / f"Depth_{image_id}.png"
    return rgb_path.exists() and depth_path.exists()

mask = train_df["image_id"].astype(int).apply(lambda x: has_both_images(x, TRAIN_RGB_DIR, TRAIN_DEPTH_DIR))
train_df_clean = train_df[mask].reset_index(drop=True)

print("Original train rows:", len(train_df))
print("Cleaned  train rows:", len(train_df_clean))
print("Dropped IDs:", sorted(set(train_df["image_id"]) - set(train_df_clean["image_id"])))


Original train rows: 231
Cleaned  train rows: 230
Dropped IDs: [332]


In [20]:
# Are there any duplicate IDs?
print("Test duplicate image_ids:", test_df["image_id"].duplicated().sum())

# Do all Test.csv IDs have corresponding files?
test_missing_rgb2, test_missing_depth2 = check_split(test_df, TEST_RGB_DIR, TEST_DEPTH_DIR, "TEST (recheck)")


Test duplicate image_ids: 0

[TEST (recheck)] total rows: 76
[TEST (recheck)] missing RGB: 0
[TEST (recheck)] missing Depth: 0


## Step 2

In [21]:
from tqdm import tqdm

# Use cleaned train df
train_df = train_df_clean

def load_depth_uint16(path: Path) -> np.ndarray:
    d = Image.open(path)
    return np.array(d)

# Compute a robust scale based on p99 across images
per_image_p99 = []
for image_id in tqdm(train_df["image_id"].astype(int).tolist(), desc="Computing per-image depth p99"):
    depth_path = TRAIN_DEPTH_DIR / f"Depth_{image_id}.png"
    d = load_depth_uint16(depth_path)
    per_image_p99.append(np.percentile(d, 99))

DEPTH_P99 = float(np.percentile(per_image_p99, 50))  # median of per-image p99 values
print("Median(per-image depth p99):", DEPTH_P99)

# Safety fallback
if DEPTH_P99 <= 0:
    DEPTH_P99 = 65535.0

print("DEPTH_P99 used for normalization:", DEPTH_P99)

Computing per-image depth p99: 100%|██████████| 230/230 [00:07<00:00, 31.05it/s]

Median(per-image depth p99): 1374.0
DEPTH_P99 used for normalization: 1374.0





In [28]:
import torch
from torch.utils.data import Dataset

IMG_SIZE = 256

# RGB normalization (simple, safe baseline)
RGB_MEAN = (0.5, 0.5, 0.5)
RGB_STD  = (0.25, 0.25, 0.25)

def load_rgb_tensor(path: Path, size=IMG_SIZE) -> torch.Tensor:
    img = Image.open(path).convert("RGB").resize((size, size), Image.BILINEAR)
    arr = np.asarray(img).astype(np.float32) / 255.0  # HWC, [0,1]
    # normalize
    arr = (arr - np.array(RGB_MEAN)) / np.array(RGB_STD)
    # HWC -> CHW
    arr = np.transpose(arr, (2,0,1))
    return torch.from_numpy(arr).float()

def load_depth_tensor(path: Path, size=IMG_SIZE, depth_p99=DEPTH_P99) -> torch.Tensor:
    img = Image.open(path)  # 16-bit
    img = img.resize((size, size), Image.NEAREST)  # nearest preserves discrete depth better
    d = np.asarray(img).astype(np.float32)
    # robust clip + normalize to [0,1]
    d = np.clip(d, 0, depth_p99) / depth_p99
    # add channel dim: 1xHxW
    d = d[None, :, :]
    return torch.from_numpy(d).float()

# Auxiliary standardization stats (computed from train)
AUX_COLS = ["Height","Diameter","LeafArea","FreshWeightShoot"]
aux_mean = train_df[AUX_COLS].mean().values.astype(np.float32)
aux_std  = train_df[AUX_COLS].std(ddof=0).values.astype(np.float32)
aux_std[aux_std == 0] = 1.0

print("Aux mean:", aux_mean)
print("Aux std :", aux_std)

class LettuceDataset(Dataset):
    def __init__(self, df: pd.DataFrame, rgb_dir: Path, depth_dir: Path, train_mode: bool):
        self.df = df.reset_index(drop=True)
        self.rgb_dir = rgb_dir
        self.depth_dir = depth_dir
        self.train_mode = train_mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        row = self.df.iloc[idx]
        image_id = int(row["image_id"])

        rgb_path   = self.rgb_dir / f"RGB_{image_id}.png"
        depth_path = self.depth_dir / f"Depth_{image_id}.png"

        rgb = load_rgb_tensor(rgb_path)
        depth = load_depth_tensor(depth_path)

        x = torch.cat([rgb, depth], dim=0)  # 4xHxW

        if self.train_mode:
            y_main = torch.tensor([float(row["DryWeightShoot"])], dtype=torch.float32)

            aux = row[AUX_COLS].values.astype(np.float32)
            aux = (aux - aux_mean) / aux_std
            y_aux = torch.from_numpy(aux)

            return x, y_main, y_aux, image_id
        else:
            return x, image_id


Aux mean: [  12.399131   22.873478 1894.5353    117.21435 ]
Aux std : [   4.822682    6.386966 1577.2528    110.85202 ]


In [23]:
ds = LettuceDataset(train_df.iloc[:5], TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True)

x, y_main, y_aux, image_id = ds[0]
print("image_id:", image_id)
print("x shape:", x.shape)           # expect [4, 256, 256]
print("y_main:", y_main, y_main.shape)  # expect [1]
print("y_aux shape:", y_aux.shape)   # expect [4]

# Check value ranges
print("RGB channels stats (mean/std):", x[:3].mean().item(), x[:3].std().item())
print("Depth channel stats (min/max):", x[3].min().item(), x[3].max().item())


image_id: 15
x shape: torch.Size([4, 256, 256])
y_main: tensor([0.1600]) torch.Size([1])
y_aux shape: torch.Size([4])
RGB channels stats (mean/std): -0.180261590139177 0.5769466877044992
Depth channel stats (min/max): 0.0 1.0


In [24]:
from torch.utils.data import DataLoader

loader = DataLoader(ds, batch_size=2, shuffle=False, num_workers=0)

batch = next(iter(loader))
xb, yb, yauxb, ids = batch
print("Batch x:", xb.shape)      # [2,4,256,256]
print("Batch y:", yb.shape)      # [2,1]
print("Batch yaux:", yauxb.shape)# [2,4]
print("Batch ids:", ids)


Batch x: torch.Size([2, 4, 256, 256])
Batch y: torch.Size([2, 1])
Batch yaux: torch.Size([2, 4])
Batch ids: tensor([15, 16])


## Step 3

In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SmallCNNMultiTask(nn.Module):
    def __init__(self, dropout=0.3):
        super().__init__()

        def block(in_ch, out_ch):
            return nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2)
            )

        self.backbone = nn.Sequential(
            block(4, 32),    # 256 -> 128
            block(32, 64),   # 128 -> 64
            block(64, 128),  # 64 -> 32
            block(128, 256)  # 32 -> 16
        )

        self.pool = nn.AdaptiveAvgPool2d(1)  # -> [B,256,1,1]
        self.fc = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
        )

        self.head_main = nn.Linear(128, 1)   # DryWeightShoot
        self.head_aux  = nn.Linear(128, 4)   # Height, Diameter, LeafArea, FreshWeightShoot (standardized)

    def forward(self, x):
        feats = self.backbone(x)
        feats = self.pool(feats).flatten(1)      # [B,256]
        h = self.fc(feats)                       # [B,128]
        y_main = self.head_main(h)               # [B,1]
        y_aux  = self.head_aux(h)                # [B,4]
        return y_main, y_aux


In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

model = SmallCNNMultiTask(dropout=0.3).to(device)

ds_small = LettuceDataset(train_df.iloc[:8], TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True)
loader_small = DataLoader(ds_small, batch_size=4, shuffle=False, num_workers=0)

xb, yb, yauxb, ids = next(iter(loader_small))

# Force float32 + move to GPU
xb = xb.to(device).float()

with torch.no_grad():
    pred_main, pred_aux = model(xb)

print("xb dtype:", xb.dtype)
print("pred_main shape:", pred_main.shape)  # [4,1]
print("pred_aux shape :", pred_aux.shape)   # [4,4]
print("pred_main sample:", pred_main[:2].view(-1).detach().cpu().numpy())


Device: cuda
xb dtype: torch.float32
pred_main shape: torch.Size([4, 1])
pred_aux shape : torch.Size([4, 4])
pred_main sample: [0.5440216  0.31083065]


In [30]:
mae = nn.L1Loss()
mse = nn.MSELoss()

LAMBDA_AUX = 0.3  # starting point

def compute_loss(pred_main, pred_aux, y_main, y_aux, lam=LAMBDA_AUX):
    loss_main = mae(pred_main, y_main)
    loss_aux  = mse(pred_aux, y_aux)
    loss_total = loss_main + lam * loss_aux
    return loss_total, loss_main, loss_aux


## Step 4

In [33]:
from torch.utils.data import DataLoader
import torch.optim as optim
import time

# Use a very small subset
tiny_df = train_df.sample(n=10, random_state=42).reset_index(drop=True)

tiny_ds = LettuceDataset(tiny_df, TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True)
tiny_loader = DataLoader(tiny_ds, batch_size=5, shuffle=True, num_workers=0)

print("Tiny dataset size:", len(tiny_ds))
print(tiny_df[["image_id","DryWeightShoot"]].head())


Tiny dataset size: 10
   image_id  DryWeightShoot
0       371           10.11
1        70            1.82
2       101            1.79
3       183            2.80
4       110            1.42


In [34]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SmallCNNMultiTask(dropout=0.0).to(device)  # dropout off to help memorization

optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.0)

mae = nn.L1Loss()
mse = nn.MSELoss()
LAMBDA_AUX = 0.3

def compute_loss(pred_main, pred_aux, y_main, y_aux, lam=LAMBDA_AUX):
    loss_main = mae(pred_main, y_main)
    loss_aux  = mse(pred_aux, y_aux)
    return loss_main + lam * loss_aux, loss_main, loss_aux

model.train()
num_epochs = 200

for epoch in range(1, num_epochs + 1):
    total, main_total, aux_total = 0.0, 0.0, 0.0

    for xb, yb, yauxb, ids in tiny_loader:
        xb = xb.to(device).float()
        yb = yb.to(device).float()
        yauxb = yauxb.to(device).float()

        optimizer.zero_grad()
        pred_main, pred_aux = model(xb)

        loss, loss_main, loss_aux = compute_loss(pred_main, pred_aux, yb, yauxb)
        loss.backward()
        optimizer.step()

        total += loss.item() * xb.size(0)
        main_total += loss_main.item() * xb.size(0)
        aux_total += loss_aux.item() * xb.size(0)

    total /= len(tiny_ds)
    main_total /= len(tiny_ds)
    aux_total /= len(tiny_ds)

    if epoch % 20 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | total={total:.4f} | main(MAE)={main_total:.4f} | aux(MSE)={aux_total:.4f}")


Epoch 001 | total=3.6423 | main(MAE)=3.4596 | aux(MSE)=0.6090
Epoch 020 | total=0.6121 | main(MAE)=0.5268 | aux(MSE)=0.2844
Epoch 040 | total=0.3291 | main(MAE)=0.2918 | aux(MSE)=0.1243
Epoch 060 | total=0.6432 | main(MAE)=0.6089 | aux(MSE)=0.1146
Epoch 080 | total=0.6976 | main(MAE)=0.6700 | aux(MSE)=0.0919
Epoch 100 | total=0.2190 | main(MAE)=0.1967 | aux(MSE)=0.0745
Epoch 120 | total=0.4997 | main(MAE)=0.4697 | aux(MSE)=0.1000
Epoch 140 | total=0.6819 | main(MAE)=0.6505 | aux(MSE)=0.1049
Epoch 160 | total=0.1610 | main(MAE)=0.1448 | aux(MSE)=0.0541
Epoch 180 | total=0.3104 | main(MAE)=0.2861 | aux(MSE)=0.0810
Epoch 200 | total=0.3165 | main(MAE)=0.2971 | aux(MSE)=0.0647


In [35]:
model.eval()
rows = []

with torch.no_grad():
    for i in range(len(tiny_ds)):
        x, y, yaux, image_id = tiny_ds[i]
        x = x.unsqueeze(0).to(device).float()
        pred_main, _ = model(x)
        pred = float(pred_main.item())
        rows.append((image_id, float(y.item()), pred, abs(pred - float(y.item()))))

res = pd.DataFrame(rows, columns=["image_id","true","pred","abs_error"]).sort_values("abs_error")
print(res)
print("\nMean abs error on tiny set:", res["abs_error"].mean())


   image_id   true       pred  abs_error
1        70   1.82   1.809090   0.010910
2       101   1.79   1.825552   0.035552
3       183   2.80   2.875254   0.075254
7       187   2.24   2.421997   0.181997
4       110   1.42   1.606386   0.186386
9        93   1.24   1.469445   0.229445
5       193   3.63   3.866493   0.236493
6       176   4.10   4.439178   0.339178
8       258   7.63   8.568734   0.938734
0       371  10.11  12.031196   1.921196

Mean abs error on tiny set: 0.41551449298858645


## Step 4.1

In [36]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SmallCNNMultiTask(dropout=0.0).to(device)

# --- Freeze BatchNorm stats (keeps affine params trainable) ---
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        m.eval()  # freezes running mean/var
        m.requires_grad_(True)

optimizer = optim.Adam(model.parameters(), lr=3e-4, weight_decay=0.0)

mae = nn.L1Loss()
mse = nn.MSELoss()
LAMBDA_AUX = 0.2  # slightly lower to focus on main target

def compute_loss(pred_main, pred_aux, y_main, y_aux, lam=LAMBDA_AUX):
    loss_main = mae(pred_main, y_main)
    loss_aux  = mse(pred_aux, y_aux)
    return loss_main + lam * loss_aux, loss_main, loss_aux

model.train()
num_epochs = 400

for epoch in range(1, num_epochs + 1):
    total, main_total, aux_total = 0.0, 0.0, 0.0

    for xb, yb, yauxb, ids in tiny_loader:
        xb = xb.to(device).float()
        yb = yb.to(device).float()
        yauxb = yauxb.to(device).float()

        optimizer.zero_grad()
        pred_main, pred_aux = model(xb)
        loss, loss_main, loss_aux = compute_loss(pred_main, pred_aux, yb, yauxb)
        loss.backward()
        optimizer.step()

        total += loss.item() * xb.size(0)
        main_total += loss_main.item() * xb.size(0)
        aux_total += loss_aux.item() * xb.size(0)

    total /= len(tiny_ds)
    main_total /= len(tiny_ds)
    aux_total /= len(tiny_ds)

    if epoch % 50 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | total={total:.4f} | main(MAE)={main_total:.4f} | aux(MSE)={aux_total:.4f}")


Epoch 001 | total=3.7578 | main(MAE)=3.6613 | aux(MSE)=0.4827
Epoch 050 | total=0.6550 | main(MAE)=0.5858 | aux(MSE)=0.3460
Epoch 100 | total=0.3541 | main(MAE)=0.3356 | aux(MSE)=0.0925
Epoch 150 | total=0.5038 | main(MAE)=0.4839 | aux(MSE)=0.0996
Epoch 200 | total=0.3576 | main(MAE)=0.3410 | aux(MSE)=0.0830
Epoch 250 | total=0.3382 | main(MAE)=0.3181 | aux(MSE)=0.1008
Epoch 300 | total=0.3012 | main(MAE)=0.2871 | aux(MSE)=0.0707
Epoch 350 | total=0.1876 | main(MAE)=0.1762 | aux(MSE)=0.0571
Epoch 400 | total=0.2498 | main(MAE)=0.2371 | aux(MSE)=0.0635


In [37]:
model.eval()
rows = []

with torch.no_grad():
    for i in range(len(tiny_ds)):
        x, y, yaux, image_id = tiny_ds[i]
        x = x.unsqueeze(0).to(device).float()
        pred_main, _ = model(x)
        pred = float(pred_main.item())
        rows.append((image_id, float(y.item()), pred, abs(pred - float(y.item()))))

res = pd.DataFrame(rows, columns=["image_id","true","pred","abs_error"]).sort_values("abs_error")
print(res)
print("\nMean abs error on tiny set:", res["abs_error"].mean())


   image_id   true       pred  abs_error
8       258   7.63   7.580428   0.049572
2       101   1.79   1.705294   0.084706
1        70   1.82   1.725532   0.094468
5       193   3.63   3.519864   0.110137
9        93   1.24   1.105059   0.134941
4       110   1.42   1.252824   0.167176
3       183   2.80   2.632094   0.167906
6       176   4.10   3.924866   0.175134
0       371  10.11  10.335894   0.225894
7       187   2.24   1.991558   0.248442

Mean abs error on tiny set: 0.14583749771118165


## Step 5

In [38]:
from sklearn.model_selection import train_test_split

# Use cleaned training data
df = train_df.copy()

train_split, val_split = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    shuffle=True
)

train_split = train_split.reset_index(drop=True)
val_split   = val_split.reset_index(drop=True)

print("Train split:", train_split.shape)
print("Val split  :", val_split.shape)


Train split: (184, 7)
Val split  : (46, 7)


In [39]:
BATCH_SIZE = 8

train_ds = LettuceDataset(train_split, TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True)
val_ds   = LettuceDataset(val_split,   TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print("Train batches:", len(train_loader))
print("Val batches  :", len(val_loader))


Train batches: 23
Val batches  : 6


In [40]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SmallCNNMultiTask(dropout=0.3).to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.5, patience=8
)

mae = nn.L1Loss()
mse = nn.MSELoss()
LAMBDA_AUX = 0.3

def train_one_epoch(model, loader):
    model.train()
    total_loss = 0.0
    total_main = 0.0
    total_aux  = 0.0
    n = 0

    for xb, yb, yauxb, ids in loader:
        xb = xb.to(device).float()
        yb = yb.to(device).float()
        yauxb = yauxb.to(device).float()

        optimizer.zero_grad()
        pred_main, pred_aux = model(xb)

        loss_main = mae(pred_main, yb)
        loss_aux  = mse(pred_aux, yauxb)
        loss = loss_main + LAMBDA_AUX * loss_aux

        loss.backward()
        optimizer.step()

        bs = xb.size(0)
        total_loss += loss.item() * bs
        total_main += loss_main.item() * bs
        total_aux  += loss_aux.item() * bs
        n += bs

    return total_loss / n, total_main / n, total_aux / n

@torch.no_grad()
def evaluate(model, loader):
    model.eval()
    total_main = 0.0
    n = 0
    for xb, yb, yauxb, ids in loader:
        xb = xb.to(device).float()
        yb = yb.to(device).float()
        pred_main, pred_aux = model(xb)
        loss_main = mae(pred_main, yb)
        bs = xb.size(0)
        total_main += loss_main.item() * bs
        n += bs
    return total_main / n  # MAE in grams

# Early stopping
best_val = float("inf")
best_state = None
patience = 20
patience_ctr = 0

MAX_EPOCHS = 150

for epoch in range(1, MAX_EPOCHS + 1):
    tr_loss, tr_main, tr_aux = train_one_epoch(model, train_loader)
    val_mae = evaluate(model, val_loader)

    scheduler.step(val_mae)

    lr = optimizer.param_groups[0]["lr"]

    if val_mae < best_val - 1e-4:
        best_val = val_mae
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        patience_ctr = 0
    else:
        patience_ctr += 1

    if epoch % 10 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | lr={lr:.2e} | train_main(MAE)={tr_main:.4f} | val_main(MAE)={val_mae:.4f}")

    if patience_ctr >= patience:
        print(f"Early stopping at epoch {epoch}. Best val MAE: {best_val:.4f}")
        break

# Restore best
model.load_state_dict(best_state)
print("Restored best model. Best val MAE:", best_val)


Epoch 001 | lr=1.00e-03 | train_main(MAE)=3.3632 | val_main(MAE)=1.4550
Epoch 010 | lr=1.00e-03 | train_main(MAE)=1.4062 | val_main(MAE)=1.0384
Epoch 020 | lr=1.00e-03 | train_main(MAE)=1.1366 | val_main(MAE)=1.1815
Epoch 030 | lr=5.00e-04 | train_main(MAE)=1.3936 | val_main(MAE)=0.8901
Epoch 040 | lr=5.00e-04 | train_main(MAE)=1.0149 | val_main(MAE)=1.0835
Epoch 050 | lr=5.00e-04 | train_main(MAE)=0.9421 | val_main(MAE)=0.7442
Epoch 060 | lr=5.00e-04 | train_main(MAE)=1.1490 | val_main(MAE)=0.6693
Epoch 070 | lr=2.50e-04 | train_main(MAE)=0.8683 | val_main(MAE)=0.5615
Epoch 080 | lr=1.25e-04 | train_main(MAE)=0.8615 | val_main(MAE)=0.6208
Epoch 090 | lr=1.25e-04 | train_main(MAE)=0.6970 | val_main(MAE)=0.5701
Epoch 100 | lr=6.25e-05 | train_main(MAE)=0.7155 | val_main(MAE)=0.5747
Early stopping at epoch 104. Best val MAE: 0.5598
Restored best model. Best val MAE: 0.5598193044247834


## Step 6

### Step 6A

In [41]:
import random
from PIL import ImageEnhance

def apply_train_augmentations(rgb_img: Image.Image, depth_img: Image.Image):
    # --- Random horizontal flip (same for both) ---
    if random.random() < 0.5:
        rgb_img = rgb_img.transpose(Image.FLIP_LEFT_RIGHT)
        depth_img = depth_img.transpose(Image.FLIP_LEFT_RIGHT)

    # --- Small rotation (same for both) ---
    angle = random.uniform(-10, 10)
    rgb_img = rgb_img.rotate(angle, resample=Image.BILINEAR)
    depth_img = depth_img.rotate(angle, resample=Image.NEAREST)

    # --- Mild color jitter (RGB only) ---
    # brightness +/- 10%
    if random.random() < 0.8:
        rgb_img = ImageEnhance.Brightness(rgb_img).enhance(random.uniform(0.9, 1.1))
        rgb_img = ImageEnhance.Contrast(rgb_img).enhance(random.uniform(0.9, 1.1))

    return rgb_img, depth_img


In [42]:
class LettuceDataset(Dataset):
    def __init__(self, df: pd.DataFrame, rgb_dir: Path, depth_dir: Path, train_mode: bool, augment: bool = False):
        self.df = df.reset_index(drop=True)
        self.rgb_dir = rgb_dir
        self.depth_dir = depth_dir
        self.train_mode = train_mode
        self.augment = augment

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        row = self.df.iloc[idx]
        image_id = int(row["image_id"])

        rgb_path   = self.rgb_dir / f"RGB_{image_id}.png"
        depth_path = self.depth_dir / f"Depth_{image_id}.png"

        # Load original PIL images
        rgb_img = Image.open(rgb_path).convert("RGB")
        depth_img = Image.open(depth_path)  # 16-bit

        # Apply aug only for training set
        if self.train_mode and self.augment:
            rgb_img, depth_img = apply_train_augmentations(rgb_img, depth_img)

        # Resize
        rgb_img = rgb_img.resize((IMG_SIZE, IMG_SIZE), Image.BILINEAR)
        depth_img = depth_img.resize((IMG_SIZE, IMG_SIZE), Image.NEAREST)

        # RGB -> tensor normalized
        rgb_arr = np.asarray(rgb_img).astype(np.float32) / 255.0
        rgb_arr = (rgb_arr - np.array(RGB_MEAN)) / np.array(RGB_STD)
        rgb_arr = np.transpose(rgb_arr, (2,0,1))
        rgb = torch.from_numpy(rgb_arr).float()

        # Depth -> tensor normalized robustly
        d = np.asarray(depth_img).astype(np.float32)
        d = np.clip(d, 0, DEPTH_P99) / DEPTH_P99
        depth = torch.from_numpy(d[None, :, :]).float()

        x = torch.cat([rgb, depth], dim=0)  # 4xHxW

        if self.train_mode:
            y_main = torch.tensor([float(row["DryWeightShoot"])], dtype=torch.float32)

            aux = row[AUX_COLS].values.astype(np.float32)
            aux = (aux - aux_mean) / aux_std
            y_aux = torch.from_numpy(aux).float()

            return x, y_main, y_aux, image_id
        else:
            return x, image_id


In [43]:
train_ds = LettuceDataset(train_split, TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True, augment=True)
val_ds   = LettuceDataset(val_split,   TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True, augment=False)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SmallCNNMultiTask(dropout=0.3).to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.5, patience=8
)

best_val = float("inf")
best_state = None
patience = 20
patience_ctr = 0
MAX_EPOCHS = 150

for epoch in range(1, MAX_EPOCHS + 1):
    tr_loss, tr_main, tr_aux = train_one_epoch(model, train_loader)
    val_mae = evaluate(model, val_loader)

    scheduler.step(val_mae)
    lr = optimizer.param_groups[0]["lr"]

    if val_mae < best_val - 1e-4:
        best_val = val_mae
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        patience_ctr = 0
    else:
        patience_ctr += 1

    if epoch % 10 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | lr={lr:.2e} | train_main(MAE)={tr_main:.4f} | val_main(MAE)={val_mae:.4f}")

    if patience_ctr >= patience:
        print(f"Early stopping at epoch {epoch}. Best val MAE: {best_val:.4f}")
        break

model.load_state_dict(best_state)
print("Restored best model. Best val MAE:", best_val)


Epoch 001 | lr=1.00e-03 | train_main(MAE)=3.7098 | val_main(MAE)=2.1517
Epoch 010 | lr=1.00e-03 | train_main(MAE)=1.7217 | val_main(MAE)=1.1764
Epoch 020 | lr=1.00e-03 | train_main(MAE)=1.3256 | val_main(MAE)=0.7192
Epoch 030 | lr=5.00e-04 | train_main(MAE)=1.2956 | val_main(MAE)=0.6194
Epoch 040 | lr=2.50e-04 | train_main(MAE)=1.0981 | val_main(MAE)=1.3473
Epoch 050 | lr=2.50e-04 | train_main(MAE)=1.0344 | val_main(MAE)=0.6424
Epoch 060 | lr=2.50e-04 | train_main(MAE)=1.2556 | val_main(MAE)=0.7034
Epoch 070 | lr=6.25e-05 | train_main(MAE)=1.0208 | val_main(MAE)=0.6256
Early stopping at epoch 72. Best val MAE: 0.5053
Restored best model. Best val MAE: 0.505272898984992


### Step 6 B

In [44]:
from sklearn.model_selection import KFold

def run_one_fold(train_idx, val_idx, df, fold_id):
    train_fold = df.iloc[train_idx].reset_index(drop=True)
    val_fold   = df.iloc[val_idx].reset_index(drop=True)

    train_ds = LettuceDataset(train_fold, TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True, augment=True)
    val_ds   = LettuceDataset(val_fold,   TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True, augment=False)

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
    val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    model = SmallCNNMultiTask(dropout=0.3).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=8)

    best_val = float("inf")
    best_state = None
    patience = 20
    patience_ctr = 0

    for epoch in range(1, 151):
        tr_loss, tr_main, tr_aux = train_one_epoch(model, train_loader)
        val_mae = evaluate(model, val_loader)

        scheduler.step(val_mae)

        if val_mae < best_val - 1e-4:
            best_val = val_mae
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            patience_ctr = 0
        else:
            patience_ctr += 1

        if epoch % 20 == 0 or epoch == 1:
            lr = optimizer.param_groups[0]["lr"]
            print(f"Fold {fold_id} | Epoch {epoch:03d} | lr={lr:.2e} | trainMAE={tr_main:.4f} | valMAE={val_mae:.4f}")

        if patience_ctr >= patience:
            break

    return best_val, best_state

# Run CV
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
kf = KFold(n_splits=5, shuffle=True, random_state=42)

fold_maes = []
fold_states = []  # store best weights per fold for later ensembling

df = train_df.copy().reset_index(drop=True)

for fold_id, (tr_idx, va_idx) in enumerate(kf.split(df), start=1):
    print("\n==============================")
    print("Starting fold", fold_id)
    print("==============================")
    best_val, best_state = run_one_fold(tr_idx, va_idx, df, fold_id)
    fold_maes.append(best_val)
    fold_states.append(best_state)
    print(f"Fold {fold_id} BEST val MAE: {best_val:.4f}")

print("\n===== CV RESULTS =====")
print("Fold MAEs:", [round(x,4) for x in fold_maes])
print("Mean MAE:", float(np.mean(fold_maes)))
print("Std  MAE:", float(np.std(fold_maes)))



Starting fold 1
Fold 1 | Epoch 001 | lr=1.00e-03 | trainMAE=5.6113 | valMAE=4.7024
Fold 1 | Epoch 020 | lr=1.00e-03 | trainMAE=5.5989 | valMAE=4.5925
Fold 1 BEST val MAE: 4.5876

Starting fold 2
Fold 2 | Epoch 001 | lr=1.00e-03 | trainMAE=5.8737 | valMAE=4.9856
Fold 2 | Epoch 020 | lr=2.50e-04 | trainMAE=5.8850 | valMAE=5.0768
Fold 2 BEST val MAE: 4.9856

Starting fold 3
Fold 3 | Epoch 001 | lr=1.00e-03 | trainMAE=5.5935 | valMAE=5.8447
Fold 3 | Epoch 020 | lr=2.50e-04 | trainMAE=5.5842 | valMAE=6.1052
Fold 3 BEST val MAE: 5.8447

Starting fold 4
Fold 4 | Epoch 001 | lr=1.00e-03 | trainMAE=5.3600 | valMAE=5.7097
Fold 4 | Epoch 020 | lr=2.50e-04 | trainMAE=5.3513 | valMAE=5.7596
Fold 4 BEST val MAE: 5.7097

Starting fold 5
Fold 5 | Epoch 001 | lr=1.00e-03 | trainMAE=5.4325 | valMAE=6.1565
Fold 5 | Epoch 020 | lr=2.50e-04 | trainMAE=5.4287 | valMAE=6.2905
Fold 5 BEST val MAE: 6.1565

===== CV RESULTS =====
Fold MAEs: [4.5876, 4.9856, 5.8447, 5.7097, 6.1565]
Mean MAE: 5.456827953587408
S

### Step 6B.1

In [45]:
# Inspect one fold's targets distribution quickly
kf = KFold(n_splits=5, shuffle=True, random_state=42)
tr_idx, va_idx = next(iter(kf.split(df)))

train_fold = df.iloc[tr_idx]
val_fold = df.iloc[va_idx]

print("Train fold DryWeightShoot stats:")
print(train_fold["DryWeightShoot"].describe())

print("\nVal fold DryWeightShoot stats:")
print(val_fold["DryWeightShoot"].describe())

print("\nAny NaNs?")
print("Train NaNs:", train_fold["DryWeightShoot"].isna().sum())
print("Val NaNs  :", val_fold["DryWeightShoot"].isna().sum())


Train fold DryWeightShoot stats:
count    184.000000
mean       5.633804
std        4.705485
min        0.090000
25%        1.697500
50%        3.895000
75%        9.332500
max       18.210000
Name: DryWeightShoot, dtype: float64

Val fold DryWeightShoot stats:
count    46.000000
mean      4.632826
std       3.882927
min       0.110000
25%       1.605000
50%       3.900000
75%       7.300000
max      16.470000
Name: DryWeightShoot, dtype: float64

Any NaNs?
Train NaNs: 0
Val NaNs  : 0


In [46]:
# Build fold loaders (small) and run 10 optimizer steps, print loss
train_ds_dbg = LettuceDataset(train_fold.sample(32, random_state=0).reset_index(drop=True),
                              TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True, augment=True)
train_loader_dbg = DataLoader(train_ds_dbg, batch_size=8, shuffle=True, num_workers=0)

model_dbg = SmallCNNMultiTask(dropout=0.3).to(device)
opt_dbg = optim.Adam(model_dbg.parameters(), lr=1e-3, weight_decay=1e-4)

model_dbg.train()
for step, (xb, yb, yauxb, ids) in enumerate(train_loader_dbg):
    xb = xb.to(device).float()
    yb = yb.to(device).float()
    yauxb = yauxb.to(device).float()

    opt_dbg.zero_grad()
    pred_main, pred_aux = model_dbg(xb)

    loss_main = mae(pred_main, yb)
    loss_aux  = mse(pred_aux, yauxb)
    loss = loss_main + 0.3 * loss_aux
    loss.backward()
    opt_dbg.step()

    print(f"step {step} | loss_main(MAE)={loss_main.item():.4f} | loss_aux(MSE)={loss_aux.item():.4f}")
    if step == 9:
        break


step 0 | loss_main(MAE)=2.7474 | loss_aux(MSE)=0.6958
step 1 | loss_main(MAE)=2.9483 | loss_aux(MSE)=1.1962
step 2 | loss_main(MAE)=3.4221 | loss_aux(MSE)=0.7710
step 3 | loss_main(MAE)=3.7630 | loss_aux(MSE)=1.0145


### Step 6B Fix

In [47]:
import torch
import torch.nn as nn

mae = nn.L1Loss()
mse = nn.MSELoss()

def train_one_epoch(model, loader, optimizer, device, lambda_aux=0.3):
    model.train()
    total_loss = 0.0
    total_main = 0.0
    total_aux  = 0.0
    n = 0

    for xb, yb, yauxb, ids in loader:
        xb = xb.to(device).float()
        yb = yb.to(device).float()
        yauxb = yauxb.to(device).float()

        optimizer.zero_grad()
        pred_main, pred_aux = model(xb)

        loss_main = mae(pred_main, yb)
        loss_aux  = mse(pred_aux, yauxb)
        loss = loss_main + lambda_aux * loss_aux

        loss.backward()
        optimizer.step()

        bs = xb.size(0)
        total_loss += loss.item() * bs
        total_main += loss_main.item() * bs
        total_aux  += loss_aux.item() * bs
        n += bs

    return total_loss / n, total_main / n, total_aux / n

@torch.no_grad()
def evaluate_mae(model, loader, device):
    model.eval()
    total = 0.0
    n = 0
    for xb, yb, yauxb, ids in loader:
        xb = xb.to(device).float()
        yb = yb.to(device).float()
        pred_main, _ = model(xb)
        loss_main = mae(pred_main, yb)
        bs = xb.size(0)
        total += loss_main.item() * bs
        n += bs
    return total / n


In [None]:
from sklearn.model_selection import KFold
import numpy as np
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def run_one_fold(train_idx, val_idx, df, fold_id, lambda_aux=0.3):
    train_fold = df.iloc[train_idx].reset_index(drop=True)
    val_fold   = df.iloc[val_idx].reset_index(drop=True)

    train_ds = LettuceDataset(train_fold, TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True, augment=True)
    val_ds   = LettuceDataset(val_fold,   TRAIN_RGB_DIR, TRAIN_DEPTH_DIR, train_mode=True, augment=False)

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
    val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    model = SmallCNNMultiTask(dropout=0.3).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=8)

    best_val = float("inf")
    best_state = None
    patience = 20
    patience_ctr = 0

    for epoch in range(1, 151):
        tr_loss, tr_main, tr_aux = train_one_epoch(model, train_loader, optimizer, device, lambda_aux=lambda_aux)
        val_mae = evaluate_mae(model, val_loader, device)

        scheduler.step(val_mae)

        if val_mae < best_val - 1e-4:
            best_val = val_mae
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            patience_ctr = 0
        else:
            patience_ctr += 1

        if epoch % 20 == 0 or epoch == 1:
            lr = optimizer.param_groups[0]["lr"]
            print(f"Fold {fold_id} | Epoch {epoch:03d} | lr={lr:.2e} | trainMAE={tr_main:.4f} | valMAE={val_mae:.4f}")

        if patience_ctr >= patience:
            break

    return best_val, best_state

# Run CV
df = train_df.copy().reset_index(drop=True)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

fold_maes = []
fold_states = []

for fold_id, (tr_idx, va_idx) in enumerate(kf.split(df), start=1):
    print("\n==============================")
    print("Starting fold", fold_id)
    print("==============================")
    best_val, best_state = run_one_fold(tr_idx, va_idx, df, fold_id, lambda_aux=0.3)
    fold_maes.append(best_val)
    fold_states.append(best_state)
    print(f"Fold {fold_id} BEST val MAE: {best_val:.4f}")

print("\n===== CV RESULTS =====")
print("Fold MAEs:", [round(x,4) for x in fold_maes])
print("Mean MAE:", float(np.mean(fold_maes)))
print("Std  MAE:", float(np.std(fold_maes)))



Starting fold 1
Fold 1 | Epoch 001 | lr=1.00e-03 | trainMAE=3.8331 | valMAE=2.1016
Fold 1 | Epoch 020 | lr=1.00e-03 | trainMAE=1.5415 | valMAE=0.8974
Fold 1 | Epoch 040 | lr=5.00e-04 | trainMAE=1.2666 | valMAE=0.9016
Fold 1 | Epoch 060 | lr=2.50e-04 | trainMAE=1.1720 | valMAE=0.6844
Fold 1 BEST val MAE: 0.5918

Starting fold 2
Fold 2 | Epoch 001 | lr=1.00e-03 | trainMAE=3.4629 | valMAE=2.8508
Fold 2 | Epoch 020 | lr=1.00e-03 | trainMAE=1.4392 | valMAE=0.8374
Fold 2 | Epoch 040 | lr=5.00e-04 | trainMAE=1.0181 | valMAE=0.6906
Fold 2 | Epoch 060 | lr=2.50e-04 | trainMAE=1.1130 | valMAE=0.6553
