### Imports

In [140]:
import os
SET_SEED=42
os.environ['PYTHONHASHSEED'] = str(SET_SEED)
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

In [141]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import copy
import sklearn
import gc
import timm
import time
from torchvision.models import resnet50, ResNet50_Weights
import random
import safetensors
from safetensors.torch import load_file
from PIL import Image
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from torchvision import transforms
from torchvision import models
from collections import defaultdict
from sklearn.model_selection import GroupKFold
from torch.utils.data import random_split
from scipy.stats import zscore
# if os.environ.get('KAGGLE_KERNEL_RUN_TYPE') != 'Batch':
#     !pip install -q ipdb
#     import ipdb

In [142]:
def set_seed(seed=SET_SEED):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    try:
        torch.use_deterministic_algorithms(True, warn_only=True)
    except AttributeError:
        pass  # Older PyTorch versions
    

set_seed(SET_SEED)

In [143]:
# Hyperparameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 16
NUM_FT_EPOCHS = 20
NUM_BB_EPOCHS = 12
LEARNING_RATE = 0.0001
WEIGHT_DECAY = 1e-4
NUM_FOLDS = 3
GIVEN_WEIGHTS = [0.1, 0.1, 0.1, 0.5, 0.2]
TARGET_COLS = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']
BASE_MODEL='efficientnet_b1'
IMAGE_SIZE=(392,392)
TRAIN_SHUFFLE=0

In [144]:
def print_result(train_loss, val_loss, epoch_start, epoch, num_epochs, val_r2):
    epoch_time = time.time() - epoch_start
    mins, secs = divmod(epoch_time, 60)
    
    print(f'Epoch {epoch+1}/{num_epochs} - '
          f'Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f} | '
          f'R²: {val_r2:.4f} | '
          f'Time: {int(mins)}m {int(secs)}s')

class BiomassDataset(torch.utils.data.Dataset):
    def __init__(self, df, base_path, transform=None):
        self.df = df
        self.base_path = base_path
        self.transform = transform
        self.target_cols = TARGET_COLS
        self.is_training = all(col in df.columns for col in self.target_cols)
    
    def __len__(self):
        return len(self.df) * 5

    def _get_crop(self, image, crop_type):
        """Get different crop from image"""
        width, height = image.size
        
        if crop_type == 0:  # Left half
            return image.crop((0, 0, width // 2, height))
        elif crop_type == 1:  # Right half
            return image.crop((width // 2, 0, width, height))
        elif crop_type == 2:  # Top half
            return image.crop((0, 0, width, height // 2))
        elif crop_type == 3:  # Bottom half
            return image.crop((0, height // 2, width, height))
        else:  # Center crop (80% of image)
            crop_w, crop_h = int(width * 0.8), int(height * 0.8)
            left = (width - crop_w) // 2
            top = (height - crop_h) // 2
            return image.crop((left, top, left + crop_w, top + crop_h))
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx // 5] 
        img_path = os.path.join(self.base_path, row['image_path'])
        
        image = Image.open(img_path).convert('RGB')
        # image = self._get_crop(image, crop_type)
        
        # half = idx % 2
        # width, height = image.size
        # if half == 0:
        #     image = image.crop((0, 0, width // 2, height))  # Left half
        # else:
        #     image = image.crop((width // 2, 0, width, height))  # Right half
        
        if self.transform:
            image = self.transform(image)
        
        if self.is_training:
            targets = row[self.target_cols].values.astype('float32')
            targets_normalized = (targets - TARGET_MEANS.numpy()) / TARGET_STDS.numpy()
            
            return image, torch.tensor(targets_normalized, dtype=torch.float32)
        else:            
            return image, row['image_path']

class ExtraDataset(torch.utils.data.Dataset):
    def __init__(self, df, img_path, transform=None):
        self.df = df.reset_index(drop=True)
        self.img_path = img_path
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_file = os.path.join(self.img_path, row['image_file_name'])
        image = Image.open(img_file).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        target = torch.tensor(row['dry_total'], dtype=torch.float32)
        return image, target

In [145]:
class PreTrainModel(nn.Module):
    def __init__(self):
        super().__init__()
        # model = timm.create_model(BASE_MODEL, pretrained=True, num_classes=0)
        
        # ckpt_path = "/kaggle/input/m/voxoff/resnet50/pytorch/default/1/model.safetensors"
        # # model.load_state_dict(load_file(ckpt_path))

        # loaded_state_dict = load_file(ckpt_path)

        # new_state_dict = {}
        # for k, v in loaded_state_dict.items():
        #     # Assuming the actual ResNet backbone weights are nested under 'resnet.encoder.'
        #     if k.startswith('resnet.encoder.'):
        #         new_key = k[len('resnet.encoder.'):] # Strip the prefix
        #         new_state_dict[new_key] = v

        # # Load the modified state dict, allowing for non-matching keys (strict=False)
        # model.load_state_dict(new_state_dict, strict=False)

        
        # self.backbone = model
        # in_features = self.backbone.num_features
        
        self.regression_head = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1)
        )
    
    def forward(self, x):
        features = self.backbone(x)
        return self.regression_head(features)

In [146]:
# from torchvision.models import resnet18, resnet34, resnet50, efficientnet_b0, efficientnet_b1

class FinetuneModel(nn.Module):
    def __init__(self, pretrained_backbone=None):
        super().__init__()
        # self.backbone = timm.create_model('tf_efficientnet_b1', pretrained=False, num_classes = 0, checkpoint_path='')
        self.backbone = timm.create_model('tf_efficientnet_b1', pretrained=False, num_classes=0)
        model_path = '/kaggle/input/tf-efficientnet/pytorch/tf-efficientnet-b1/1/tf_efficientnet_b1_aa-ea7a6ee0.pth'
        checkpoint = torch.load(model_path)
        self.backbone.load_state_dict(checkpoint, strict=False)
        
        feature_dim = self.backbone.num_features
        
        # backbone_name=BASE_MODEL
        # if backbone_name == "efficientnet_b0":
        # self.backbone = efficientnet_b0(weights="IMAGENET1K_V1")
        # feature_dim = self.backbone.classifier[1].in_features
        # self.backbone.classifier = nn.Identity()
        # elif backbone_name == "efficientnet_b1":
        #     self.backbone = efficientnet_b1(weights="IMAGENET1K_V1")
        #     feature_dim = self.backbone.classifier[1].in_features
        #     self.backbone.classifier = nn.Identity()
        # else:
        #     raise ValueError(f"Backbone {backbone_name} not supported.")
      

        self.regression_head = nn.Sequential(
            nn.Linear(feature_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(64, 5), # 5 outputs for competition
            nn.ReLU()
        )

        self._init_head_weights()
    
    def _init_head_weights(self):
        """Initialize regression head with deterministic weights"""
        for m in self.regression_head.modules():
            if isinstance(m, nn.Linear):
                # Use a fixed seed for weight initialization
                with torch.random.fork_rng():
                    torch.manual_seed(SET_SEED)
                    torch.nn.init.xavier_uniform_(m.weight)
                    if m.bias is not None:
                        torch.nn.init.zeros_(m.bias)
    
    def forward(self, x):
        features = self.backbone(x)
        return self.regression_head(features)

In [147]:
base = '/kaggle/input'
train_csv = f'{base}/csiro-biomass/train.csv'
test_csv = f'{base}/csiro-biomass/test.csv'
extra_csv = f'{base}/grassclover-dataset/biomass_data/train/biomass_train_data.csv'
extra_img = f'{base}/grassclover-dataset/biomass_data/train/images'
base_path = f'{base}/csiro-biomass/'
submission_path = f'{base}/csiro-biomass/sample_submission.csv'

dataset_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)
extra_df = pd.read_csv(extra_csv, sep=';')
extra_img_path = extra_img
unique_test_images = test_df['image_path'].unique()

In [148]:
dataset_df['Sampling_Date'] = pd.to_datetime(dataset_df['Sampling_Date'], format='mixed')  # adjust format if needed
dataset_df = dataset_df.pivot(
    index=['image_path','Sampling_Date'],
    columns='target_name',
    values='target'
).reset_index()
dataset_df['Month'] = dataset_df['Sampling_Date'].dt.month

In [149]:
def denormalize_targets(normalized_targets):
    """
    Convert normalized targets back to original scale.
    
    Args:
        normalized_targets: Tensor of shape [batch_size, 5] or [5] with normalized values
    
    Returns:
        Tensor of same shape with denormalized values
    """
    if normalized_targets.dim() == 1:
        # Single sample: [5]
        means = TARGET_MEANS.to(normalized_targets.device)
        stds = TARGET_STDS.to(normalized_targets.device)
        return normalized_targets * stds + means
    else:
        # Batch: [batch_size, 5]
        means = TARGET_MEANS.to(normalized_targets.device).unsqueeze(0)  # [1, 5]
        stds = TARGET_STDS.to(normalized_targets.device).unsqueeze(0)  # [1, 5]
        return normalized_targets * stds + means


# Normalization
target_stats = {}
for col in TARGET_COLS:
    target_stats[col] = {
        'mean': dataset_df[col].mean(),
        'std': dataset_df[col].std() + 1e-8
    }
    print(f"{col}: mean={target_stats[col]['mean']:.2f}, std={target_stats[col]['std']:.2f}")

# Store for later denormalization
TARGET_MEANS = torch.tensor([target_stats[col]['mean'] for col in TARGET_COLS], dtype=torch.float32)
TARGET_STDS = torch.tensor([target_stats[col]['std'] for col in TARGET_COLS], dtype=torch.float32)

dataset_df.head()

Dry_Clover_g: mean=6.65, std=12.12
Dry_Dead_g: mean=12.04, std=12.40
Dry_Green_g: mean=26.62, std=25.40
Dry_Total_g: mean=45.32, std=27.98
GDM_g: mean=33.27, std=24.94


target_name,image_path,Sampling_Date,Dry_Clover_g,Dry_Dead_g,Dry_Green_g,Dry_Total_g,GDM_g,Month
0,train/ID1011485656.jpg,2015-09-04,0.0,31.9984,16.2751,48.2735,16.275,9
1,train/ID1012260530.jpg,2015-04-01,0.0,0.0,7.6,7.6,7.6,4
2,train/ID1025234388.jpg,2015-09-01,6.05,0.0,0.0,6.05,6.05,9
3,train/ID1028611175.jpg,2015-05-18,0.0,30.9703,24.2376,55.2079,24.2376,5
4,train/ID1035947949.jpg,2015-09-11,0.4343,23.2239,10.5261,34.1844,10.9605,9


### Pytorch

In [150]:
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),  # Grass can be flipped
    transforms.RandomRotation(degrees=15),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    
    # Color augmentations (grass varies in color)
    transforms.ColorJitter(
        brightness=0.1,  # Sunlight variations
        contrast=0.1,     # Different lighting
        saturation=0.1,  # Grass color variations
        hue=0.1
    ),
    
    # Advanced augmentations
    transforms.RandomApply([
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 0.5))
    ], p=0.1),
    # transforms.RandomHorizontalFlip(p=0.2),
    # transforms.RandomVerticalFlip(p=0.1),
    # transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                       std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                       std=[0.229, 0.224, 0.225])
])

### Support Functions

In [151]:
def forward_pass(images, targets, optimizer, model, validation=False):
    images = images.to(device)
    targets = targets.to(device)
    
    if not validation: 
        optimizer.zero_grad()
    
    outputs = model(images)
    
    loss = combined_biomass_loss(outputs, targets)
    
    if not validation:
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
    
    return loss

### Train Model

In [152]:
def combined_biomass_loss(biomass_pred, biomass_true):
    weights = torch.tensor(GIVEN_WEIGHTS, device=biomass_pred.device)

    smooth_l1 = nn.SmoothL1Loss(reduction='none')
    mse = nn.MSELoss(reduction='none')
    
    smooth_l1_loss = smooth_l1(biomass_pred, biomass_true)
    mse_loss = mse(biomass_pred, biomass_true)
    
    combined = 0.3 * smooth_l1_loss + 0.7 * mse_loss
    weighted_loss = (combined * weights).mean()
    
    return weighted_loss

In [153]:
def weighted_r2_score(sum_target, total_samples, sum_target_sq, ss_res):
    mean_target = sum_target / total_samples
    ss_tot = sum_target_sq - total_samples * (mean_target ** 2)

    r2_per_output = 1 - ss_res / (ss_tot + 1e-10)

    weights = torch.tensor(GIVEN_WEIGHTS, device=device)
    r2_weighted = (r2_per_output * weights).sum() / weights.sum()
    return r2_weighted

In [154]:
def pretrain_phase(extra_df, extra_img_path, num_epochs=NUM_FT_EPOCHS):
    print("=" * 50)
    print("PHASE 1: PRE-TRAINING ON EXTRA DATASET")
    
    # Create dataset
    extra_dataset = ExtraDataset(extra_df, extra_img_path, transform=train_transform)

    generator = torch.Generator().manual_seed(SET_SEED)
    
    extra_train_size = int(0.8 * len(extra_dataset))
    extra_dev_size = len(extra_dataset) - extra_train_size

    extra_train_dataset, extra_dev_dataset = random_split(extra_dataset, [extra_train_size, extra_dev_size], generator=generator)

    extra_loader = DataLoader(extra_train_dataset, batch_size=16, shuffle=TRAIN_SHUFFLE)
    extra_dev_loader = DataLoader(extra_dev_dataset, batch_size=16, shuffle=False)  # usually no shuffle for dev
    
    # Initialize model
    model = PreTrainModel().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.SmoothL1Loss()
    
    gc.collect()
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for images, targets in extra_loader:
            images, targets = images.to(device), targets.to(device)
            
            optimizer.zero_grad()
            outputs = model(images).squeeze()
            loss = criterion(outputs, targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            
            total_loss += loss.item()
            # validate_epoch(model, extra_dev_loader, criterion=criterion)
        
        print(f"Pre-train Epoch {epoch+1}/{num_epochs} - Loss: {total_loss/len(extra_loader):.4f}")
    
    return model.backbone 

In [155]:
def create_data_loaders(train_df, val_df, batch_size=BATCH_SIZE):
    train_dataset = BiomassDataset(train_df, base_path, transform=train_transform)
    val_dataset = BiomassDataset(val_df, base_path, transform=val_transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=TRAIN_SHUFFLE, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    return train_loader, val_loader

def train_epoch(model, train_loader, optimizer, quick):
    model.train()
    total_loss = 0
    for i, (images, targets) in enumerate(train_loader):
        if quick and i >= 1:
            break
            
        loss = forward_pass(images, targets, optimizer, model)
        total_loss += loss.item()
    return total_loss / len(train_loader)

def validate_epoch(model, val_loader, criterion=None):
    model.eval()
    total_loss = 0
    total_samples = 0

    # Accumulate sums for R² calculation (no need to store all predictions)
    ss_res = torch.zeros(5, device=device)
    sum_target = torch.zeros(5, device=device)
    sum_target_sq = torch.zeros(5, device=device)

    with torch.no_grad():
        for images, targets in val_loader:
            images = images.to(device)
            targets = targets.to(device)
            
            outputs = model(images)

            if criterion:
                loss = criterion(outputs, targets)
            else:
                loss = combined_biomass_loss(outputs, targets)
            
            total_loss += loss.item()
            total_samples += outputs.shape[0] # batch_size
            
            outputs_denorm = denormalize_targets(outputs)
            targets_denorm = denormalize_targets(targets)
            
            ss_res += ((outputs_denorm - targets_denorm) ** 2).sum(dim=0)
            sum_target += targets_denorm.sum(dim=0)
            sum_target_sq += (targets_denorm ** 2).sum(dim=0)

    r2_weighted = weighted_r2_score(sum_target, total_samples, sum_target_sq, ss_res)

    return total_loss / len(val_loader), r2_weighted


def compute_fold_metrics(epoch_train_losses, epoch_val_losses, epoch_val_r2s):
    best_val_idx = np.argmin(epoch_val_losses)
    best_val_loss = epoch_val_losses[best_val_idx]
    best_val_r2 = epoch_val_r2s[best_val_idx]
    overfit_metric = epoch_train_losses[best_val_idx] - best_val_loss
    stability_val_loss = np.mean(epoch_val_losses[-5:])
    stability_val_r2 = np.mean(epoch_val_r2s[-5:])

    metrics = {
        "best_val_loss": best_val_loss,
        "best_val_r2": best_val_r2,
        "overfit_metric": overfit_metric,
        "stability_val_loss": stability_val_loss,
        "stability_val_r2": stability_val_r2
    }
    return metrics

def print_fold_metrics(fold, metrics):
    print(f"\n--- Fold {fold + 1} Metrics Summary ---")
    print(f"Best Val Loss: {metrics['best_val_loss']:.4f}")
    print(f"Best Val R²: {metrics['best_val_r2']:.4f}")
    print(f"Overfit Metric (train - val at best epoch): {metrics['overfit_metric']:.4f}")
    print(f"Average Val Loss (last 5 epochs): {metrics['stability_val_loss']:.4f}")
    print(f"Average Val R² (last 5 epochs): {metrics['stability_val_r2']:.4f}")


def finetune_phase(pretrained_backbone=None, num_epochs=NUM_FT_EPOCHS, quick=False):
    print("\n" + "=" * 50)
    print("PHASE 2: FINE-TUNING ON COMPETITION DATA")
    
    r2 = []
    fold_models = []

    for fold, (train_idx, val_idx) in enumerate(splits):
        print(f"\n--- Fold {fold + 1} Metrics Summary ---")
        train_df = dataset_df.iloc[train_idx].copy()
        val_df = dataset_df.iloc[val_idx].copy()
        train_loader, val_loader = create_data_loaders(train_df, val_df)

        model = FinetuneModel().to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

        
        epoch_train_losses = []
        epoch_val_losses = []
        epoch_val_r2s = []
    

        for epoch in range(num_epochs):
            epoch_start = time.time()
            train_loss = train_epoch(model, train_loader, optimizer, quick)
            val_loss, val_r2 = validate_epoch(model, val_loader)

            epoch_train_losses.append(train_loss)
            epoch_val_losses.append(val_loss)
            epoch_val_r2s.append(val_r2.item())
            
            print_result(train_loss, val_loss, epoch_start, epoch, num_epochs, val_r2)

        r2.append(val_r2.item())
        fold_models.append(model)
        fold_metrics = compute_fold_metrics(epoch_train_losses, epoch_val_losses, epoch_val_r2s)
        print_fold_metrics(fold, fold_metrics)

    overall_r2 = np.array(r2).mean()
    
    print(f'\nOverall R² across all folds: {overall_r2:.4f}')

    return fold_models

In [156]:
# pretrained_backbone = pretrain_phase(extra_df, extra_img_path, num_epochs=10)

In [None]:
### NUM_FOLDS
# NUM_FT_EPOCHS
# kfold = GroupKFold(n_splits=NUM_FOLDS)
# groups = dataset_df['Month']
# splits = kfold.split(dataset_df, groups=groups)
from sklearn.model_selection import train_test_split
import numpy as np

# Make a single random split
train_idx, val_idx = train_test_split(
    np.arange(len(dataset_df)),
    test_size=0.2,
    shuffle=False,
    random_state=42
)

# Wrap it in a list so enumerate() still works
splits = [(train_idx, val_idx)]
import warnings
warnings.filterwarnings("ignore", message=".*does not have a deterministic implementation.*")
final_model = finetune_phase(pretrained_backbone=False,num_epochs=40, quick=False)
# Epoch 1/40 - Train Loss: 0.1645, Val Loss: 0.1424 | R²: 0.0942 | Time: 0m 40s
# Epoch 2/40 - Train Loss: 0.1284, Val Loss: 0.1148 | R²: 0.2658 | Time: 0m 27s
# Epoch 3/40 - Train Loss: 0.1011, Val Loss: 0.1003 | R²: 0.3627 | Time: 0m 27s
# Epoch 4/40 - Train Loss: 0.0802, Val Loss: 0.1005 | R²: 0.3636 | Time: 0m 27s
# Epoch 5/40 - Train Loss: 0.0816, Val Loss: 0.0999 | R²: 0.3692 | Time: 0m 27s
# Epoch 6/40 - Train Loss: 0.0742, Val Loss: 0.0894 | R²: 0.4357 | Time: 0m 27s
# Epoch 7/40 - Train Loss: 0.0757, Val Loss: 0.0892 | R²: 0.4407 | Time: 0m 27s
# Epoch 8/40 - Train Loss: 0.0713, Val Loss: 0.0929 | R²: 0.4147 | Time: 0m 27s
# Epoch 9/40 - Train Loss: 0.0741, Val Loss: 0.0890 | R²: 0.4445 | Time: 0m 27s
# Epoch 10/40 - Train Loss: 0.0657, Val Loss: 0.0834 | R²: 0.4740 | Time: 0m 27s
# Epoch 11/40 - Train Loss: 0.0662, Val Loss: 0.0873 | R²: 0.4419 | Time: 0m 27s
# Epoch 12/40 - Train Loss: 0.0577, Val Loss: 0.0813 | R²: 0.4830 | Time: 0m 27s
# Epoch 13/40 - Train Loss: 0.0529, Val Loss: 0.0846 | R²: 0.4671 | Time: 0m 27s
# Epoch 15/40 - Train Loss: 0.0508, Val Loss: 0.0844 | R²: 0.4676 | Time: 0m 28s
# Epoch 16/40 - Train Loss: 0.0593, Val Loss: 0.0844 | R²: 0.4698 | Time: 0m 27s
# Epoch 17/40 - Train Loss: 0.0553, Val Loss: 0.0799 | R²: 0.5002 | Time: 0m 28s
# Epoch 18/40 - Train Loss: 0.0503, Val Loss: 0.0881 | R²: 0.4480 | Time: 0m 28s
# Epoch 19/40 - Train Loss: 0.0533, Val Loss: 0.0807 | R²: 0.4952 | Time: 0m 28s
# --- efficientb 
# Epoch 1/40 - Train Loss: 0.1669, Val Loss: 0.1252 | R²: 0.2158 | Time: 0m 50s
# Epoch 2/40 - Train Loss: 0.1363, Val Loss: 0.1012 | R²: 0.3694 | Time: 0m 50s
# Epoch 3/40 - Train Loss: 0.1186, Val Loss: 0.0877 | R²: 0.4542 | Time: 0m 50s
# Epoch 4/40 - Train Loss: 0.1043, Val Loss: 0.0841 | R²: 0.4783 | Time: 0m 50s
# Epoch 5/40 - Train Loss: 0.0916, Val Loss: 0.0814 | R²: 0.4980 | Time: 0m 50s
# Epoch 6/40 - Train Loss: 0.0825, Val Loss: 0.0801 | R²: 0.5055 | Time: 0m 49s
# Epoch 7/40 - Train Loss: 0.0831, Val Loss: 0.0750 | R²: 0.5377 | Time: 0m 50s
# Epoch 8/40 - Train Loss: 0.0733, Val Loss: 0.0757 | R²: 0.5345 | Time: 0m 50s
# Epoch 9/40 - Train Loss: 0.0717, Val Loss: 0.0778 | R²: 0.5194 | Time: 0m 50s
# Epoch 10/40 - Train Loss: 0.0693, Val Loss: 0.0745 | R²: 0.5398 | Time: 0m 50s
# Epoch 11/40 - Train Loss: 0.0671, Val Loss: 0.0713 | R²: 0.5599 | Time: 0m 50s
# Epoch 12/40 - Train Loss: 0.0630, Val Loss: 0.0724 | R²: 0.5511 | Time: 0m 50s
# Epoch 13/40 - Train Loss: 0.0587, Val Loss: 0.0759 | R²: 0.5284 | Time: 0m 50s
# Epoch 14/40 - Train Loss: 0.0631, Val Loss: 0.0732 | R²: 0.5463 | Time: 0m 50s
# Epoch 15/40 - Train Loss: 0.0568, Val Loss: 0.0688 | R²: 0.5731 | Time: 0m 50s
# Epoch 16/40 - Train Loss: 0.0526, Val Loss: 0.0679 | R²: 0.5778 | Time: 0m 50s
# Epoch 17/40 - Train Loss: 0.0543, Val Loss: 0.0669 | R²: 0.5858 | Time: 0m 50s
# --- With  with higher weight decay



PHASE 2: FINE-TUNING ON COMPETITION DATA

--- Fold 1 Metrics Summary ---
Epoch 1/40 - Train Loss: 0.1575, Val Loss: 0.1277 | R²: 0.1926 | Time: 2m 36s
Epoch 2/40 - Train Loss: 0.1381, Val Loss: 0.1101 | R²: 0.3069 | Time: 2m 37s
Epoch 3/40 - Train Loss: 0.1231, Val Loss: 0.1014 | R²: 0.3653 | Time: 2m 41s
Epoch 4/40 - Train Loss: 0.1102, Val Loss: 0.0857 | R²: 0.4698 | Time: 2m 50s
Epoch 5/40 - Train Loss: 0.1034, Val Loss: 0.0860 | R²: 0.4670 | Time: 2m 44s
Epoch 6/40 - Train Loss: 0.0946, Val Loss: 0.0734 | R²: 0.5480 | Time: 2m 39s
Epoch 7/40 - Train Loss: 0.0855, Val Loss: 0.0762 | R²: 0.5293 | Time: 2m 40s
Epoch 8/40 - Train Loss: 0.0762, Val Loss: 0.0829 | R²: 0.4856 | Time: 2m 34s
Epoch 9/40 - Train Loss: 0.0694, Val Loss: 0.0803 | R²: 0.5006 | Time: 2m 32s
Epoch 10/40 - Train Loss: 0.0639, Val Loss: 0.0788 | R²: 0.5108 | Time: 2m 35s
Epoch 11/40 - Train Loss: 0.0619, Val Loss: 0.0800 | R²: 0.5032 | Time: 2m 34s
Epoch 12/40 - Train Loss: 0.0598, Val Loss: 0.0773 | R²: 0.5135 | 

In [None]:
test_dataset = BiomassDataset(test_df, base_path, transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
fold_models = final_model
ensemble_outputs = []
with torch.no_grad():
    for image, img_path in test_loader:
        image = image.to(device)
        
        # Get predictions from all models
        all_outputs = []
        for model in fold_models:
            model.eval()
            outputs = model(image)
            print(outputs)
            all_outputs.append(outputs)
            ensemble_outputs.append(torch.stack(all_outputs).mean(dim=0))
        
ensemble_outputs = torch.cat(ensemble_outputs, dim=0)


In [None]:
test_dataset = BiomassDataset(test_df, base_path, transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# Dictionary to store predictions grouped by image_path
preds_by_image = defaultdict(list)

with torch.no_grad():
    for image, img_paths in test_loader:
        image = image.to(device)
        
        # Get ensemble predictions from all models
        all_outputs = []
        for model in fold_models:
            model.eval()
            outputs = model(image)
            all_outputs.append(outputs)
        
        # Average across models (ensemble)
        ensemble_batch = torch.stack(all_outputs).mean(dim=0)  # [batch_size, 5]
        
        # Group predictions by image_path
        # Since BiomassDataset returns 2 items per image (left/right halves),
        # img_paths is a tuple/list of image paths (may have duplicates)
        # We group by image_path and will average left/right later
        batch_size = ensemble_batch.shape[0]
        for i in range(batch_size):
            img_path = img_paths[i]  # Get the image_path for this batch item
            preds_by_image[img_path].append(ensemble_batch[i].cpu())

# Average left/right predictions for each image
image_predictions = {}
for img_path, preds in preds_by_image.items():
    # Stack and average: [num_halves, 5] -> [5]
    # Each image should have exactly 2 predictions (left + right)
    stacked_preds = torch.stack(preds)  # [2, 5] for left and right halves
    image_predictions[img_path] = stacked_preds.mean(dim=0)  # Average to [5]ee

# Check predictions for first image
if len(image_predictions) > 0:
    first_img_path = list(image_predictions.keys())[0]
    print(f"Image: {first_img_path}")
    print(f"Predictions: {image_predictions[first_img_path].tolist()}")
    print(f"Target columns: {TARGET_COLS}")
else:
    print("No predictions available yet. Run the test prediction cell first.")

In [None]:
# Denormalize all predictions back to original scale
for img_path in image_predictions:
    image_predictions[img_path] = denormalize_targets(image_predictions[img_path])

print(f"Denormalized predictions for {len(image_predictions)} images")


In [None]:
ensemble_outputs[0].tolist()

In [None]:
submission_df = pd.read_csv(submission_path)

for i, row in submission_df.iterrows():
    img_path = test_df[test_df['sample_id'] == row['sample_id']]['image_path'].values[0]
    target_name = test_df[test_df['sample_id'] == row['sample_id']]['target_name'].values[0]
    
    preds = ensemble_outputs
    target_idx = TARGET_COLS.index(target_name)
    submission_df.at[i, 'target'] = ensemble_outputs[i, target_idx].item()

submission_df.to_csv('submission.csv', index=False)
submission_df