In [1]:
import os
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
TOKEN = user_secrets.get_secret("GITHUB_TOKEN")
USERNAME = 'ada-yl2425'
REPO_NAME = 'CSIRO---Image2Biomass-Prediction'
!git clone https://{USERNAME}:{TOKEN}@github.com/{USERNAME}/{REPO_NAME}.git
!git pull origin main
!ls

Cloning into 'CSIRO---Image2Biomass-Prediction'...
remote: Enumerating objects: 473, done.[K
remote: Counting objects: 100% (84/84), done.[K
remote: Compressing objects: 100% (69/69), done.[K
remote: Total 473 (delta 53), reused 29 (delta 15), pack-reused 389 (from 4)[K
Receiving objects: 100% (473/473), 1.02 GiB | 41.12 MiB/s, done.
Resolving deltas: 100% (57/57), done.
Updating files: 100% (377/377), done.
fatal: not a git repository (or any parent up to mount point /kaggle)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
CSIRO---Image2Biomass-Prediction  __notebook__.ipynb


In [2]:
!pip install torch torchvision pandas scikit-learn pillow tqdm timm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curan

In [3]:
import os
import argparse
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split, KFold
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import warnings

In [4]:
# 忽略 PIL 的一些警告
warnings.filterwarnings("ignore", "(Possibly corrupt EXIF data|Truncated File Read)")

In [5]:
# --- 1. 评估指标 (Weighted R2) ---
def calculate_weighted_r2(y_true, y_pred, device):
    """
    在原始尺度上计算全局加权 R2
    (此版本已修复，符合官方公式)
    y_true, y_pred: 形状为 [N, 5] 的张量 (在原始尺度上)
    """
    weights = torch.tensor([0.1, 0.1, 0.1, 0.2, 0.5], dtype=torch.float32).to(device) # 形状 [5]

    # --- 1. SS_res (Residual Sum of Squares) ---
    # 按照公式: SS_res = Σ w_j * (y_j - ŷ_j)^2
    # weights * (y_true - y_pred) ** 2 -> 广播 [5] 到 [N, 5]
    # torch.sum(...) -> 聚合 N*5 个元素
    ss_res = torch.sum(weights * (y_true - y_pred) ** 2)

    # --- 2. SS_tot (Total Sum of Squares) ---

    # [修复] 2a. 计算全局加权均值 ȳ_w (y_mean_w)
    # ȳ_w = (Σ w_j * y_j) / (Σ w_j)

    # 分子 (Numerator): Σ w_j * y_j
    # (weights * y_true) -> 广播 [5] 到 [N, 5]
    # torch.sum(...) -> 聚合 N*5 个元素
    sum_weighted_values = torch.sum(weights * y_true)

    # 分母 (Denominator): Σ w_j
    # 1. 将 weights [5] 广播到 [N, 5] (N是批量大小)
    weights_broadcasted = weights.expand_as(y_true)
    # 2. 计算总权重和 (这等于 N * 1.0)
    sum_of_all_weights = torch.sum(weights_broadcasted)

    # 计算 ȳ_w
    y_mean_w = sum_weighted_values / (sum_of_all_weights + 1e-6)

    # [修复] 2b. 计算 SS_tot
    # 按照公式: SS_tot = Σ w_j * (y_j - ȳ_w)^2
    # (y_true - y_mean_w) -> 广播标量 ȳ_w 到 [N, 5]
    # weights * (...) -> 广播 [5] 到 [N, 5]
    ss_tot = torch.sum(weights * (y_true - y_mean_w) ** 2)

    # --- 3. R2 ---
    r2 = 1.0 - (ss_res / (ss_tot + 1e-6)) # +1e-6 防止除以零
    return r2.item()

In [6]:
# --- 2. 自定义数据集 ---
class PastureDataset(Dataset):
    """
    加载图像、表格数据和目标
    """
    def __init__(self, df, img_dir, transforms, img_size): # <-- 增加了 img_size
        self.df = df
        self.img_dir = img_dir
        self.transforms = transforms
        self.img_size = img_size  # <-- 存储 img_size

        # 定义列名
        self.numeric_cols = ['Pre_GSHH_NDVI', 'Height_Ave_cm', 'month_sin', 'month_cos']
        self.categorical_cols = ['State_encoded', 'Species_encoded']

        # 训练目标 (log scale)
        self.log_target_cols = ['log_Dry_Green_g', 'log_Dry_Dead_g',
                                'log_Dry_Clover_g', 'log_GDM_g', 'log_Dry_Total_g']

        # 验证目标 (original scale)
        self.orig_target_cols = ['Dry_Green_g', 'Dry_Dead_g', 'Dry_Clover_g',
                                 'GDM_g', 'Dry_Total_g']

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # 1. 加载图像
        # 您的路径逻辑是正确的，因为 'image_path' 索引包含 'train/' 前缀
        filename = row.name.split('/')[-1]
        img_path = os.path.join(self.img_dir, filename)

        try:
            image = Image.open(img_path).convert('RGB')
            image = self.transforms(image)
        except Exception as e:
            print(f"Warning: Error loading image {img_path}. Using a dummy image. Error: {e}")
            # *** 修复 ***: 使用传入的 img_size
            image = torch.zeros((3, self.img_size, self.img_size))

        # 2. 提取表格数据

        # ---
        # *** 关键修复 ***:
        # 在 .values 之后立即使用 .astype() 强制转换类型
        # ---
        numeric = torch.tensor(
            row[self.numeric_cols].values.astype(np.float32),
            dtype=torch.float32
        )

        categorical = torch.tensor(
            row[self.categorical_cols].values.astype(np.int64), # 类别用 int64
            dtype=torch.long
        )

        # 3. 提取目标 (同样应用修复)
        log_target = torch.tensor(
            row[self.log_target_cols].values.astype(np.float32),
            dtype=torch.float32
        )

        orig_target = torch.tensor(
            row[self.orig_target_cols].values.astype(np.float32),
            dtype=torch.float32
        )

        return {
            'image': image,
            'numeric': numeric,
            'categorical': categorical,
            'log_target': log_target,
            'orig_target': orig_target
        }

In [7]:
# --- 3. 训练和验证循环 ---

def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0.0

    for batch in tqdm(loader, desc="Training"):
        # 移动数据到设备
        image = batch['image'].to(device)
        numeric = batch['numeric'].to(device)
        categorical = batch['categorical'].to(device)
        log_target = batch['log_target'].to(device)

        # 梯度清零
        optimizer.zero_grad()

        # 前向传播
        pred = model(image, numeric, categorical)

        # 计算损失 (在 log 尺度上)
        loss = criterion(pred, log_target)

        # 反向传播
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(loader)

def validate(model, loader, criterion, device):
    model.eval()
    total_val_loss = 0.0
    all_preds_orig = []
    all_targets_orig = []

    with torch.no_grad():
        for batch in tqdm(loader, desc="Validating"):
            image = batch['image'].to(device)
            numeric = batch['numeric'].to(device)
            categorical = batch['categorical'].to(device)
            log_target = batch['log_target'].to(device)
            orig_target = batch['orig_target'].to(device)

            # 预测 (log 尺度)
            pred_log = model(image, numeric, categorical)

            # 计算验证损失 (log 尺度)
            loss = criterion(pred_log, log_target)
            total_val_loss += loss.item()

            # 转换回原始尺度
            pred_orig = torch.expm1(pred_log)

            all_preds_orig.append(pred_orig)
            all_targets_orig.append(orig_target)

    # 拼接所有批次的结果
    all_preds_orig = torch.cat(all_preds_orig, dim=0)
    all_targets_orig = torch.cat(all_targets_orig, dim=0)

    # 计算 R2 (原始尺度)
    val_r2 = calculate_weighted_r2(all_targets_orig, all_preds_orig, device)

    avg_val_loss = total_val_loss / len(loader)

    return avg_val_loss, val_r2


In [8]:
# --- 4. 主函数 (已更新为 5-Fold CV + Early Stopping) ---
def main(args):
    # 设置设备
    device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
    print(f"Using device: {device}")

    # 加载数据
    df = pd.read_csv(args.data_csv, index_col='image_path')

    # 获取类别数量 (用于 Embedding)
    num_states = df['State_encoded'].nunique()
    num_species = df['Species_encoded'].nunique()
    print(f"Found {num_states} states and {num_species} species.")

    # 图像预处理
    train_transforms = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        
        # --- 1. 几何变换 (强制模型去“寻找”目标) ---
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(90),
        
        # [新] 仿射变换：平移 和 错切
        transforms.RandomAffine(
            degrees=0,
            translate=(0.15, 0.15),  # 随机平移 15%
            shear=15                 # 随机错切 15 度
        ), 

        # --- 2. 颜色变换 (模拟不同光照/季节) ---
        transforms.ColorJitter(
            brightness=0.3,
            contrast=0.3, 
            saturation=0.3, 
            hue=0.1
        ), 
        
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # 验证集不使用增强，只做 Resize 和 Normalize
    val_transforms = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # --- K-Fold Cross-Validation 设置 ---
    N_SPLITS = 5
    kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

    all_fold_best_r2 = [] # 存储每一折的 R2 分数

    # --- K-Fold 训练循环 ---
    for fold, (train_indices, val_indices) in enumerate(kf.split(df)):
        print(f"========== FOLD {fold + 1}/{N_SPLITS} ==========")

        # 1. 为当前折创建数据
        train_df = df.iloc[train_indices]
        val_df = df.iloc[val_indices]

        # 2. 创建 Datasets 和 DataLoaders
        train_dataset = PastureDataset(train_df, args.img_dir, train_transforms, args.img_size)
        val_dataset = PastureDataset(val_df, args.img_dir, val_transforms, args.img_size)

        train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)
        val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)

        # 3. !! 为当前折重新初始化模型、损失和优化器 !!
        model = TeacherModel(num_states, num_species).to(device)
        criterion = WeightedMSELoss()
        # optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=1e-3)

        # --- [关键修改] 设置差分学习率 (Differential LRs) ---
        
        # 1. 定义哪些模块属于“Head”（从零开始学）
        head_param_names = [
            'tab_mlp',
            'state_embedding',
            'species_embedding',
            'img_kv_projector',  # <-- [新]
            'tab_q_projector',   # <-- [新]
            'cross_attn',        # <-- [新]
            'attn_norm',         # <-- [新]
            'fusion_head'
        
            # [删除] 'img_pool' 和 'img_projector' 已被替换
        ]
        
        head_params = []
        backbone_params = []

        # 2. 将所有可训练参数 (requires_grad=True) 分配到两组
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
                
            is_head = False
            for head_name in head_param_names:
                if name.startswith(head_name):
                    head_params.append(param)
                    is_head = True
                    break
            
            if not is_head:
                backbone_params.append(param)
                # print(f"Backbone param: {name}") # (用于调试)

        # 3. 创建参数组
        #    主干 (Backbone) 使用基础 LR (例如 5e-5)
        #    头部 (Head) 使用 10 倍的基础 LR (例如 5e-4)
        param_groups = [
            {'params': backbone_params, 'lr': args.lr}, 
            {'params': head_params, 'lr': args.lr * 10}  
        ]

        # print(f"设置差分学习率：Head LR = {args.lr * 10}, Backbone LR = {args.lr}")

        # 4. !! 为当前折重新初始化模型、损失和优化器 !!
        
        # [修改] 将 param_groups 传入优化器
        optimizer = optim.AdamW(param_groups, 
                              lr=args.lr, # (默认 LR，主要由 group 覆盖)
                              weight_decay=1e-3) 

        # -----------------------------------------------------        

        # 学习率调度器
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=7, factor=0.1)

        # 5. 训练循环 (针对当前折)
        best_val_r2 = -float('inf')

        # --- [新] 早停变量 ---
        patience_counter = 0
        # -------------------------

        for epoch in range(args.epochs):
            print(f"--- Fold {fold+1}, Epoch {epoch+1}/{args.epochs} ---")

            train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
            val_loss, val_r2 = validate(model, val_loader, criterion, device)

            print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val R2: {val_r2:.4f}")

            # 更新学习率
            scheduler.step(val_r2)

            # --- [新] 早停和模型保存逻辑 ---
            if val_r2 > best_val_r2:
                best_val_r2 = val_r2
                patience_counter = 0 # 重置耐心

                # 保存最佳模型 (针对当前折)
                save_path = os.path.join(args.output_dir, f"best_teacher_model_fold_{fold+1}.pth")
                torch.save(model.state_dict(), save_path)
                print(f"New best model for fold {fold+1} saved with R2: {best_val_r2:.4f}")
            else:
                patience_counter += 1 # 增加耐心
                print(f"No improvement. Patience: {patience_counter}/{args.early_stopping_patience}")

            # 检查是否触发早停
            if patience_counter >= args.early_stopping_patience:
                print(f"--- Early stopping triggered at epoch {epoch+1} ---")
                break # 跳出当前 fold 的 epoch 循环
            # -----------------------------------

        print(f"Fold {fold+1} complete. Best Validation R2: {best_val_r2:.4f}")
        all_fold_best_r2.append(best_val_r2)
        print("=============================\n")

    # --- K-Fold 结束后，计算并打印平均 R2 ---
    print("\n--- K-Fold Cross-Validation Complete ---")
    print(f"R2 scores for each fold: {all_fold_best_r2}")
    print(f"Average R2: {np.mean(all_fold_best_r2):.4f}")
    print(f"Std Dev R2: {np.std(all_fold_best_r2):.4f}")

In [9]:
import sys
import os # 确保导入 os
import argparse # 确保导入 argparse

project_root = 'CSIRO---Image2Biomass-Prediction'
if project_root not in sys.path:
    sys.path.append(project_root)

# 导入您的模块
from KnowledgeDistillation.teacher_model import TeacherModel
from KnowledgeDistillation.loss import WeightedMSELoss

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train Teacher Model")

    # 使用 os.path.join 和您的 project_root 变量来构建绝对路径

    parser.add_argument('--data_csv', type=str,
                        default=os.path.join(project_root, 'csiro-biomass/preprocessing_output/train_processed.csv'),
                        help='Path to the processed training CSV file')

    parser.add_argument('--img_dir', type=str,
                        default=os.path.join(project_root, 'csiro-biomass/train'),
                        help='Path to the directory containing training images')

    # 指定一个明确的输出目录
    output_path = os.path.join(project_root, 'KnowledgeDistillation/teacher_model_output')
    parser.add_argument('--output_dir', type=str,
                        default=output_path,
                        help='Directory to save the best model')

    # --------------------------

    # 训练超参数
    parser.add_argument('--img_size', type=int, default=260, # <-- [修改] 从 240 改为 260
                        help='Image size for the model (B2 uses 260)')
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='Initial learning rate (1e-4 is good for fine-tuning)')
    parser.add_argument('--batch_size', type=int, default=16,
                        help='Batch size (use 8 or 16 for small datasets)')
    parser.add_argument('--epochs', type=int, default=150,
                        help='Number of training epochs')
    parser.add_argument('--val_split', type=float, default=0.2,
                        help='Validation split fraction')
    parser.add_argument('--num_workers', type=int, default=2,
                        help='Number of workers for DataLoader')

    # --- [新] 早停参数 ---
    parser.add_argument('--early_stopping_patience', type=int, default=15,
                        help='Patience for early stopping (e.g., 15 epochs)')
    # -------------------------

    # 传入一个空列表，告诉 argparse "不要读取 sys.argv"
    args = parser.parse_args(args=[])

    # 确保输出目录存在
    # args.output_dir 现在是绝对路径
    os.makedirs(args.output_dir, exist_ok=True)
    print(f"Model output will be saved to: {args.output_dir}")
    print(f"Reading data from: {args.data_csv}")

    main(args)



Model output will be saved to: CSIRO---Image2Biomass-Prediction/KnowledgeDistillation/teacher_model_output
Reading data from: CSIRO---Image2Biomass-Prediction/csiro-biomass/preprocessing_output/train_processed.csv
Using device: cuda
Found 4 states and 15 species.


model.safetensors:   0%|          | 0.00/36.8M [00:00<?, ?B/s]

--- Fold 1, Epoch 1/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 1: Train Loss: 1.1521 | Val Loss: 0.1884 | Val R2: -0.1391
New best model for fold 1 saved with R2: -0.1391
--- Fold 1, Epoch 2/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 2: Train Loss: 0.1493 | Val Loss: 0.1203 | Val R2: 0.2044
New best model for fold 1 saved with R2: 0.2044
--- Fold 1, Epoch 3/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 3: Train Loss: 0.1071 | Val Loss: 0.0616 | Val R2: 0.5565
New best model for fold 1 saved with R2: 0.5565
--- Fold 1, Epoch 4/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 4: Train Loss: 0.1024 | Val Loss: 0.0774 | Val R2: 0.4570
No improvement. Patience: 1/15
--- Fold 1, Epoch 5/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 5: Train Loss: 0.1003 | Val Loss: 0.0967 | Val R2: 0.0584
No improvement. Patience: 2/15
--- Fold 1, Epoch 6/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 6: Train Loss: 0.0791 | Val Loss: 0.0883 | Val R2: 0.2978
No improvement. Patience: 3/15
--- Fold 1, Epoch 7/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 7: Train Loss: 0.0844 | Val Loss: 0.0768 | Val R2: 0.3624
No improvement. Patience: 4/15
--- Fold 1, Epoch 8/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 8: Train Loss: 0.0729 | Val Loss: 0.0639 | Val R2: 0.5302
No improvement. Patience: 5/15
--- Fold 1, Epoch 9/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 9: Train Loss: 0.0831 | Val Loss: 0.0815 | Val R2: 0.4203
No improvement. Patience: 6/15
--- Fold 1, Epoch 10/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 10: Train Loss: 0.0825 | Val Loss: 0.0860 | Val R2: 0.2866
No improvement. Patience: 7/15
--- Fold 1, Epoch 11/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.04it/s]


Epoch 11: Train Loss: 0.0856 | Val Loss: 0.0655 | Val R2: -0.0101
No improvement. Patience: 8/15
--- Fold 1, Epoch 12/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.04it/s]


Epoch 12: Train Loss: 0.0734 | Val Loss: 0.0562 | Val R2: -1.8806
No improvement. Patience: 9/15
--- Fold 1, Epoch 13/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 13: Train Loss: 0.0657 | Val Loss: 0.0491 | Val R2: 0.6075
New best model for fold 1 saved with R2: 0.6075
--- Fold 1, Epoch 14/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 14: Train Loss: 0.0622 | Val Loss: 0.0488 | Val R2: 0.6861
New best model for fold 1 saved with R2: 0.6861
--- Fold 1, Epoch 15/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 15: Train Loss: 0.0564 | Val Loss: 0.0498 | Val R2: 0.6233
No improvement. Patience: 1/15
--- Fold 1, Epoch 16/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 16: Train Loss: 0.0551 | Val Loss: 0.0488 | Val R2: 0.6814
No improvement. Patience: 2/15
--- Fold 1, Epoch 17/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 17: Train Loss: 0.0577 | Val Loss: 0.0488 | Val R2: 0.6385
No improvement. Patience: 3/15
--- Fold 1, Epoch 18/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 18: Train Loss: 0.0536 | Val Loss: 0.0507 | Val R2: 0.6909
New best model for fold 1 saved with R2: 0.6909
--- Fold 1, Epoch 19/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.05it/s]


Epoch 19: Train Loss: 0.0607 | Val Loss: 0.0461 | Val R2: 0.7446
New best model for fold 1 saved with R2: 0.7446
--- Fold 1, Epoch 20/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 20: Train Loss: 0.0541 | Val Loss: 0.0485 | Val R2: 0.7011
No improvement. Patience: 1/15
--- Fold 1, Epoch 21/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 21: Train Loss: 0.0568 | Val Loss: 0.0475 | Val R2: 0.4299
No improvement. Patience: 2/15
--- Fold 1, Epoch 22/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.05it/s]


Epoch 22: Train Loss: 0.0575 | Val Loss: 0.0488 | Val R2: 0.7110
No improvement. Patience: 3/15
--- Fold 1, Epoch 23/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 23: Train Loss: 0.0454 | Val Loss: 0.0478 | Val R2: 0.7024
No improvement. Patience: 4/15
--- Fold 1, Epoch 24/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.91it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 24: Train Loss: 0.0509 | Val Loss: 0.0475 | Val R2: 0.7623
New best model for fold 1 saved with R2: 0.7623
--- Fold 1, Epoch 25/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 25: Train Loss: 0.0558 | Val Loss: 0.0461 | Val R2: 0.7466
No improvement. Patience: 1/15
--- Fold 1, Epoch 26/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 26: Train Loss: 0.0455 | Val Loss: 0.0477 | Val R2: 0.7150
No improvement. Patience: 2/15
--- Fold 1, Epoch 27/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 27: Train Loss: 0.0546 | Val Loss: 0.0502 | Val R2: 0.7076
No improvement. Patience: 3/15
--- Fold 1, Epoch 28/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 28: Train Loss: 0.0525 | Val Loss: 0.0474 | Val R2: 0.7462
No improvement. Patience: 4/15
--- Fold 1, Epoch 29/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.80it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 29: Train Loss: 0.0510 | Val Loss: 0.0452 | Val R2: 0.7573
No improvement. Patience: 5/15
--- Fold 1, Epoch 30/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 30: Train Loss: 0.0492 | Val Loss: 0.0484 | Val R2: 0.7353
No improvement. Patience: 6/15
--- Fold 1, Epoch 31/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 31: Train Loss: 0.0479 | Val Loss: 0.0489 | Val R2: 0.7302
No improvement. Patience: 7/15
--- Fold 1, Epoch 32/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 32: Train Loss: 0.0490 | Val Loss: 0.0462 | Val R2: 0.7372
No improvement. Patience: 8/15
--- Fold 1, Epoch 33/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 33: Train Loss: 0.0481 | Val Loss: 0.0471 | Val R2: 0.6975
No improvement. Patience: 9/15
--- Fold 1, Epoch 34/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 34: Train Loss: 0.0473 | Val Loss: 0.0509 | Val R2: 0.6837
No improvement. Patience: 10/15
--- Fold 1, Epoch 35/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 35: Train Loss: 0.0460 | Val Loss: 0.0485 | Val R2: 0.7010
No improvement. Patience: 11/15
--- Fold 1, Epoch 36/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 36: Train Loss: 0.0459 | Val Loss: 0.0489 | Val R2: 0.7048
No improvement. Patience: 12/15
--- Fold 1, Epoch 37/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 37: Train Loss: 0.0479 | Val Loss: 0.0459 | Val R2: 0.6743
No improvement. Patience: 13/15
--- Fold 1, Epoch 38/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 38: Train Loss: 0.0490 | Val Loss: 0.0449 | Val R2: 0.5528
No improvement. Patience: 14/15
--- Fold 1, Epoch 39/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.19it/s]


Epoch 39: Train Loss: 0.0493 | Val Loss: 0.0511 | Val R2: 0.6914
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 39 ---
Fold 1 complete. Best Validation R2: 0.7623

--- Fold 2, Epoch 1/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.16it/s]


Epoch 1: Train Loss: 0.8324 | Val Loss: 0.2282 | Val R2: -0.2393
New best model for fold 2 saved with R2: -0.2393
--- Fold 2, Epoch 2/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.94it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 2: Train Loss: 0.1402 | Val Loss: 0.1355 | Val R2: 0.4788
New best model for fold 2 saved with R2: 0.4788
--- Fold 2, Epoch 3/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 3: Train Loss: 0.1091 | Val Loss: 0.0956 | Val R2: 0.3438
No improvement. Patience: 1/15
--- Fold 2, Epoch 4/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.97it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.17it/s]


Epoch 4: Train Loss: 0.0988 | Val Loss: 0.0815 | Val R2: 0.5614
New best model for fold 2 saved with R2: 0.5614
--- Fold 2, Epoch 5/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


Epoch 5: Train Loss: 0.0862 | Val Loss: 0.1044 | Val R2: 0.2900
No improvement. Patience: 1/15
--- Fold 2, Epoch 6/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.96it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.18it/s]


Epoch 6: Train Loss: 0.0859 | Val Loss: 0.1114 | Val R2: 0.2159
No improvement. Patience: 2/15
--- Fold 2, Epoch 7/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.97it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 7: Train Loss: 0.0796 | Val Loss: 0.0808 | Val R2: 0.3802
No improvement. Patience: 3/15
--- Fold 2, Epoch 8/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 8: Train Loss: 0.0651 | Val Loss: 0.0649 | Val R2: 0.5437
No improvement. Patience: 4/15
--- Fold 2, Epoch 9/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.96it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.06it/s]


Epoch 9: Train Loss: 0.0693 | Val Loss: 0.0866 | Val R2: 0.4814
No improvement. Patience: 5/15
--- Fold 2, Epoch 10/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.94it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 10: Train Loss: 0.0633 | Val Loss: 0.0805 | Val R2: 0.5481
No improvement. Patience: 6/15
--- Fold 2, Epoch 11/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 11: Train Loss: 0.0657 | Val Loss: 0.0741 | Val R2: 0.4588
No improvement. Patience: 7/15
--- Fold 2, Epoch 12/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.94it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.17it/s]


Epoch 12: Train Loss: 0.0678 | Val Loss: 0.0572 | Val R2: 0.6009
New best model for fold 2 saved with R2: 0.6009
--- Fold 2, Epoch 13/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 13: Train Loss: 0.0656 | Val Loss: 0.0743 | Val R2: 0.6439
New best model for fold 2 saved with R2: 0.6439
--- Fold 2, Epoch 14/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 14: Train Loss: 0.0587 | Val Loss: 0.0690 | Val R2: 0.4508
No improvement. Patience: 1/15
--- Fold 2, Epoch 15/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.96it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.17it/s]


Epoch 15: Train Loss: 0.0646 | Val Loss: 0.0560 | Val R2: 0.6965
New best model for fold 2 saved with R2: 0.6965
--- Fold 2, Epoch 16/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.04it/s]


Epoch 16: Train Loss: 0.0616 | Val Loss: 0.0903 | Val R2: 0.5601
No improvement. Patience: 1/15
--- Fold 2, Epoch 17/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.19it/s]


Epoch 17: Train Loss: 0.0577 | Val Loss: 0.0707 | Val R2: 0.6621
No improvement. Patience: 2/15
--- Fold 2, Epoch 18/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.19it/s]


Epoch 18: Train Loss: 0.0572 | Val Loss: 0.0659 | Val R2: 0.4419
No improvement. Patience: 3/15
--- Fold 2, Epoch 19/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 19: Train Loss: 0.0584 | Val Loss: 0.0728 | Val R2: 0.4644
No improvement. Patience: 4/15
--- Fold 2, Epoch 20/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.95it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 20: Train Loss: 0.0497 | Val Loss: 0.0692 | Val R2: 0.5613
No improvement. Patience: 5/15
--- Fold 2, Epoch 21/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.99it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 21: Train Loss: 0.0511 | Val Loss: 0.0626 | Val R2: 0.3859
No improvement. Patience: 6/15
--- Fold 2, Epoch 22/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 22: Train Loss: 0.0568 | Val Loss: 0.0565 | Val R2: 0.5582
No improvement. Patience: 7/15
--- Fold 2, Epoch 23/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.95it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.18it/s]


Epoch 23: Train Loss: 0.0541 | Val Loss: 0.0601 | Val R2: 0.4966
No improvement. Patience: 8/15
--- Fold 2, Epoch 24/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.16it/s]


Epoch 24: Train Loss: 0.0428 | Val Loss: 0.0531 | Val R2: 0.5572
No improvement. Patience: 9/15
--- Fold 2, Epoch 25/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 25: Train Loss: 0.0427 | Val Loss: 0.0567 | Val R2: 0.5395
No improvement. Patience: 10/15
--- Fold 2, Epoch 26/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 26: Train Loss: 0.0386 | Val Loss: 0.0567 | Val R2: 0.5904
No improvement. Patience: 11/15
--- Fold 2, Epoch 27/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 27: Train Loss: 0.0434 | Val Loss: 0.0601 | Val R2: 0.5341
No improvement. Patience: 12/15
--- Fold 2, Epoch 28/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


Epoch 28: Train Loss: 0.0386 | Val Loss: 0.0602 | Val R2: 0.5020
No improvement. Patience: 13/15
--- Fold 2, Epoch 29/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 29: Train Loss: 0.0392 | Val Loss: 0.0514 | Val R2: 0.6537
No improvement. Patience: 14/15
--- Fold 2, Epoch 30/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 30: Train Loss: 0.0368 | Val Loss: 0.0522 | Val R2: 0.5827
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 30 ---
Fold 2 complete. Best Validation R2: 0.6965

--- Fold 3, Epoch 1/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 1: Train Loss: 0.9768 | Val Loss: 0.1370 | Val R2: 0.2937
New best model for fold 3 saved with R2: 0.2937
--- Fold 3, Epoch 2/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 2: Train Loss: 0.1625 | Val Loss: 0.0839 | Val R2: 0.5359
New best model for fold 3 saved with R2: 0.5359
--- Fold 3, Epoch 3/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 3: Train Loss: 0.1309 | Val Loss: 0.0661 | Val R2: 0.5088
No improvement. Patience: 1/15
--- Fold 3, Epoch 4/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 4: Train Loss: 0.0987 | Val Loss: 0.0770 | Val R2: 0.5352
No improvement. Patience: 2/15
--- Fold 3, Epoch 5/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 5: Train Loss: 0.1089 | Val Loss: 0.0916 | Val R2: 0.3507
No improvement. Patience: 3/15
--- Fold 3, Epoch 6/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 6: Train Loss: 0.1049 | Val Loss: 0.0735 | Val R2: 0.4412
No improvement. Patience: 4/15
--- Fold 3, Epoch 7/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 7: Train Loss: 0.0841 | Val Loss: 0.0554 | Val R2: 0.6435
New best model for fold 3 saved with R2: 0.6435
--- Fold 3, Epoch 8/150 ---


Training: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 8: Train Loss: 0.0855 | Val Loss: 0.0543 | Val R2: 0.6551
New best model for fold 3 saved with R2: 0.6551
--- Fold 3, Epoch 9/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 9: Train Loss: 0.0858 | Val Loss: 0.0554 | Val R2: 0.7400
New best model for fold 3 saved with R2: 0.7400
--- Fold 3, Epoch 10/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 10: Train Loss: 0.0866 | Val Loss: 0.0639 | Val R2: 0.6018
No improvement. Patience: 1/15
--- Fold 3, Epoch 11/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 11: Train Loss: 0.0827 | Val Loss: 0.0680 | Val R2: 0.5811
No improvement. Patience: 2/15
--- Fold 3, Epoch 12/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 12: Train Loss: 0.0819 | Val Loss: 0.0651 | Val R2: 0.6244
No improvement. Patience: 3/15
--- Fold 3, Epoch 13/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 13: Train Loss: 0.0842 | Val Loss: 0.0648 | Val R2: 0.5731
No improvement. Patience: 4/15
--- Fold 3, Epoch 14/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 14: Train Loss: 0.0731 | Val Loss: 0.0482 | Val R2: 0.7537
New best model for fold 3 saved with R2: 0.7537
--- Fold 3, Epoch 15/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 15: Train Loss: 0.0697 | Val Loss: 0.0483 | Val R2: 0.5414
No improvement. Patience: 1/15
--- Fold 3, Epoch 16/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 16: Train Loss: 0.0694 | Val Loss: 0.0433 | Val R2: 0.7253
No improvement. Patience: 2/15
--- Fold 3, Epoch 17/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 17: Train Loss: 0.0671 | Val Loss: 0.0494 | Val R2: 0.6378
No improvement. Patience: 3/15
--- Fold 3, Epoch 18/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 18: Train Loss: 0.0755 | Val Loss: 0.0485 | Val R2: 0.6408
No improvement. Patience: 4/15
--- Fold 3, Epoch 19/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 19: Train Loss: 0.0774 | Val Loss: 0.0468 | Val R2: 0.6969
No improvement. Patience: 5/15
--- Fold 3, Epoch 20/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 20: Train Loss: 0.0651 | Val Loss: 0.0536 | Val R2: 0.6977
No improvement. Patience: 6/15
--- Fold 3, Epoch 21/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 21: Train Loss: 0.0723 | Val Loss: 0.0491 | Val R2: 0.4275
No improvement. Patience: 7/15
--- Fold 3, Epoch 22/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 22: Train Loss: 0.0714 | Val Loss: 0.0629 | Val R2: 0.4746
No improvement. Patience: 8/15
--- Fold 3, Epoch 23/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 23: Train Loss: 0.0618 | Val Loss: 0.0533 | Val R2: 0.6762
No improvement. Patience: 9/15
--- Fold 3, Epoch 24/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 24: Train Loss: 0.0531 | Val Loss: 0.0539 | Val R2: 0.6549
No improvement. Patience: 10/15
--- Fold 3, Epoch 25/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 25: Train Loss: 0.0562 | Val Loss: 0.0482 | Val R2: 0.6792
No improvement. Patience: 11/15
--- Fold 3, Epoch 26/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 26: Train Loss: 0.0531 | Val Loss: 0.0463 | Val R2: 0.7044
No improvement. Patience: 12/15
--- Fold 3, Epoch 27/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 27: Train Loss: 0.0500 | Val Loss: 0.0464 | Val R2: 0.6972
No improvement. Patience: 13/15
--- Fold 3, Epoch 28/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 28: Train Loss: 0.0558 | Val Loss: 0.0466 | Val R2: 0.7126
No improvement. Patience: 14/15
--- Fold 3, Epoch 29/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 29: Train Loss: 0.0552 | Val Loss: 0.0444 | Val R2: 0.7377
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 29 ---
Fold 3 complete. Best Validation R2: 0.7537

--- Fold 4, Epoch 1/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 1: Train Loss: 1.0333 | Val Loss: 0.2321 | Val R2: -0.2143
New best model for fold 4 saved with R2: -0.2143
--- Fold 4, Epoch 2/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 2: Train Loss: 0.1552 | Val Loss: 0.0955 | Val R2: 0.1428
New best model for fold 4 saved with R2: 0.1428
--- Fold 4, Epoch 3/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 3: Train Loss: 0.1098 | Val Loss: 0.0817 | Val R2: 0.5699
New best model for fold 4 saved with R2: 0.5699
--- Fold 4, Epoch 4/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.92it/s]


Epoch 4: Train Loss: 0.1041 | Val Loss: 0.1175 | Val R2: -0.1574
No improvement. Patience: 1/15
--- Fold 4, Epoch 5/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 5: Train Loss: 0.1012 | Val Loss: 0.0715 | Val R2: 0.2122
No improvement. Patience: 2/15
--- Fold 4, Epoch 6/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 6: Train Loss: 0.0848 | Val Loss: 0.2257 | Val R2: -9091455.0000
No improvement. Patience: 3/15
--- Fold 4, Epoch 7/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 7: Train Loss: 0.0669 | Val Loss: 0.4911 | Val R2: -5353191768064.0000
No improvement. Patience: 4/15
--- Fold 4, Epoch 8/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 8: Train Loss: 0.0736 | Val Loss: 0.0766 | Val R2: -11.7083
No improvement. Patience: 5/15
--- Fold 4, Epoch 9/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 9: Train Loss: 0.0870 | Val Loss: 0.0949 | Val R2: -826.5238
No improvement. Patience: 6/15
--- Fold 4, Epoch 10/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 10: Train Loss: 0.0766 | Val Loss: 0.3103 | Val R2: -7388270.5000
No improvement. Patience: 7/15
--- Fold 4, Epoch 11/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 11: Train Loss: 0.0653 | Val Loss: 0.6622 | Val R2: -7502237882384384.0000
No improvement. Patience: 8/15
--- Fold 4, Epoch 12/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 12: Train Loss: 0.0637 | Val Loss: 0.0704 | Val R2: -10.0233
No improvement. Patience: 9/15
--- Fold 4, Epoch 13/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 13: Train Loss: 0.0617 | Val Loss: 0.0448 | Val R2: 0.6565
New best model for fold 4 saved with R2: 0.6565
--- Fold 4, Epoch 14/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 14: Train Loss: 0.0584 | Val Loss: 0.0613 | Val R2: -2.2714
No improvement. Patience: 1/15
--- Fold 4, Epoch 15/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 15: Train Loss: 0.0548 | Val Loss: 0.0646 | Val R2: -8.8979
No improvement. Patience: 2/15
--- Fold 4, Epoch 16/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 16: Train Loss: 0.0555 | Val Loss: 0.1397 | Val R2: -7665.4746
No improvement. Patience: 3/15
--- Fold 4, Epoch 17/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 17: Train Loss: 0.0513 | Val Loss: 0.0800 | Val R2: -33.0563
No improvement. Patience: 4/15
--- Fold 4, Epoch 18/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 18: Train Loss: 0.0484 | Val Loss: 0.0396 | Val R2: 0.6886
New best model for fold 4 saved with R2: 0.6886
--- Fold 4, Epoch 19/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 19: Train Loss: 0.0529 | Val Loss: 0.2957 | Val R2: -81640032.0000
No improvement. Patience: 1/15
--- Fold 4, Epoch 20/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 20: Train Loss: 0.0514 | Val Loss: 0.1287 | Val R2: -187233.9219
No improvement. Patience: 2/15
--- Fold 4, Epoch 21/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 21: Train Loss: 0.0493 | Val Loss: 0.0381 | Val R2: 0.7660
New best model for fold 4 saved with R2: 0.7660
--- Fold 4, Epoch 22/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 22: Train Loss: 0.0490 | Val Loss: 0.0393 | Val R2: 0.7594
No improvement. Patience: 1/15
--- Fold 4, Epoch 23/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 23: Train Loss: 0.0533 | Val Loss: 0.0402 | Val R2: 0.6506
No improvement. Patience: 2/15
--- Fold 4, Epoch 24/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 24: Train Loss: 0.0526 | Val Loss: 0.0420 | Val R2: 0.6969
No improvement. Patience: 3/15
--- Fold 4, Epoch 25/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 25: Train Loss: 0.0482 | Val Loss: 0.0416 | Val R2: 0.6439
No improvement. Patience: 4/15
--- Fold 4, Epoch 26/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 26: Train Loss: 0.0514 | Val Loss: 0.1834 | Val R2: -39930.5195
No improvement. Patience: 5/15
--- Fold 4, Epoch 27/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 27: Train Loss: 0.0473 | Val Loss: 0.1976 | Val R2: -44898.1250
No improvement. Patience: 6/15
--- Fold 4, Epoch 28/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 28: Train Loss: 0.0472 | Val Loss: 0.3868 | Val R2: -662733248.0000
No improvement. Patience: 7/15
--- Fold 4, Epoch 29/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 29: Train Loss: 0.0502 | Val Loss: 0.2047 | Val R2: -640692.6250
No improvement. Patience: 8/15
--- Fold 4, Epoch 30/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 30: Train Loss: 0.0495 | Val Loss: 0.1482 | Val R2: -3027.3582
No improvement. Patience: 9/15
--- Fold 4, Epoch 31/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 31: Train Loss: 0.0450 | Val Loss: 0.1931 | Val R2: -291730.9688
No improvement. Patience: 10/15
--- Fold 4, Epoch 32/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 32: Train Loss: 0.0494 | Val Loss: 0.2069 | Val R2: -1153370.6250
No improvement. Patience: 11/15
--- Fold 4, Epoch 33/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 33: Train Loss: 0.0470 | Val Loss: 0.7244 | Val R2: -14071196551217152.0000
No improvement. Patience: 12/15
--- Fold 4, Epoch 34/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 34: Train Loss: 0.0476 | Val Loss: 0.2896 | Val R2: -366861504.0000
No improvement. Patience: 13/15
--- Fold 4, Epoch 35/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 35: Train Loss: 0.0469 | Val Loss: 0.1909 | Val R2: -14128237.0000
No improvement. Patience: 14/15
--- Fold 4, Epoch 36/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.90it/s]


Epoch 36: Train Loss: 0.0461 | Val Loss: 0.1167 | Val R2: -290.4841
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 36 ---
Fold 4 complete. Best Validation R2: 0.7660

--- Fold 5, Epoch 1/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 1: Train Loss: 0.8959 | Val Loss: 0.1509 | Val R2: 0.0852
New best model for fold 5 saved with R2: 0.0852
--- Fold 5, Epoch 2/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 2: Train Loss: 0.1377 | Val Loss: 0.0984 | Val R2: 0.3855
New best model for fold 5 saved with R2: 0.3855
--- Fold 5, Epoch 3/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 3: Train Loss: 0.1222 | Val Loss: 0.1239 | Val R2: 0.1554
No improvement. Patience: 1/15
--- Fold 5, Epoch 4/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.87it/s]


Epoch 4: Train Loss: 0.1068 | Val Loss: 0.0987 | Val R2: 0.1770
No improvement. Patience: 2/15
--- Fold 5, Epoch 5/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 5: Train Loss: 0.0946 | Val Loss: 0.0828 | Val R2: 0.2205
No improvement. Patience: 3/15
--- Fold 5, Epoch 6/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.92it/s]


Epoch 6: Train Loss: 0.0789 | Val Loss: 0.0789 | Val R2: 0.5694
New best model for fold 5 saved with R2: 0.5694
--- Fold 5, Epoch 7/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 7: Train Loss: 0.0857 | Val Loss: 0.0793 | Val R2: 0.3647
No improvement. Patience: 1/15
--- Fold 5, Epoch 8/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 8: Train Loss: 0.0767 | Val Loss: 0.0729 | Val R2: 0.5717
New best model for fold 5 saved with R2: 0.5717
--- Fold 5, Epoch 9/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 9: Train Loss: 0.0737 | Val Loss: 0.0726 | Val R2: 0.5926
New best model for fold 5 saved with R2: 0.5926
--- Fold 5, Epoch 10/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 10: Train Loss: 0.0621 | Val Loss: 0.0799 | Val R2: -0.7354
No improvement. Patience: 1/15
--- Fold 5, Epoch 11/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.89it/s]


Epoch 11: Train Loss: 0.0851 | Val Loss: 0.0739 | Val R2: 0.6024
New best model for fold 5 saved with R2: 0.6024
--- Fold 5, Epoch 12/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 12: Train Loss: 0.0764 | Val Loss: 0.0629 | Val R2: 0.6629
New best model for fold 5 saved with R2: 0.6629
--- Fold 5, Epoch 13/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 13: Train Loss: 0.0703 | Val Loss: 0.0878 | Val R2: 0.4334
No improvement. Patience: 1/15
--- Fold 5, Epoch 14/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 14: Train Loss: 0.0622 | Val Loss: 0.0701 | Val R2: 0.4271
No improvement. Patience: 2/15
--- Fold 5, Epoch 15/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 15: Train Loss: 0.0627 | Val Loss: 0.1099 | Val R2: -0.6326
No improvement. Patience: 3/15
--- Fold 5, Epoch 16/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 16: Train Loss: 0.0703 | Val Loss: 0.0794 | Val R2: 0.5673
No improvement. Patience: 4/15
--- Fold 5, Epoch 17/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 17: Train Loss: 0.0646 | Val Loss: 0.0669 | Val R2: 0.3528
No improvement. Patience: 5/15
--- Fold 5, Epoch 18/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 18: Train Loss: 0.0777 | Val Loss: 0.0675 | Val R2: 0.4082
No improvement. Patience: 6/15
--- Fold 5, Epoch 19/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 19: Train Loss: 0.0694 | Val Loss: 0.0929 | Val R2: 0.1398
No improvement. Patience: 7/15
--- Fold 5, Epoch 20/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 20: Train Loss: 0.0655 | Val Loss: 0.0714 | Val R2: 0.3513
No improvement. Patience: 8/15
--- Fold 5, Epoch 21/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 21: Train Loss: 0.0543 | Val Loss: 0.0610 | Val R2: 0.6759
New best model for fold 5 saved with R2: 0.6759
--- Fold 5, Epoch 22/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 22: Train Loss: 0.0475 | Val Loss: 0.0594 | Val R2: 0.6416
No improvement. Patience: 1/15
--- Fold 5, Epoch 23/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 23: Train Loss: 0.0508 | Val Loss: 0.0593 | Val R2: 0.6382
No improvement. Patience: 2/15
--- Fold 5, Epoch 24/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 24: Train Loss: 0.0497 | Val Loss: 0.0596 | Val R2: 0.6704
No improvement. Patience: 3/15
--- Fold 5, Epoch 25/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 25: Train Loss: 0.0484 | Val Loss: 0.0550 | Val R2: 0.7172
New best model for fold 5 saved with R2: 0.7172
--- Fold 5, Epoch 26/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 26: Train Loss: 0.0471 | Val Loss: 0.0593 | Val R2: 0.6285
No improvement. Patience: 1/15
--- Fold 5, Epoch 27/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 27: Train Loss: 0.0468 | Val Loss: 0.0575 | Val R2: 0.6568
No improvement. Patience: 2/15
--- Fold 5, Epoch 28/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 28: Train Loss: 0.0453 | Val Loss: 0.0559 | Val R2: 0.7048
No improvement. Patience: 3/15
--- Fold 5, Epoch 29/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 29: Train Loss: 0.0456 | Val Loss: 0.0561 | Val R2: 0.6705
No improvement. Patience: 4/15
--- Fold 5, Epoch 30/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 30: Train Loss: 0.0463 | Val Loss: 0.0544 | Val R2: 0.7145
No improvement. Patience: 5/15
--- Fold 5, Epoch 31/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 31: Train Loss: 0.0425 | Val Loss: 0.0546 | Val R2: 0.6862
No improvement. Patience: 6/15
--- Fold 5, Epoch 32/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 32: Train Loss: 0.0453 | Val Loss: 0.0539 | Val R2: 0.6777
No improvement. Patience: 7/15
--- Fold 5, Epoch 33/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 33: Train Loss: 0.0496 | Val Loss: 0.0554 | Val R2: 0.6476
No improvement. Patience: 8/15
--- Fold 5, Epoch 34/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 34: Train Loss: 0.0470 | Val Loss: 0.0569 | Val R2: 0.6200
No improvement. Patience: 9/15
--- Fold 5, Epoch 35/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 35: Train Loss: 0.0425 | Val Loss: 0.0553 | Val R2: 0.6474
No improvement. Patience: 10/15
--- Fold 5, Epoch 36/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 36: Train Loss: 0.0461 | Val Loss: 0.0556 | Val R2: 0.6666
No improvement. Patience: 11/15
--- Fold 5, Epoch 37/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 37: Train Loss: 0.0427 | Val Loss: 0.0552 | Val R2: 0.6562
No improvement. Patience: 12/15
--- Fold 5, Epoch 38/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 38: Train Loss: 0.0470 | Val Loss: 0.0564 | Val R2: 0.6614
No improvement. Patience: 13/15
--- Fold 5, Epoch 39/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 39: Train Loss: 0.0461 | Val Loss: 0.0555 | Val R2: 0.6674
No improvement. Patience: 14/15
--- Fold 5, Epoch 40/150 ---


Training: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]

Epoch 40: Train Loss: 0.0427 | Val Loss: 0.0545 | Val R2: 0.6725
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 40 ---
Fold 5 complete. Best Validation R2: 0.7172


--- K-Fold Cross-Validation Complete ---
R2 scores for each fold: [0.762302815914154, 0.6965463161468506, 0.753661036491394, 0.7660254836082458, 0.7172137498855591]
Average R2: 0.7391
Std Dev R2: 0.0274





In [10]:
# --- [新] 最终训练函数 (无 KFold, 无验证) ---
def main_final_training(args):
    # 1. 设置
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # 2. 加载数据
    df = pd.read_csv(args.data_csv, index_col='image_path')

    # 获取类别数量 (用于 Embedding)
    num_states = df['State_encoded'].nunique()
    num_species = df['Species_encoded'].nunique()
    print(f"Found {num_states} states and {num_species} species.")


    # 3. 图像预处理 
    train_transforms = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        
        # --- 1. 几何变换 (强制模型去“寻找”目标) ---
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(90),
        
        # [新] 仿射变换：平移 和 错切
        transforms.RandomAffine(
            degrees=0,
            translate=(0.15, 0.15),  # 随机平移 15%
            shear=15                 # 随机错切 15 度
        ), 

        # --- 2. 颜色变换 (模拟不同光照/季节) ---
        transforms.ColorJitter(
            brightness=0.3,
            contrast=0.3, 
            saturation=0.3, 
            hue=0.1
        ), 
        
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # 4. 创建完整的数据集 (不再有 train/val 拆分)
    final_train_dataset = PastureDataset(df, args.img_dir, train_transforms, args.img_size)
    
    # [关键] Batch Size 可以适当调大一点，因为 GPU 显存不再需要留给验证集
    # 例如，如果之前是 16，可以试试 24 或 32
    final_train_loader = DataLoader(
        final_train_dataset, 
        batch_size=args.batch_size, 
        shuffle=True, 
        num_workers=args.num_workers
    )

    # 5. 初始化模型
    model = TeacherModel(num_states, num_species).to(device)
    criterion = WeightedMSELoss()

    # 6. 设置差分学习率 (与 CV 时相同)
    head_param_names = [
        'tab_mlp', 'state_embedding', 'species_embedding',
        'img_kv_projector', 'tab_q_projector', 'cross_attn',
        'attn_norm', 'fusion_head'
    ]
    head_params = []
    backbone_params = []

    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        is_head = any(name.startswith(head_name) for head_name in head_param_names)
        if is_head:
            head_params.append(param)
        else:
            backbone_params.append(param)
            
    param_groups = [
        {'params': backbone_params, 'lr': args.lr}, # e.g., 1e-4
        {'params': head_params, 'lr': args.lr * 10} # e.g., 1e-3
    ]

    optimizer = optim.AdamW(param_groups, lr=args.lr, weight_decay=1e-3)
    
    # [关键] 我们不再需要 ReduceLROnPlateau，因为没有 val_r2
    # 我们可以使用 OneCycleLR 或 CosineAnnealingLR 来在固定轮数内平滑地调整学习率
    
    # 推荐：余弦退火调度器
    # 它会在 30 轮内将 LR 从最大值（1e-4 / 1e-3）平滑降至 0
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, 
        T_max=args.final_epochs # T_max 设为总轮数
    )
    
    # 7. 最终训练循环
    print(f"Training for a fixed {args.final_epochs} epochs...")
    for epoch in range(args.final_epochs):
        print(f"--- Final Epoch {epoch+1}/{args.final_epochs} ---")
        
        # [注意] 我们只调用 train_one_epoch
        train_loss = train_one_epoch(model, final_train_loader, criterion, optimizer, device)
        
        # [注意] 我们不再调用 validate()
        
        # 更新学习率
        scheduler.step()
        
        current_lr_head = optimizer.param_groups[1]['lr']
        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f} | Head LR: {current_lr_head:.6f}")

    # 8. 保存最终模型
    save_path = os.path.join(args.output_dir, "FINAL_teacher_model.pth")
    torch.save(model.state_dict(), save_path)
    print(f"\\n--- Final Training Complete ---")
    print(f"Final TeacherModel saved to: {save_path}")

In [11]:
import sys
import os # 确保导入 os
import argparse # 确保导入 argparse

project_root = 'CSIRO---Image2Biomass-Prediction'
if project_root not in sys.path:
    sys.path.append(project_root)

# 导入您的模块
from KnowledgeDistillation.teacher_model import TeacherModel
from KnowledgeDistillation.loss import WeightedMSELoss

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train Teacher Model")

    # 使用 os.path.join 和您的 project_root 变量来构建绝对路径

    parser.add_argument('--data_csv', type=str,
                        default=os.path.join(project_root, 'csiro-biomass/preprocessing_output/train_processed.csv'),
                        help='Path to the processed training CSV file')

    parser.add_argument('--img_dir', type=str,
                        default=os.path.join(project_root, 'csiro-biomass/train'),
                        help='Path to the directory containing training images')

    # 指定一个明确的输出目录
    output_path = os.path.join(project_root, 'KnowledgeDistillation/teacher_model_output')
    parser.add_argument('--output_dir', type=str,
                        default=output_path,
                        help='Directory to save the best model')

    # --------------------------

    # 训练超参数
    parser.add_argument('--img_size', type=int, default=260, # <-- [修改] 从 240 改为 260
                        help='Image size for the model (B2 uses 260)')
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='Initial learning rate (1e-4 is good for fine-tuning)')
    parser.add_argument('--batch_size', type=int, default=16,
                        help='Batch size (use 8 or 16 for small datasets)')
    parser.add_argument('--final_epochs', type=int, default=30,
                        help='Number of epochs for final training on all data')
    parser.add_argument('--val_split', type=float, default=0.2,
                        help='Validation split fraction')
    parser.add_argument('--num_workers', type=int, default=2,
                        help='Number of workers for DataLoader')

    # --- [新] 早停参数 ---
    parser.add_argument('--early_stopping_patience', type=int, default=15,
                        help='Patience for early stopping (e.g., 15 epochs)')
    # -------------------------

    # 传入一个空列表，告诉 argparse "不要读取 sys.argv"
    args = parser.parse_args(args=[])

    # 确保输出目录存在
    # args.output_dir 现在是绝对路径
    os.makedirs(args.output_dir, exist_ok=True)
    print(f"Model output will be saved to: {args.output_dir}")
    print(f"Reading data from: {args.data_csv}")

    main_final_training(args)

Model output will be saved to: CSIRO---Image2Biomass-Prediction/KnowledgeDistillation/teacher_model_output
Reading data from: CSIRO---Image2Biomass-Prediction/csiro-biomass/preprocessing_output/train_processed.csv
Using device: cuda
Found 4 states and 15 species.
Training for a fixed 30 epochs...
--- Final Epoch 1/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.81it/s]


Epoch 1: Train Loss: 0.7738 | Head LR: 0.000997
--- Final Epoch 2/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.78it/s]


Epoch 2: Train Loss: 0.1292 | Head LR: 0.000989
--- Final Epoch 3/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.80it/s]


Epoch 3: Train Loss: 0.1018 | Head LR: 0.000976
--- Final Epoch 4/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.78it/s]


Epoch 4: Train Loss: 0.0918 | Head LR: 0.000957
--- Final Epoch 5/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.81it/s]


Epoch 5: Train Loss: 0.0882 | Head LR: 0.000933
--- Final Epoch 6/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.82it/s]


Epoch 6: Train Loss: 0.0753 | Head LR: 0.000905
--- Final Epoch 7/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.83it/s]


Epoch 7: Train Loss: 0.0777 | Head LR: 0.000872
--- Final Epoch 8/30 ---


Training: 100%|██████████| 23/23 [00:13<00:00,  1.76it/s]


Epoch 8: Train Loss: 0.0664 | Head LR: 0.000835
--- Final Epoch 9/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.81it/s]


Epoch 9: Train Loss: 0.0716 | Head LR: 0.000794
--- Final Epoch 10/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.79it/s]


Epoch 10: Train Loss: 0.0714 | Head LR: 0.000750
--- Final Epoch 11/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.80it/s]


Epoch 11: Train Loss: 0.0633 | Head LR: 0.000703
--- Final Epoch 12/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.81it/s]


Epoch 12: Train Loss: 0.0667 | Head LR: 0.000655
--- Final Epoch 13/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.81it/s]


Epoch 13: Train Loss: 0.0605 | Head LR: 0.000604
--- Final Epoch 14/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.82it/s]


Epoch 14: Train Loss: 0.0634 | Head LR: 0.000552
--- Final Epoch 15/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.80it/s]


Epoch 15: Train Loss: 0.0607 | Head LR: 0.000500
--- Final Epoch 16/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.81it/s]


Epoch 16: Train Loss: 0.0589 | Head LR: 0.000448
--- Final Epoch 17/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.82it/s]


Epoch 17: Train Loss: 0.0491 | Head LR: 0.000396
--- Final Epoch 18/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.81it/s]


Epoch 18: Train Loss: 0.0582 | Head LR: 0.000345
--- Final Epoch 19/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.82it/s]


Epoch 19: Train Loss: 0.0553 | Head LR: 0.000297
--- Final Epoch 20/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.80it/s]


Epoch 20: Train Loss: 0.0517 | Head LR: 0.000250
--- Final Epoch 21/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.83it/s]


Epoch 21: Train Loss: 0.0491 | Head LR: 0.000206
--- Final Epoch 22/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.79it/s]


Epoch 22: Train Loss: 0.0433 | Head LR: 0.000165
--- Final Epoch 23/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.78it/s]


Epoch 23: Train Loss: 0.0497 | Head LR: 0.000128
--- Final Epoch 24/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.81it/s]


Epoch 24: Train Loss: 0.0494 | Head LR: 0.000095
--- Final Epoch 25/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.80it/s]


Epoch 25: Train Loss: 0.0444 | Head LR: 0.000067
--- Final Epoch 26/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.77it/s]


Epoch 26: Train Loss: 0.0448 | Head LR: 0.000043
--- Final Epoch 27/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.83it/s]


Epoch 27: Train Loss: 0.0453 | Head LR: 0.000024
--- Final Epoch 28/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.83it/s]


Epoch 28: Train Loss: 0.0453 | Head LR: 0.000011
--- Final Epoch 29/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.83it/s]


Epoch 29: Train Loss: 0.0421 | Head LR: 0.000003
--- Final Epoch 30/30 ---


Training: 100%|██████████| 23/23 [00:12<00:00,  1.80it/s]

Epoch 30: Train Loss: 0.0424 | Head LR: 0.000000
\n--- Final Training Complete ---
Final TeacherModel saved to: CSIRO---Image2Biomass-Prediction/KnowledgeDistillation/teacher_model_output/FINAL_teacher_model.pth



