In [1]:
import os
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
TOKEN = user_secrets.get_secret("GITHUB_TOKEN")
USERNAME = 'ada-yl2425'
REPO_NAME = 'CSIRO---Image2Biomass-Prediction'
!git clone https://{USERNAME}:{TOKEN}@github.com/{USERNAME}/{REPO_NAME}.git
!git pull origin main
!ls

Cloning into 'CSIRO---Image2Biomass-Prediction'...
remote: Enumerating objects: 511, done.[K
remote: Total 511 (delta 0), reused 0 (delta 0), pack-reused 511 (from 2)[K
Receiving objects: 100% (511/511), 1.08 GiB | 19.73 MiB/s, done.
Resolving deltas: 100% (87/87), done.
Updating files: 100% (379/379), done.
fatal: not a git repository (or any parent up to mount point /kaggle)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
CSIRO---Image2Biomass-Prediction  __notebook__.ipynb


In [2]:
!pip install torch torchvision pandas scikit-learn pillow tqdm timm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curan

In [3]:
import os
import argparse
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split, KFold
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import warnings
import sys

In [4]:
# 忽略 PIL 的一些警告
warnings.filterwarnings("ignore", "(Possibly corrupt EXIF data|Truncated File Read)")

In [5]:
# --- 1. Evaluation metric (weighted R2) ---
def calculate_weighted_r2(y_true, y_pred, device):
    """Compute a single, globally weighted R2 score on the original scale.

    The five target columns are weighted by ``[0.1, 0.1, 0.1, 0.2, 0.5]``
    (per the original author's note this is meant to match the official
    competition formula):

        y_mean_w = sum(w_j * y_ij) / sum(w_j)      (over all N*5 elements)
        SS_res   = sum(w_j * (y_ij - yhat_ij)^2)
        SS_tot   = sum(w_j * (y_ij - y_mean_w)^2)
        R2       = 1 - SS_res / SS_tot

    Args:
        y_true: Ground-truth tensor of shape [N, 5] (original scale).
        y_pred: Prediction tensor of shape [N, 5] (original scale).
        device: Device on which the weight vector is created; it has to be
            compatible with the device of ``y_true`` / ``y_pred``.

    Returns:
        The weighted R2 as a Python float.
    """
    eps = 1e-6  # guards both divisions against a zero denominator
    w = torch.tensor([0.1, 0.1, 0.1, 0.2, 0.5], dtype=torch.float32).to(device)  # shape [5]

    # Residual sum of squares; the [5] weights broadcast across the N rows.
    residual_ss = (w * (y_true - y_pred) ** 2).sum()

    # Global weighted mean: the weights are expanded to [N, 5] so the
    # denominator counts every row's weights.
    weight_total = w.expand_as(y_true).sum()
    weighted_mean = (w * y_true).sum() / (weight_total + eps)

    # Total sum of squares around that scalar weighted mean.
    total_ss = (w * (y_true - weighted_mean) ** 2).sum()

    return (1.0 - (residual_ss / (total_ss + eps))).item()

In [6]:
# --- 2. Custom dataset ---
# (Same as in teacher_train; the student training loop needs every field.)
class PastureDataset(Dataset):
    """Dataset returning an image plus tabular features and targets per row.

    ``df`` is indexed by image path; only the last ``/``-separated component
    of that index value is used and looked up inside ``img_dir``.
    """

    def __init__(self, df, img_dir, transforms, img_size):
        """Store the frame, image directory, transform callable and image size."""
        self.df, self.img_dir = df, img_dir
        self.transforms, self.img_size = transforms, img_size

        # Column groups read from each row in ``__getitem__``.
        self.numeric_cols = ['Pre_GSHH_NDVI', 'Height_Ave_cm', 'month_sin', 'month_cos']
        self.categorical_cols = ['State_encoded', 'Species_encoded']
        self.log_target_cols = [
            'log_Dry_Green_g', 'log_Dry_Dead_g', 'log_Dry_Clover_g',
            'log_GDM_g', 'log_Dry_Total_g',
        ]
        self.orig_target_cols = [
            'Dry_Green_g', 'Dry_Dead_g', 'Dry_Clover_g', 'GDM_g', 'Dry_Total_g',
        ]

    def __len__(self):
        """Number of rows in the underlying frame."""
        return len(self.df)

    @staticmethod
    def _columns_to_tensor(record, columns, np_dtype, torch_dtype):
        """Convert the selected columns of one row into a 1-D tensor."""
        return torch.tensor(record[columns].values.astype(np_dtype), dtype=torch_dtype)

    def __getitem__(self, idx):
        """Return a dict with the image tensor and the four feature/target tensors."""
        record = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, record.name.rsplit('/', 1)[-1])

        # Best-effort loading: any failure while opening or transforming the
        # image is reported and replaced by an all-zero image.
        try:
            image = self.transforms(Image.open(img_path).convert('RGB'))
        except Exception as e:
            print(f"Warning: Error loading image {img_path}. Using a dummy image. Error: {e}")
            image = torch.zeros((3, self.img_size, self.img_size))

        return {
            'image': image,
            'numeric': self._columns_to_tensor(
                record, self.numeric_cols, np.float32, torch.float32),
            'categorical': self._columns_to_tensor(
                record, self.categorical_cols, np.int64, torch.long),
            'log_target': self._columns_to_tensor(
                record, self.log_target_cols, np.float32, torch.float32),
            'orig_target': self._columns_to_tensor(
                record, self.orig_target_cols, np.float32, torch.float32),
        }

In [7]:
# --- 3. Training and validation loops ---

def train_one_epoch_student(student_model, teacher_model, loader, criterion, optimizer, device):
    """Run one distillation epoch and return the mean per-batch training loss.

    The teacher is evaluated without gradients on (image, numeric,
    categorical); the student sees the image only. ``criterion`` is called as
    ``criterion(student_output, teacher_output, y_true)`` and only the
    parameters held by ``optimizer`` are stepped.
    """
    student_model.train()
    teacher_model.eval()  # the teacher is always kept in evaluation mode
    running_loss = 0.0
    input_keys = ('image', 'numeric', 'categorical', 'log_target')

    for batch in tqdm(loader, desc="Distilling"):
        # Move every tensor the step needs onto the target device.
        images, numeric_feats, cat_feats, targets = (
            batch[key].to(device) for key in input_keys
        )

        optimizer.zero_grad()

        # Soft labels from the teacher; no graph is built for them.
        with torch.no_grad():
            soft_labels = teacher_model(images, numeric_feats, cat_feats)

        student_out = student_model(images)

        # Distillation loss combining the teacher output and the true labels.
        batch_loss = criterion(student_out, soft_labels, targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(loader)

def validate_student(model, loader, criterion, device):
    """Evaluate the student on ``loader``.

    The model receives the image only. ``criterion`` is called with
    (log-scale prediction, log-scale target), i.e. it is expected to be the
    hard-label loss so the reported value is comparable across runs.
    Predictions are mapped back with ``expm1`` before the weighted R2 is
    computed against the original-scale targets.

    Returns:
        Tuple ``(mean per-batch validation loss, weighted R2)``.
    """
    model.eval()
    loss_sum = 0.0
    preds_per_batch, targets_per_batch = [], []

    with torch.no_grad():
        for batch in tqdm(loader, desc="Validating Student"):
            images = batch['image'].to(device)
            log_targets = batch['log_target'].to(device)
            raw_targets = batch['orig_target'].to(device)

            log_preds = model(images)
            loss_sum += criterion(log_preds, log_targets).item()

            # Undo the log1p transform so R2 is measured on the original scale.
            preds_per_batch.append(torch.expm1(log_preds))
            targets_per_batch.append(raw_targets)

    # Concatenate all batches before scoring so R2 is global, not per batch.
    stacked_preds = torch.cat(preds_per_batch, dim=0)
    stacked_targets = torch.cat(targets_per_batch, dim=0)

    r2_score = calculate_weighted_r2(stacked_targets, stacked_preds, device)
    mean_loss = loss_sum / len(loader)
    return mean_loss, r2_score

In [8]:
# --- 4. Main routine (student K-fold cross-validation) ---
def main(args):
    """Distil a student model from a trained teacher with 5-fold cross-validation.

    The teacher checkpoint is loaded once and kept in eval mode. For every
    fold a fresh ``StudentModel`` is trained with ``StudentLoss`` and
    validated with ``WeightedMSELoss``; the student's state dict is saved to
    ``args.output_dir`` each time the validation R2 improves, and training
    stops early after ``args.early_stopping_patience`` epochs without
    improvement. Per-fold best R2 values and their mean / standard deviation
    are printed at the end.

    Args:
        args: Namespace providing ``data_csv``, ``img_dir``, ``output_dir``,
            ``teacher_model_path``, ``img_size``, ``lr``, ``batch_size``,
            ``epochs``, ``num_workers``, ``early_stopping_patience`` and
            ``alpha``.
    """
    # Select the device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Checkpoints are written here; create it so torch.save cannot fail on a
    # missing directory when main() is called directly.
    os.makedirs(args.output_dir, exist_ok=True)

    # Load the data.
    df = pd.read_csv(args.data_csv, index_col='image_path')

    # Category counts (needed to construct the teacher).
    num_states = df['State_encoded'].nunique()
    num_species = df['Species_encoded'].nunique()
    print(f"Found {num_states} states and {num_species} species.")

    # --- Load the trained teacher model (once) ---
    print(f"Loading trained Teacher model from: {args.teacher_model_path}")
    teacher_model = TeacherModel(num_states, num_species).to(device)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only
    # machine (and vice versa) instead of failing during deserialisation.
    teacher_model.load_state_dict(
        torch.load(args.teacher_model_path, map_location=device)
    )
    teacher_model.eval()  # stays in evaluation mode for the whole run
    print("Teacher model loaded successfully.")
    # ----------------------------------------------------

    # Image preprocessing (same as in teacher_train, per the original author).
    train_transforms = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(90),
        transforms.RandomAffine(degrees=0, translate=(0.15, 0.15), shear=15),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    val_transforms = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # K-fold CV setup.
    N_SPLITS = 5
    kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
    all_fold_best_r2 = []

    # LR schedulers (imported here so main() is self-contained).
    from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR

    # K-fold training loop.
    for fold, (train_indices, val_indices) in enumerate(kf.split(df)):
        print(f"========== FOLD {fold + 1}/{N_SPLITS} ==========\n")

        # 1. Build the datasets and loaders for this fold.
        train_df = df.iloc[train_indices]
        val_df = df.iloc[val_indices]
        train_dataset = PastureDataset(train_df, args.img_dir, train_transforms, args.img_size)
        val_dataset = PastureDataset(val_df, args.img_dir, val_transforms, args.img_size)
        train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)
        val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)

        # 2. Re-initialise the student model (and, below, its optimizer) per fold.
        student_model = StudentModel().to(device)

        # 3. Two loss functions: distillation for training, hard-label for validation.
        criterion_train = StudentLoss(alpha=args.alpha)  # training (soft + hard)
        criterion_val = WeightedMSELoss()                # validation (hard only)

        # 4. Differential learning rates: parameters whose name starts with one
        #    of these prefixes form the "head" group; everything else trainable
        #    is treated as backbone.
        head_param_names = [
            'patch_projector',
            'query_tokens',
            'transformer_decoder',
            'prediction_head'
        ]
        head_params = []
        backbone_params = []

        for name, param in student_model.named_parameters():
            if not param.requires_grad:
                continue
            is_head = any(name.startswith(head_name) for head_name in head_param_names)
            if is_head:
                head_params.append(param)
            else:
                backbone_params.append(param)

        param_groups = [
            {'params': backbone_params, 'lr': args.lr},
            {'params': head_params, 'lr': args.lr * 10}  # head trains at 10x the base LR
        ]

        optimizer = optim.AdamW(param_groups, lr=args.lr, weight_decay=1e-3)

        # LR schedule: linear warm-up followed by cosine annealing.
        TOTAL_EPOCHS = args.epochs
        WARMUP_EPOCHS = 5  # the first 5 epochs are warm-up
        # CosineAnnealingLR divides by T_max, so keep it >= 1 even when the
        # run is configured with no more epochs than the warm-up length.
        cosine_epochs = max(1, TOTAL_EPOCHS - WARMUP_EPOCHS)

        scheduler_warmup = LinearLR(optimizer, start_factor=0.1, total_iters=WARMUP_EPOCHS)
        scheduler_cosine = CosineAnnealingLR(optimizer, T_max=cosine_epochs, eta_min=1e-7)
        scheduler = SequentialLR(optimizer, schedulers=[scheduler_warmup, scheduler_cosine], milestones=[WARMUP_EPOCHS])

        # 5. Epoch loop with early stopping on validation R2.
        best_val_r2 = -float('inf')
        patience_counter = 0

        for epoch in range(args.epochs):
            print(f"--- Fold {fold+1}, Epoch {epoch+1}/{args.epochs} ---")

            train_loss = train_one_epoch_student(
                student_model, teacher_model, train_loader, criterion_train, optimizer, device
            )

            val_loss, val_r2 = validate_student(
                student_model, val_loader, criterion_val, device
            )

            print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val R2: {val_r2:.4f}")

            # Step the scheduler once per epoch, after the optimizer steps of
            # that epoch, and without arguments.
            scheduler.step()

            if val_r2 > best_val_r2:
                best_val_r2 = val_r2
                patience_counter = 0
                save_path = os.path.join(args.output_dir, f"best_student_model_fold_{fold+1}.pth")
                torch.save(student_model.state_dict(), save_path)
                print(f"New best model for fold {fold+1} saved with R2: {best_val_r2:.4f}")
            else:
                patience_counter += 1
                print(f"No improvement. Patience: {patience_counter}/{args.early_stopping_patience}")

            if patience_counter >= args.early_stopping_patience:
                print(f"--- Early stopping triggered at epoch {epoch+1} ---")
                break

        print(f"Fold {fold+1} complete. Best Validation R2: {best_val_r2:.4f}")
        all_fold_best_r2.append(best_val_r2)
        print("=============================\n")

    # Summary after all folds.
    print("\n--- Student K-Fold Cross-Validation Complete ---")
    print(f"R2 scores for each fold: {all_fold_best_r2}")
    print(f"Average R2: {np.mean(all_fold_best_r2):.4f}")
    print(f"Std Dev R2: {np.std(all_fold_best_r2):.4f}")

In [9]:
# Make the cloned repository importable so the project-local packages resolve.
project_root = 'CSIRO---Image2Biomass-Prediction'
if project_root not in sys.path:
    sys.path.append(project_root)


from KnowledgeDistillation.teacher_model import TeacherModel
from KnowledgeDistillation.student_model import StudentModel
from KnowledgeDistillation.loss import WeightedMSELoss, StudentLoss
from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train Student Model via Distillation")

    # --- Paths (all relative to the cloned repository) ---
    parser.add_argument(
        '--data_csv',
        type=str,
        default=os.path.join(project_root, 'csiro-biomass/preprocessing_output/train_processed.csv'),
    )
    parser.add_argument(
        '--img_dir',
        type=str,
        default=os.path.join(project_root, 'csiro-biomass/train'),
    )

    # Directory that receives the per-fold student checkpoints.
    output_path = os.path.join(project_root, 'KnowledgeDistillation/student_model_output')
    parser.add_argument(
        '--output_dir',
        type=str,
        default=output_path,
        help='Directory to save the best student model',
    )

    # Checkpoint of the already-trained teacher model.
    parser.add_argument(
        '--teacher_model_path',
        type=str,
        default=os.path.join(
            project_root,
            'KnowledgeDistillation/teacher_model_output/FINAL_teacher_model.pth',
        ),
        help='Path to the FINAL trained teacher model',
    )

    # --- Training hyper-parameters ---
    parser.add_argument('--img_size', type=int, default=260)

    # Per the original author, the same base LR as used for teacher fine-tuning.
    parser.add_argument(
        '--lr',
        type=float,
        default=1e-4,
        help='Base learning rate (Backbone)',
    )

    # Plain integer options; `epochs` is an upper bound, early stopping
    # normally ends a fold sooner.
    for flag, default in (
        ('--batch_size', 16),
        ('--epochs', 150),
        ('--num_workers', 2),
        ('--early_stopping_patience', 15),
    ):
        parser.add_argument(flag, type=int, default=default)

    # --- Distillation hyper-parameters ---
    parser.add_argument(
        '--alpha',
        type=float,
        default=0.6,
        help='Weight for Hard Loss (Student vs Labels). Soft Loss = (1-alpha)',
    )

    # Parse an empty argv so the notebook kernel's own command line is ignored
    # and every option takes its default.
    args = parser.parse_args(args=[])
    os.makedirs(args.output_dir, exist_ok=True)
    print(f"Student models will be saved to: {args.output_dir}")
    print(f"Reading data from: {args.data_csv}")

    main(args)



Student models will be saved to: CSIRO---Image2Biomass-Prediction/KnowledgeDistillation/student_model_output
Reading data from: CSIRO---Image2Biomass-Prediction/csiro-biomass/preprocessing_output/train_processed.csv
Using device: cuda
Found 4 states and 15 species.
Loading trained Teacher model from: CSIRO---Image2Biomass-Prediction/KnowledgeDistillation/teacher_model_output/FINAL_teacher_model.pth


model.safetensors:   0%|          | 0.00/36.8M [00:00<?, ?B/s]

Teacher model loaded successfully.

--- Fold 1, Epoch 1/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 1: Train Loss: 0.5716 | Val Loss: 0.1667 | Val R2: 0.2283
New best model for fold 1 saved with R2: 0.2283
--- Fold 1, Epoch 2/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.81it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 2: Train Loss: 0.1682 | Val Loss: 0.1500 | Val R2: -0.0335
No improvement. Patience: 1/15
--- Fold 1, Epoch 3/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 3: Train Loss: 0.1623 | Val Loss: 0.1173 | Val R2: 0.3302
New best model for fold 1 saved with R2: 0.3302
--- Fold 1, Epoch 4/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 4: Train Loss: 0.1394 | Val Loss: 0.1161 | Val R2: 0.4405
New best model for fold 1 saved with R2: 0.4405
--- Fold 1, Epoch 5/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 5: Train Loss: 0.1327 | Val Loss: 0.1036 | Val R2: 0.3681
No improvement. Patience: 1/15
--- Fold 1, Epoch 6/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 6: Train Loss: 0.1231 | Val Loss: 0.1694 | Val R2: 0.1877
No improvement. Patience: 2/15
--- Fold 1, Epoch 7/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 7: Train Loss: 0.0990 | Val Loss: 0.1219 | Val R2: 0.4060
No improvement. Patience: 3/15
--- Fold 1, Epoch 8/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 8: Train Loss: 0.0941 | Val Loss: 0.1109 | Val R2: 0.3144
No improvement. Patience: 4/15
--- Fold 1, Epoch 9/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.06it/s]


Epoch 9: Train Loss: 0.0901 | Val Loss: 0.1354 | Val R2: 0.4040
No improvement. Patience: 5/15
--- Fold 1, Epoch 10/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.81it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 10: Train Loss: 0.0898 | Val Loss: 0.1322 | Val R2: -0.0323
No improvement. Patience: 6/15
--- Fold 1, Epoch 11/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 11: Train Loss: 0.0924 | Val Loss: 0.1083 | Val R2: 0.4534
New best model for fold 1 saved with R2: 0.4534
--- Fold 1, Epoch 12/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 12: Train Loss: 0.1054 | Val Loss: 0.0907 | Val R2: 0.5278
New best model for fold 1 saved with R2: 0.5278
--- Fold 1, Epoch 13/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 13: Train Loss: 0.0881 | Val Loss: 0.0816 | Val R2: 0.5444
New best model for fold 1 saved with R2: 0.5444
--- Fold 1, Epoch 14/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.05it/s]


Epoch 14: Train Loss: 0.0793 | Val Loss: 0.1040 | Val R2: 0.2637
No improvement. Patience: 1/15
--- Fold 1, Epoch 15/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 15: Train Loss: 0.0972 | Val Loss: 0.0947 | Val R2: 0.3721
No improvement. Patience: 2/15
--- Fold 1, Epoch 16/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 16: Train Loss: 0.0825 | Val Loss: 0.0823 | Val R2: 0.4459
No improvement. Patience: 3/15
--- Fold 1, Epoch 17/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.04it/s]


Epoch 17: Train Loss: 0.0887 | Val Loss: 0.0812 | Val R2: 0.4253
No improvement. Patience: 4/15
--- Fold 1, Epoch 18/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 18: Train Loss: 0.0746 | Val Loss: 0.0774 | Val R2: 0.4785
No improvement. Patience: 5/15
--- Fold 1, Epoch 19/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 19: Train Loss: 0.0598 | Val Loss: 0.0738 | Val R2: 0.5810
New best model for fold 1 saved with R2: 0.5810
--- Fold 1, Epoch 20/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 20: Train Loss: 0.0732 | Val Loss: 0.0833 | Val R2: 0.1798
No improvement. Patience: 1/15
--- Fold 1, Epoch 21/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 21: Train Loss: 0.0596 | Val Loss: 0.0746 | Val R2: 0.4732
No improvement. Patience: 2/15
--- Fold 1, Epoch 22/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 22: Train Loss: 0.0581 | Val Loss: 0.0776 | Val R2: 0.5624
No improvement. Patience: 3/15
--- Fold 1, Epoch 23/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.81it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 23: Train Loss: 0.0675 | Val Loss: 0.0771 | Val R2: 0.5859
New best model for fold 1 saved with R2: 0.5859
--- Fold 1, Epoch 24/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 24: Train Loss: 0.0652 | Val Loss: 0.0608 | Val R2: 0.6040
New best model for fold 1 saved with R2: 0.6040
--- Fold 1, Epoch 25/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 25: Train Loss: 0.0712 | Val Loss: 0.0793 | Val R2: 0.5901
No improvement. Patience: 1/15
--- Fold 1, Epoch 26/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.06it/s]


Epoch 26: Train Loss: 0.0593 | Val Loss: 0.0922 | Val R2: 0.5457
No improvement. Patience: 2/15
--- Fold 1, Epoch 27/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 27: Train Loss: 0.0615 | Val Loss: 0.0781 | Val R2: 0.5817
No improvement. Patience: 3/15
--- Fold 1, Epoch 28/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 28: Train Loss: 0.0580 | Val Loss: 0.0917 | Val R2: 0.2388
No improvement. Patience: 4/15
--- Fold 1, Epoch 29/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 29: Train Loss: 0.0524 | Val Loss: 0.0841 | Val R2: 0.2561
No improvement. Patience: 5/15
--- Fold 1, Epoch 30/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 30: Train Loss: 0.0529 | Val Loss: 0.0703 | Val R2: 0.4299
No improvement. Patience: 6/15
--- Fold 1, Epoch 31/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 31: Train Loss: 0.0447 | Val Loss: 0.0746 | Val R2: 0.5096
No improvement. Patience: 7/15
--- Fold 1, Epoch 32/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.04it/s]


Epoch 32: Train Loss: 0.0796 | Val Loss: 0.0632 | Val R2: 0.5880
No improvement. Patience: 8/15
--- Fold 1, Epoch 33/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 33: Train Loss: 0.0655 | Val Loss: 0.0823 | Val R2: 0.3831
No improvement. Patience: 9/15
--- Fold 1, Epoch 34/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.05it/s]


Epoch 34: Train Loss: 0.0611 | Val Loss: 0.1000 | Val R2: 0.0672
No improvement. Patience: 10/15
--- Fold 1, Epoch 35/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 35: Train Loss: 0.0582 | Val Loss: 0.0706 | Val R2: 0.5334
No improvement. Patience: 11/15
--- Fold 1, Epoch 36/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 36: Train Loss: 0.0531 | Val Loss: 0.0675 | Val R2: 0.6266
New best model for fold 1 saved with R2: 0.6266
--- Fold 1, Epoch 37/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 37: Train Loss: 0.0662 | Val Loss: 0.0653 | Val R2: 0.6281
New best model for fold 1 saved with R2: 0.6281
--- Fold 1, Epoch 38/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 38: Train Loss: 0.0431 | Val Loss: 0.0697 | Val R2: 0.5711
No improvement. Patience: 1/15
--- Fold 1, Epoch 39/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 39: Train Loss: 0.0630 | Val Loss: 0.0689 | Val R2: 0.6304
New best model for fold 1 saved with R2: 0.6304
--- Fold 1, Epoch 40/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 40: Train Loss: 0.0566 | Val Loss: 0.0676 | Val R2: 0.6450
New best model for fold 1 saved with R2: 0.6450
--- Fold 1, Epoch 41/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 41: Train Loss: 0.0519 | Val Loss: 0.0587 | Val R2: 0.6673
New best model for fold 1 saved with R2: 0.6673
--- Fold 1, Epoch 42/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 42: Train Loss: 0.0544 | Val Loss: 0.0737 | Val R2: 0.5359
No improvement. Patience: 1/15
--- Fold 1, Epoch 43/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.06it/s]


Epoch 43: Train Loss: 0.0644 | Val Loss: 0.0675 | Val R2: 0.5529
No improvement. Patience: 2/15
--- Fold 1, Epoch 44/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 44: Train Loss: 0.0505 | Val Loss: 0.0639 | Val R2: 0.6609
No improvement. Patience: 3/15
--- Fold 1, Epoch 45/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 45: Train Loss: 0.0505 | Val Loss: 0.0614 | Val R2: 0.6209
No improvement. Patience: 4/15
--- Fold 1, Epoch 46/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 46: Train Loss: 0.0437 | Val Loss: 0.0667 | Val R2: 0.4704
No improvement. Patience: 5/15
--- Fold 1, Epoch 47/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.80it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 47: Train Loss: 0.0507 | Val Loss: 0.0635 | Val R2: 0.6431
No improvement. Patience: 6/15
--- Fold 1, Epoch 48/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 48: Train Loss: 0.0461 | Val Loss: 0.0698 | Val R2: 0.6087
No improvement. Patience: 7/15
--- Fold 1, Epoch 49/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.04it/s]


Epoch 49: Train Loss: 0.0593 | Val Loss: 0.0589 | Val R2: 0.6473
No improvement. Patience: 8/15
--- Fold 1, Epoch 50/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 50: Train Loss: 0.0584 | Val Loss: 0.0563 | Val R2: 0.6995
New best model for fold 1 saved with R2: 0.6995
--- Fold 1, Epoch 51/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 51: Train Loss: 0.0417 | Val Loss: 0.0683 | Val R2: 0.5413
No improvement. Patience: 1/15
--- Fold 1, Epoch 52/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 52: Train Loss: 0.0429 | Val Loss: 0.0557 | Val R2: 0.6100
No improvement. Patience: 2/15
--- Fold 1, Epoch 53/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 53: Train Loss: 0.0469 | Val Loss: 0.0581 | Val R2: 0.6853
No improvement. Patience: 3/15
--- Fold 1, Epoch 54/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 54: Train Loss: 0.0407 | Val Loss: 0.0743 | Val R2: 0.4954
No improvement. Patience: 4/15
--- Fold 1, Epoch 55/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 55: Train Loss: 0.0482 | Val Loss: 0.0619 | Val R2: 0.5579
No improvement. Patience: 5/15
--- Fold 1, Epoch 56/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 56: Train Loss: 0.0439 | Val Loss: 0.0588 | Val R2: 0.6088
No improvement. Patience: 6/15
--- Fold 1, Epoch 57/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.80it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 57: Train Loss: 0.0411 | Val Loss: 0.0623 | Val R2: 0.5752
No improvement. Patience: 7/15
--- Fold 1, Epoch 58/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 58: Train Loss: 0.0395 | Val Loss: 0.0632 | Val R2: 0.6061
No improvement. Patience: 8/15
--- Fold 1, Epoch 59/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 59: Train Loss: 0.0486 | Val Loss: 0.0638 | Val R2: 0.6601
No improvement. Patience: 9/15
--- Fold 1, Epoch 60/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.06it/s]


Epoch 60: Train Loss: 0.0564 | Val Loss: 0.0762 | Val R2: 0.5180
No improvement. Patience: 10/15
--- Fold 1, Epoch 61/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.84it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 61: Train Loss: 0.0499 | Val Loss: 0.0617 | Val R2: 0.5467
No improvement. Patience: 11/15
--- Fold 1, Epoch 62/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 62: Train Loss: 0.0407 | Val Loss: 0.0695 | Val R2: 0.3762
No improvement. Patience: 12/15
--- Fold 1, Epoch 63/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.06it/s]


Epoch 63: Train Loss: 0.0500 | Val Loss: 0.0728 | Val R2: 0.5956
No improvement. Patience: 13/15
--- Fold 1, Epoch 64/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.81it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 64: Train Loss: 0.0376 | Val Loss: 0.0561 | Val R2: 0.6937
No improvement. Patience: 14/15
--- Fold 1, Epoch 65/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.06it/s]


Epoch 65: Train Loss: 0.0364 | Val Loss: 0.0517 | Val R2: 0.6882
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 65 ---
Fold 1 complete. Best Validation R2: 0.6995


--- Fold 2, Epoch 1/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 1: Train Loss: 0.5255 | Val Loss: 0.1645 | Val R2: 0.1032
New best model for fold 2 saved with R2: 0.1032
--- Fold 2, Epoch 2/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 2: Train Loss: 0.1626 | Val Loss: 0.1533 | Val R2: 0.1554
New best model for fold 2 saved with R2: 0.1554
--- Fold 2, Epoch 3/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 3: Train Loss: 0.1530 | Val Loss: 0.1478 | Val R2: 0.0760
No improvement. Patience: 1/15
--- Fold 2, Epoch 4/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.91it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 4: Train Loss: 0.1297 | Val Loss: 0.1286 | Val R2: 0.1972
New best model for fold 2 saved with R2: 0.1972
--- Fold 2, Epoch 5/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


Epoch 5: Train Loss: 0.1201 | Val Loss: 0.1170 | Val R2: 0.3125
New best model for fold 2 saved with R2: 0.3125
--- Fold 2, Epoch 6/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 6: Train Loss: 0.1643 | Val Loss: 0.1218 | Val R2: 0.4321
New best model for fold 2 saved with R2: 0.4321
--- Fold 2, Epoch 7/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 7: Train Loss: 0.1038 | Val Loss: 0.1049 | Val R2: 0.4104
No improvement. Patience: 1/15
--- Fold 2, Epoch 8/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 8: Train Loss: 0.1018 | Val Loss: 0.1062 | Val R2: 0.3387
No improvement. Patience: 2/15
--- Fold 2, Epoch 9/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


Epoch 9: Train Loss: 0.1329 | Val Loss: 0.1335 | Val R2: 0.2086
No improvement. Patience: 3/15
--- Fold 2, Epoch 10/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 10: Train Loss: 0.0895 | Val Loss: 0.1071 | Val R2: 0.2551
No improvement. Patience: 4/15
--- Fold 2, Epoch 11/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 11: Train Loss: 0.1257 | Val Loss: 0.0838 | Val R2: 0.5638
New best model for fold 2 saved with R2: 0.5638
--- Fold 2, Epoch 12/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.95it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 12: Train Loss: 0.0881 | Val Loss: 0.1034 | Val R2: 0.5119
No improvement. Patience: 1/15
--- Fold 2, Epoch 13/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.06it/s]


Epoch 13: Train Loss: 0.0801 | Val Loss: 0.1269 | Val R2: 0.3420
No improvement. Patience: 2/15
--- Fold 2, Epoch 14/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 14: Train Loss: 0.0793 | Val Loss: 0.0717 | Val R2: 0.5720
New best model for fold 2 saved with R2: 0.5720
--- Fold 2, Epoch 15/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


Epoch 15: Train Loss: 0.0816 | Val Loss: 0.0671 | Val R2: 0.6602
New best model for fold 2 saved with R2: 0.6602
--- Fold 2, Epoch 16/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 16: Train Loss: 0.0713 | Val Loss: 0.0741 | Val R2: 0.4067
No improvement. Patience: 1/15
--- Fold 2, Epoch 17/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 17: Train Loss: 0.0664 | Val Loss: 0.0600 | Val R2: 0.6729
New best model for fold 2 saved with R2: 0.6729
--- Fold 2, Epoch 18/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 18: Train Loss: 0.0717 | Val Loss: 0.0718 | Val R2: 0.4366
No improvement. Patience: 1/15
--- Fold 2, Epoch 19/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 19: Train Loss: 0.0924 | Val Loss: 0.0735 | Val R2: 0.4509
No improvement. Patience: 2/15
--- Fold 2, Epoch 20/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 20: Train Loss: 0.0733 | Val Loss: 0.0585 | Val R2: 0.6514
No improvement. Patience: 3/15
--- Fold 2, Epoch 21/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 21: Train Loss: 0.0641 | Val Loss: 0.0523 | Val R2: 0.6707
No improvement. Patience: 4/15
--- Fold 2, Epoch 22/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.04it/s]


Epoch 22: Train Loss: 0.0807 | Val Loss: 0.0658 | Val R2: 0.6621
No improvement. Patience: 5/15
--- Fold 2, Epoch 23/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 23: Train Loss: 0.0709 | Val Loss: 0.0550 | Val R2: 0.6908
New best model for fold 2 saved with R2: 0.6908
--- Fold 2, Epoch 24/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 24: Train Loss: 0.0678 | Val Loss: 0.0508 | Val R2: 0.7169
New best model for fold 2 saved with R2: 0.7169
--- Fold 2, Epoch 25/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.83it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 25: Train Loss: 0.0714 | Val Loss: 0.0645 | Val R2: 0.6488
No improvement. Patience: 1/15
--- Fold 2, Epoch 26/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 26: Train Loss: 0.0604 | Val Loss: 0.0702 | Val R2: 0.6767
No improvement. Patience: 2/15
--- Fold 2, Epoch 27/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 27: Train Loss: 0.0603 | Val Loss: 0.0644 | Val R2: 0.6671
No improvement. Patience: 3/15
--- Fold 2, Epoch 28/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.91it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


Epoch 28: Train Loss: 0.0792 | Val Loss: 0.0587 | Val R2: 0.6446
No improvement. Patience: 4/15
--- Fold 2, Epoch 29/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 29: Train Loss: 0.0577 | Val Loss: 0.0677 | Val R2: 0.5648
No improvement. Patience: 5/15
--- Fold 2, Epoch 30/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 30: Train Loss: 0.0610 | Val Loss: 0.0498 | Val R2: 0.6579
No improvement. Patience: 6/15
--- Fold 2, Epoch 31/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 31: Train Loss: 0.0568 | Val Loss: 0.0501 | Val R2: 0.6657
No improvement. Patience: 7/15
--- Fold 2, Epoch 32/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 32: Train Loss: 0.0521 | Val Loss: 0.0478 | Val R2: 0.6946
No improvement. Patience: 8/15
--- Fold 2, Epoch 33/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.10it/s]


Epoch 33: Train Loss: 0.0503 | Val Loss: 0.0586 | Val R2: 0.6799
No improvement. Patience: 9/15
--- Fold 2, Epoch 34/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 34: Train Loss: 0.0491 | Val Loss: 0.0427 | Val R2: 0.7437
New best model for fold 2 saved with R2: 0.7437
--- Fold 2, Epoch 35/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.91it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


Epoch 35: Train Loss: 0.0551 | Val Loss: 0.0544 | Val R2: 0.6987
No improvement. Patience: 1/15
--- Fold 2, Epoch 36/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 36: Train Loss: 0.0514 | Val Loss: 0.0537 | Val R2: 0.6804
No improvement. Patience: 2/15
--- Fold 2, Epoch 37/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 37: Train Loss: 0.0586 | Val Loss: 0.0585 | Val R2: 0.6703
No improvement. Patience: 3/15
--- Fold 2, Epoch 38/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.07it/s]


Epoch 38: Train Loss: 0.0760 | Val Loss: 0.0563 | Val R2: 0.6205
No improvement. Patience: 4/15
--- Fold 2, Epoch 39/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.94it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.09it/s]


Epoch 39: Train Loss: 0.0525 | Val Loss: 0.0579 | Val R2: 0.5916
No improvement. Patience: 5/15
--- Fold 2, Epoch 40/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 40: Train Loss: 0.0603 | Val Loss: 0.0473 | Val R2: 0.6456
No improvement. Patience: 6/15
--- Fold 2, Epoch 41/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.91it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 41: Train Loss: 0.0641 | Val Loss: 0.0469 | Val R2: 0.7501
New best model for fold 2 saved with R2: 0.7501
--- Fold 2, Epoch 42/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.86it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 42: Train Loss: 0.0471 | Val Loss: 0.0441 | Val R2: 0.7032
No improvement. Patience: 1/15
--- Fold 2, Epoch 43/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 43: Train Loss: 0.0516 | Val Loss: 0.0507 | Val R2: 0.7534
New best model for fold 2 saved with R2: 0.7534
--- Fold 2, Epoch 44/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 44: Train Loss: 0.0415 | Val Loss: 0.0400 | Val R2: 0.7767
New best model for fold 2 saved with R2: 0.7767
--- Fold 2, Epoch 45/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.94it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 45: Train Loss: 0.0609 | Val Loss: 0.0487 | Val R2: 0.6707
No improvement. Patience: 1/15
--- Fold 2, Epoch 46/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 46: Train Loss: 0.0428 | Val Loss: 0.0407 | Val R2: 0.7697
No improvement. Patience: 2/15
--- Fold 2, Epoch 47/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 47: Train Loss: 0.0483 | Val Loss: 0.0531 | Val R2: 0.6637
No improvement. Patience: 3/15
--- Fold 2, Epoch 48/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 48: Train Loss: 0.0462 | Val Loss: 0.0408 | Val R2: 0.7792
New best model for fold 2 saved with R2: 0.7792
--- Fold 2, Epoch 49/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.91it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 49: Train Loss: 0.0445 | Val Loss: 0.0523 | Val R2: 0.6796
No improvement. Patience: 1/15
--- Fold 2, Epoch 50/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.94it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 50: Train Loss: 0.0489 | Val Loss: 0.0445 | Val R2: 0.6886
No improvement. Patience: 2/15
--- Fold 2, Epoch 51/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 51: Train Loss: 0.0435 | Val Loss: 0.0421 | Val R2: 0.7902
New best model for fold 2 saved with R2: 0.7902
--- Fold 2, Epoch 52/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 52: Train Loss: 0.0692 | Val Loss: 0.0549 | Val R2: 0.6762
No improvement. Patience: 1/15
--- Fold 2, Epoch 53/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.95it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.04it/s]


Epoch 53: Train Loss: 0.0554 | Val Loss: 0.0464 | Val R2: 0.6731
No improvement. Patience: 2/15
--- Fold 2, Epoch 54/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.81it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 54: Train Loss: 0.0435 | Val Loss: 0.0391 | Val R2: 0.7663
No improvement. Patience: 3/15
--- Fold 2, Epoch 55/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.87it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 55: Train Loss: 0.0495 | Val Loss: 0.0456 | Val R2: 0.7649
No improvement. Patience: 4/15
--- Fold 2, Epoch 56/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


Epoch 56: Train Loss: 0.0403 | Val Loss: 0.0443 | Val R2: 0.7396
No improvement. Patience: 5/15
--- Fold 2, Epoch 57/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 57: Train Loss: 0.0508 | Val Loss: 0.0450 | Val R2: 0.7553
No improvement. Patience: 6/15
--- Fold 2, Epoch 58/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.85it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.16it/s]


Epoch 58: Train Loss: 0.0460 | Val Loss: 0.0487 | Val R2: 0.7416
No improvement. Patience: 7/15
--- Fold 2, Epoch 59/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 59: Train Loss: 0.0438 | Val Loss: 0.0489 | Val R2: 0.7307
No improvement. Patience: 8/15
--- Fold 2, Epoch 60/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.94it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 60: Train Loss: 0.0407 | Val Loss: 0.0462 | Val R2: 0.7458
No improvement. Patience: 9/15
--- Fold 2, Epoch 61/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 61: Train Loss: 0.0401 | Val Loss: 0.0460 | Val R2: 0.7197
No improvement. Patience: 10/15
--- Fold 2, Epoch 62/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.12it/s]


Epoch 62: Train Loss: 0.0539 | Val Loss: 0.0441 | Val R2: 0.7512
No improvement. Patience: 11/15
--- Fold 2, Epoch 63/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.13it/s]


Epoch 63: Train Loss: 0.0597 | Val Loss: 0.0516 | Val R2: 0.6378
No improvement. Patience: 12/15
--- Fold 2, Epoch 64/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.93it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.15it/s]


Epoch 64: Train Loss: 0.0348 | Val Loss: 0.0439 | Val R2: 0.7085
No improvement. Patience: 13/15
--- Fold 2, Epoch 65/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.90it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.11it/s]


Epoch 65: Train Loss: 0.0436 | Val Loss: 0.0417 | Val R2: 0.7312
No improvement. Patience: 14/15
--- Fold 2, Epoch 66/150 ---


Distilling: 100%|██████████| 18/18 [00:09<00:00,  1.89it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.08it/s]


Epoch 66: Train Loss: 0.0462 | Val Loss: 0.0480 | Val R2: 0.6677
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 66 ---
Fold 2 complete. Best Validation R2: 0.7902


--- Fold 3, Epoch 1/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 1: Train Loss: 0.5833 | Val Loss: 0.2004 | Val R2: 0.1608
New best model for fold 3 saved with R2: 0.1608
--- Fold 3, Epoch 2/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 2: Train Loss: 0.1642 | Val Loss: 0.1637 | Val R2: 0.0677
No improvement. Patience: 1/15
--- Fold 3, Epoch 3/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 3: Train Loss: 0.1494 | Val Loss: 0.1482 | Val R2: 0.3514
New best model for fold 3 saved with R2: 0.3514
--- Fold 3, Epoch 4/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 4: Train Loss: 0.1256 | Val Loss: 0.1172 | Val R2: 0.3936
New best model for fold 3 saved with R2: 0.3936
--- Fold 3, Epoch 5/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 5: Train Loss: 0.1151 | Val Loss: 0.1087 | Val R2: 0.4796
New best model for fold 3 saved with R2: 0.4796
--- Fold 3, Epoch 6/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 6: Train Loss: 0.1086 | Val Loss: 0.1238 | Val R2: 0.1865
No improvement. Patience: 1/15
--- Fold 3, Epoch 7/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 7: Train Loss: 0.1175 | Val Loss: 0.0984 | Val R2: 0.5392
New best model for fold 3 saved with R2: 0.5392
--- Fold 3, Epoch 8/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 8: Train Loss: 0.1079 | Val Loss: 0.1139 | Val R2: 0.4173
No improvement. Patience: 1/15
--- Fold 3, Epoch 9/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.92it/s]


Epoch 9: Train Loss: 0.0947 | Val Loss: 0.1024 | Val R2: 0.6116
New best model for fold 3 saved with R2: 0.6116
--- Fold 3, Epoch 10/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 10: Train Loss: 0.1158 | Val Loss: 0.0928 | Val R2: 0.6557
New best model for fold 3 saved with R2: 0.6557
--- Fold 3, Epoch 11/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 11: Train Loss: 0.0913 | Val Loss: 0.1020 | Val R2: 0.5828
No improvement. Patience: 1/15
--- Fold 3, Epoch 12/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 12: Train Loss: 0.0875 | Val Loss: 0.0865 | Val R2: 0.5540
No improvement. Patience: 2/15
--- Fold 3, Epoch 13/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.91it/s]


Epoch 13: Train Loss: 0.1057 | Val Loss: 0.0760 | Val R2: 0.6463
No improvement. Patience: 3/15
--- Fold 3, Epoch 14/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 14: Train Loss: 0.2053 | Val Loss: 0.0909 | Val R2: 0.5079
No improvement. Patience: 4/15
--- Fold 3, Epoch 15/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 15: Train Loss: 0.1027 | Val Loss: 0.0835 | Val R2: 0.4119
No improvement. Patience: 5/15
--- Fold 3, Epoch 16/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 16: Train Loss: 0.1027 | Val Loss: 0.0719 | Val R2: 0.6136
No improvement. Patience: 6/15
--- Fold 3, Epoch 17/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 17: Train Loss: 0.0720 | Val Loss: 0.0683 | Val R2: 0.5802
No improvement. Patience: 7/15
--- Fold 3, Epoch 18/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 18: Train Loss: 0.0704 | Val Loss: 0.0808 | Val R2: 0.4638
No improvement. Patience: 8/15
--- Fold 3, Epoch 19/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 19: Train Loss: 0.1117 | Val Loss: 0.0660 | Val R2: 0.6453
No improvement. Patience: 9/15
--- Fold 3, Epoch 20/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 20: Train Loss: 0.0610 | Val Loss: 0.0700 | Val R2: 0.6947
New best model for fold 3 saved with R2: 0.6947
--- Fold 3, Epoch 21/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 21: Train Loss: 0.0526 | Val Loss: 0.0600 | Val R2: 0.7053
New best model for fold 3 saved with R2: 0.7053
--- Fold 3, Epoch 22/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.69it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 22: Train Loss: 0.0809 | Val Loss: 0.0705 | Val R2: 0.6366
No improvement. Patience: 1/15
--- Fold 3, Epoch 23/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 23: Train Loss: 0.0711 | Val Loss: 0.0745 | Val R2: 0.6360
No improvement. Patience: 2/15
--- Fold 3, Epoch 24/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 24: Train Loss: 0.0627 | Val Loss: 0.0642 | Val R2: 0.5836
No improvement. Patience: 3/15
--- Fold 3, Epoch 25/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 25: Train Loss: 0.0613 | Val Loss: 0.0723 | Val R2: 0.7050
No improvement. Patience: 4/15
--- Fold 3, Epoch 26/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.88it/s]


Epoch 26: Train Loss: 0.0947 | Val Loss: 0.0641 | Val R2: 0.5855
No improvement. Patience: 5/15
--- Fold 3, Epoch 27/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 27: Train Loss: 0.0780 | Val Loss: 0.0842 | Val R2: 0.2587
No improvement. Patience: 6/15
--- Fold 3, Epoch 28/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 28: Train Loss: 0.0590 | Val Loss: 0.0648 | Val R2: 0.7179
New best model for fold 3 saved with R2: 0.7179
--- Fold 3, Epoch 29/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.03it/s]


Epoch 29: Train Loss: 0.0644 | Val Loss: 0.0711 | Val R2: 0.6295
No improvement. Patience: 1/15
--- Fold 3, Epoch 30/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 30: Train Loss: 0.0683 | Val Loss: 0.0842 | Val R2: 0.3779
No improvement. Patience: 2/15
--- Fold 3, Epoch 31/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 31: Train Loss: 0.0494 | Val Loss: 0.0675 | Val R2: 0.6754
No improvement. Patience: 3/15
--- Fold 3, Epoch 32/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 32: Train Loss: 0.0558 | Val Loss: 0.0709 | Val R2: 0.6662
No improvement. Patience: 4/15
--- Fold 3, Epoch 33/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 33: Train Loss: 0.0657 | Val Loss: 0.0583 | Val R2: 0.7232
New best model for fold 3 saved with R2: 0.7232
--- Fold 3, Epoch 34/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 34: Train Loss: 0.0598 | Val Loss: 0.0690 | Val R2: 0.6005
No improvement. Patience: 1/15
--- Fold 3, Epoch 35/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 35: Train Loss: 0.0479 | Val Loss: 0.0599 | Val R2: 0.6942
No improvement. Patience: 2/15
--- Fold 3, Epoch 36/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 36: Train Loss: 0.0588 | Val Loss: 0.0764 | Val R2: 0.5633
No improvement. Patience: 3/15
--- Fold 3, Epoch 37/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 37: Train Loss: 0.0520 | Val Loss: 0.0630 | Val R2: 0.6553
No improvement. Patience: 4/15
--- Fold 3, Epoch 38/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 38: Train Loss: 0.0490 | Val Loss: 0.0655 | Val R2: 0.7176
No improvement. Patience: 5/15
--- Fold 3, Epoch 39/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 39: Train Loss: 0.0544 | Val Loss: 0.0676 | Val R2: 0.6291
No improvement. Patience: 6/15
--- Fold 3, Epoch 40/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 40: Train Loss: 0.0555 | Val Loss: 0.0665 | Val R2: 0.7265
New best model for fold 3 saved with R2: 0.7265
--- Fold 3, Epoch 41/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 41: Train Loss: 0.0581 | Val Loss: 0.0654 | Val R2: 0.7161
No improvement. Patience: 1/15
--- Fold 3, Epoch 42/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 42: Train Loss: 0.0441 | Val Loss: 0.0547 | Val R2: 0.7482
New best model for fold 3 saved with R2: 0.7482
--- Fold 3, Epoch 43/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 43: Train Loss: 0.0402 | Val Loss: 0.0657 | Val R2: 0.6272
No improvement. Patience: 1/15
--- Fold 3, Epoch 44/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 44: Train Loss: 0.0634 | Val Loss: 0.0588 | Val R2: 0.7581
New best model for fold 3 saved with R2: 0.7581
--- Fold 3, Epoch 45/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.91it/s]


Epoch 45: Train Loss: 0.0468 | Val Loss: 0.0667 | Val R2: 0.4955
No improvement. Patience: 1/15
--- Fold 3, Epoch 46/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 46: Train Loss: 0.0521 | Val Loss: 0.0787 | Val R2: 0.3582
No improvement. Patience: 2/15
--- Fold 3, Epoch 47/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.79it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 47: Train Loss: 0.0590 | Val Loss: 0.0591 | Val R2: 0.5751
No improvement. Patience: 3/15
--- Fold 3, Epoch 48/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 48: Train Loss: 0.0548 | Val Loss: 0.0967 | Val R2: -0.1537
No improvement. Patience: 4/15
--- Fold 3, Epoch 49/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 49: Train Loss: 0.0613 | Val Loss: 0.0484 | Val R2: 0.7763
New best model for fold 3 saved with R2: 0.7763
--- Fold 3, Epoch 50/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 50: Train Loss: 0.0381 | Val Loss: 0.0563 | Val R2: 0.6422
No improvement. Patience: 1/15
--- Fold 3, Epoch 51/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 51: Train Loss: 0.0418 | Val Loss: 0.0706 | Val R2: 0.6255
No improvement. Patience: 2/15
--- Fold 3, Epoch 52/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 52: Train Loss: 0.0471 | Val Loss: 0.0725 | Val R2: 0.5694
No improvement. Patience: 3/15
--- Fold 3, Epoch 53/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 53: Train Loss: 0.0408 | Val Loss: 0.0630 | Val R2: 0.7328
No improvement. Patience: 4/15
--- Fold 3, Epoch 54/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 54: Train Loss: 0.0353 | Val Loss: 0.0529 | Val R2: 0.7681
No improvement. Patience: 5/15
--- Fold 3, Epoch 55/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 55: Train Loss: 0.0707 | Val Loss: 0.0774 | Val R2: 0.1763
No improvement. Patience: 6/15
--- Fold 3, Epoch 56/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 56: Train Loss: 0.0480 | Val Loss: 0.0565 | Val R2: 0.7710
No improvement. Patience: 7/15
--- Fold 3, Epoch 57/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 57: Train Loss: 0.0474 | Val Loss: 0.0679 | Val R2: 0.4456
No improvement. Patience: 8/15
--- Fold 3, Epoch 58/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 58: Train Loss: 0.0481 | Val Loss: 0.0591 | Val R2: 0.7305
No improvement. Patience: 9/15
--- Fold 3, Epoch 59/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 59: Train Loss: 0.0522 | Val Loss: 0.0551 | Val R2: 0.7604
No improvement. Patience: 10/15
--- Fold 3, Epoch 60/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 60: Train Loss: 0.0438 | Val Loss: 0.0526 | Val R2: 0.7652
No improvement. Patience: 11/15
--- Fold 3, Epoch 61/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 61: Train Loss: 0.0437 | Val Loss: 0.0503 | Val R2: 0.7602
No improvement. Patience: 12/15
--- Fold 3, Epoch 62/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.78it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 62: Train Loss: 0.0441 | Val Loss: 0.0745 | Val R2: 0.2168
No improvement. Patience: 13/15
--- Fold 3, Epoch 63/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 63: Train Loss: 0.0460 | Val Loss: 0.0505 | Val R2: 0.7059
No improvement. Patience: 14/15
--- Fold 3, Epoch 64/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 64: Train Loss: 0.0533 | Val Loss: 0.0723 | Val R2: 0.7199
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 64 ---
Fold 3 complete. Best Validation R2: 0.7763


--- Fold 4, Epoch 1/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 1: Train Loss: 0.5315 | Val Loss: 0.1696 | Val R2: -0.0343
New best model for fold 4 saved with R2: -0.0343
--- Fold 4, Epoch 2/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 2: Train Loss: 0.1730 | Val Loss: 0.1256 | Val R2: 0.2048
New best model for fold 4 saved with R2: 0.2048
--- Fold 4, Epoch 3/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 3: Train Loss: 0.1488 | Val Loss: 0.1142 | Val R2: 0.3636
New best model for fold 4 saved with R2: 0.3636
--- Fold 4, Epoch 4/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 4: Train Loss: 0.1524 | Val Loss: 0.1057 | Val R2: 0.3932
New best model for fold 4 saved with R2: 0.3932
--- Fold 4, Epoch 5/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.87it/s]


Epoch 5: Train Loss: 0.1227 | Val Loss: 0.0977 | Val R2: 0.4233
New best model for fold 4 saved with R2: 0.4233
--- Fold 4, Epoch 6/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 6: Train Loss: 0.1384 | Val Loss: 0.0986 | Val R2: 0.4366
New best model for fold 4 saved with R2: 0.4366
--- Fold 4, Epoch 7/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 7: Train Loss: 0.1290 | Val Loss: 0.1082 | Val R2: 0.4251
No improvement. Patience: 1/15
--- Fold 4, Epoch 8/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 8: Train Loss: 0.1298 | Val Loss: 0.1151 | Val R2: 0.4523
New best model for fold 4 saved with R2: 0.4523
--- Fold 4, Epoch 9/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 9: Train Loss: 0.1165 | Val Loss: 0.1042 | Val R2: 0.3742
No improvement. Patience: 1/15
--- Fold 4, Epoch 10/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 10: Train Loss: 0.0990 | Val Loss: 0.1099 | Val R2: 0.4598
New best model for fold 4 saved with R2: 0.4598
--- Fold 4, Epoch 11/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 11: Train Loss: 0.1048 | Val Loss: 0.0885 | Val R2: 0.5166
New best model for fold 4 saved with R2: 0.5166
--- Fold 4, Epoch 12/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 12: Train Loss: 0.0941 | Val Loss: 0.1024 | Val R2: 0.3801
No improvement. Patience: 1/15
--- Fold 4, Epoch 13/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 13: Train Loss: 0.0841 | Val Loss: 0.0831 | Val R2: 0.5747
New best model for fold 4 saved with R2: 0.5747
--- Fold 4, Epoch 14/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 14: Train Loss: 0.0843 | Val Loss: 0.0782 | Val R2: 0.3400
No improvement. Patience: 1/15
--- Fold 4, Epoch 15/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 15: Train Loss: 0.0822 | Val Loss: 0.0754 | Val R2: 0.5743
No improvement. Patience: 2/15
--- Fold 4, Epoch 16/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 16: Train Loss: 0.0700 | Val Loss: 0.1005 | Val R2: 0.4769
No improvement. Patience: 3/15
--- Fold 4, Epoch 17/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 17: Train Loss: 0.0787 | Val Loss: 0.0745 | Val R2: 0.4872
No improvement. Patience: 4/15
--- Fold 4, Epoch 18/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 18: Train Loss: 0.0737 | Val Loss: 0.0642 | Val R2: 0.4310
No improvement. Patience: 5/15
--- Fold 4, Epoch 19/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 19: Train Loss: 0.0760 | Val Loss: 0.0713 | Val R2: 0.5928
New best model for fold 4 saved with R2: 0.5928
--- Fold 4, Epoch 20/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 20: Train Loss: 0.1191 | Val Loss: 0.0757 | Val R2: 0.5714
No improvement. Patience: 1/15
--- Fold 4, Epoch 21/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 21: Train Loss: 0.0738 | Val Loss: 0.0731 | Val R2: 0.5942
New best model for fold 4 saved with R2: 0.5942
--- Fold 4, Epoch 22/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 22: Train Loss: 0.0893 | Val Loss: 0.0925 | Val R2: 0.5060
No improvement. Patience: 1/15
--- Fold 4, Epoch 23/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 23: Train Loss: 0.0946 | Val Loss: 0.0652 | Val R2: 0.6458
New best model for fold 4 saved with R2: 0.6458
--- Fold 4, Epoch 24/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 24: Train Loss: 0.0612 | Val Loss: 0.0607 | Val R2: 0.6262
No improvement. Patience: 1/15
--- Fold 4, Epoch 25/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 25: Train Loss: 0.0583 | Val Loss: 0.0802 | Val R2: 0.4138
No improvement. Patience: 2/15
--- Fold 4, Epoch 26/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 26: Train Loss: 0.0656 | Val Loss: 0.0621 | Val R2: 0.6438
No improvement. Patience: 3/15
--- Fold 4, Epoch 27/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 27: Train Loss: 0.0654 | Val Loss: 0.0602 | Val R2: 0.4618
No improvement. Patience: 4/15
--- Fold 4, Epoch 28/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 28: Train Loss: 0.0712 | Val Loss: 0.0522 | Val R2: 0.6269
No improvement. Patience: 5/15
--- Fold 4, Epoch 29/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 29: Train Loss: 0.0570 | Val Loss: 0.0626 | Val R2: 0.6232
No improvement. Patience: 6/15
--- Fold 4, Epoch 30/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 30: Train Loss: 0.0779 | Val Loss: 0.0598 | Val R2: 0.6844
New best model for fold 4 saved with R2: 0.6844
--- Fold 4, Epoch 31/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 31: Train Loss: 0.0570 | Val Loss: 0.0616 | Val R2: 0.6768
No improvement. Patience: 1/15
--- Fold 4, Epoch 32/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 32: Train Loss: 0.0526 | Val Loss: 0.0618 | Val R2: 0.6750
No improvement. Patience: 2/15
--- Fold 4, Epoch 33/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 33: Train Loss: 0.0591 | Val Loss: 0.0598 | Val R2: 0.5969
No improvement. Patience: 3/15
--- Fold 4, Epoch 34/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 34: Train Loss: 0.0762 | Val Loss: 0.0505 | Val R2: 0.6659
No improvement. Patience: 4/15
--- Fold 4, Epoch 35/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 35: Train Loss: 0.0519 | Val Loss: 0.0464 | Val R2: 0.6550
No improvement. Patience: 5/15
--- Fold 4, Epoch 36/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 36: Train Loss: 0.0638 | Val Loss: 0.0500 | Val R2: 0.6612
No improvement. Patience: 6/15
--- Fold 4, Epoch 37/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 37: Train Loss: 0.0496 | Val Loss: 0.0727 | Val R2: 0.5052
No improvement. Patience: 7/15
--- Fold 4, Epoch 38/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 38: Train Loss: 0.0498 | Val Loss: 0.0741 | Val R2: 0.6198
No improvement. Patience: 8/15
--- Fold 4, Epoch 39/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 39: Train Loss: 0.0635 | Val Loss: 0.0546 | Val R2: 0.5917
No improvement. Patience: 9/15
--- Fold 4, Epoch 40/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 40: Train Loss: 0.0435 | Val Loss: 0.0604 | Val R2: 0.6671
No improvement. Patience: 10/15
--- Fold 4, Epoch 41/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 41: Train Loss: 0.1296 | Val Loss: 0.0668 | Val R2: 0.5827
No improvement. Patience: 11/15
--- Fold 4, Epoch 42/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 42: Train Loss: 0.0610 | Val Loss: 0.0691 | Val R2: 0.5726
No improvement. Patience: 12/15
--- Fold 4, Epoch 43/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 43: Train Loss: 0.0602 | Val Loss: 0.0633 | Val R2: 0.6581
No improvement. Patience: 13/15
--- Fold 4, Epoch 44/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 44: Train Loss: 0.0438 | Val Loss: 0.0604 | Val R2: 0.6772
No improvement. Patience: 14/15
--- Fold 4, Epoch 45/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 45: Train Loss: 0.0511 | Val Loss: 0.0469 | Val R2: 0.7229
New best model for fold 4 saved with R2: 0.7229
--- Fold 4, Epoch 46/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 46: Train Loss: 0.0497 | Val Loss: 0.0585 | Val R2: 0.6365
No improvement. Patience: 1/15
--- Fold 4, Epoch 47/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 47: Train Loss: 0.0617 | Val Loss: 0.1170 | Val R2: -0.3023
No improvement. Patience: 2/15
--- Fold 4, Epoch 48/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 48: Train Loss: 0.0577 | Val Loss: 0.0688 | Val R2: 0.4941
No improvement. Patience: 3/15
--- Fold 4, Epoch 49/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 49: Train Loss: 0.0521 | Val Loss: 0.0518 | Val R2: 0.7122
No improvement. Patience: 4/15
--- Fold 4, Epoch 50/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.77it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 50: Train Loss: 0.0458 | Val Loss: 0.0525 | Val R2: 0.7286
New best model for fold 4 saved with R2: 0.7286
--- Fold 4, Epoch 51/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 51: Train Loss: 0.0460 | Val Loss: 0.0533 | Val R2: 0.6998
No improvement. Patience: 1/15
--- Fold 4, Epoch 52/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 52: Train Loss: 0.0724 | Val Loss: 0.0510 | Val R2: 0.7035
No improvement. Patience: 2/15
--- Fold 4, Epoch 53/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 53: Train Loss: 0.0711 | Val Loss: 0.0524 | Val R2: 0.6727
No improvement. Patience: 3/15
--- Fold 4, Epoch 54/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 54: Train Loss: 0.0410 | Val Loss: 0.0571 | Val R2: 0.6915
No improvement. Patience: 4/15
--- Fold 4, Epoch 55/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.01it/s]


Epoch 55: Train Loss: 0.0489 | Val Loss: 0.0646 | Val R2: 0.5080
No improvement. Patience: 5/15
--- Fold 4, Epoch 56/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 56: Train Loss: 0.0694 | Val Loss: 0.0489 | Val R2: 0.6585
No improvement. Patience: 6/15
--- Fold 4, Epoch 57/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.02it/s]


Epoch 57: Train Loss: 0.0522 | Val Loss: 0.0533 | Val R2: 0.6414
No improvement. Patience: 7/15
--- Fold 4, Epoch 58/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 58: Train Loss: 0.0412 | Val Loss: 0.0446 | Val R2: 0.6952
No improvement. Patience: 8/15
--- Fold 4, Epoch 59/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 59: Train Loss: 0.0462 | Val Loss: 0.0504 | Val R2: 0.6386
No improvement. Patience: 9/15
--- Fold 4, Epoch 60/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 60: Train Loss: 0.0442 | Val Loss: 0.0516 | Val R2: 0.6968
No improvement. Patience: 10/15
--- Fold 4, Epoch 61/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 61: Train Loss: 0.0646 | Val Loss: 0.0497 | Val R2: 0.4929
No improvement. Patience: 11/15
--- Fold 4, Epoch 62/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.76it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 62: Train Loss: 0.0430 | Val Loss: 0.0531 | Val R2: 0.6724
No improvement. Patience: 12/15
--- Fold 4, Epoch 63/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 63: Train Loss: 0.0471 | Val Loss: 0.0672 | Val R2: 0.5417
No improvement. Patience: 13/15
--- Fold 4, Epoch 64/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 64: Train Loss: 0.0468 | Val Loss: 0.0489 | Val R2: 0.7184
No improvement. Patience: 14/15
--- Fold 4, Epoch 65/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 65: Train Loss: 0.0407 | Val Loss: 0.0460 | Val R2: 0.7068
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 65 ---
Fold 4 complete. Best Validation R2: 0.7286


--- Fold 5, Epoch 1/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 1: Train Loss: 0.5607 | Val Loss: 0.2099 | Val R2: 0.0134
New best model for fold 5 saved with R2: 0.0134
--- Fold 5, Epoch 2/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.92it/s]


Epoch 2: Train Loss: 0.1673 | Val Loss: 0.1468 | Val R2: 0.1015
New best model for fold 5 saved with R2: 0.1015
--- Fold 5, Epoch 3/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 3: Train Loss: 0.1509 | Val Loss: 0.1389 | Val R2: 0.2810
New best model for fold 5 saved with R2: 0.2810
--- Fold 5, Epoch 4/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 4: Train Loss: 0.1296 | Val Loss: 0.1268 | Val R2: 0.3068
New best model for fold 5 saved with R2: 0.3068
--- Fold 5, Epoch 5/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 5: Train Loss: 0.1237 | Val Loss: 0.1539 | Val R2: 0.1247
No improvement. Patience: 1/15
--- Fold 5, Epoch 6/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.90it/s]


Epoch 6: Train Loss: 0.1140 | Val Loss: 0.1229 | Val R2: 0.4278
New best model for fold 5 saved with R2: 0.4278
--- Fold 5, Epoch 7/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 7: Train Loss: 0.1091 | Val Loss: 0.1229 | Val R2: 0.4397
New best model for fold 5 saved with R2: 0.4397
--- Fold 5, Epoch 8/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 8: Train Loss: 0.1021 | Val Loss: 0.0927 | Val R2: 0.5168
New best model for fold 5 saved with R2: 0.5168
--- Fold 5, Epoch 9/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.84it/s]


Epoch 9: Train Loss: 0.1010 | Val Loss: 0.0847 | Val R2: 0.5323
New best model for fold 5 saved with R2: 0.5323
--- Fold 5, Epoch 10/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 10: Train Loss: 0.0897 | Val Loss: 0.0984 | Val R2: 0.4988
No improvement. Patience: 1/15
--- Fold 5, Epoch 11/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 11: Train Loss: 0.0854 | Val Loss: 0.0964 | Val R2: 0.4338
No improvement. Patience: 2/15
--- Fold 5, Epoch 12/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 12: Train Loss: 0.0742 | Val Loss: 0.0893 | Val R2: 0.5056
No improvement. Patience: 3/15
--- Fold 5, Epoch 13/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 13: Train Loss: 0.0743 | Val Loss: 0.0848 | Val R2: 0.4399
No improvement. Patience: 4/15
--- Fold 5, Epoch 14/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 14: Train Loss: 0.0816 | Val Loss: 0.0718 | Val R2: 0.6211
New best model for fold 5 saved with R2: 0.6211
--- Fold 5, Epoch 15/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 15: Train Loss: 0.0630 | Val Loss: 0.0706 | Val R2: 0.5158
No improvement. Patience: 1/15
--- Fold 5, Epoch 16/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 16: Train Loss: 0.0701 | Val Loss: 0.0748 | Val R2: 0.4922
No improvement. Patience: 2/15
--- Fold 5, Epoch 17/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


Epoch 17: Train Loss: 0.0722 | Val Loss: 0.1032 | Val R2: 0.3537
No improvement. Patience: 3/15
--- Fold 5, Epoch 18/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 18: Train Loss: 0.0644 | Val Loss: 0.0886 | Val R2: 0.5989
No improvement. Patience: 4/15
--- Fold 5, Epoch 19/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 19: Train Loss: 0.0614 | Val Loss: 0.0782 | Val R2: 0.4073
No improvement. Patience: 5/15
--- Fold 5, Epoch 20/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 20: Train Loss: 0.0628 | Val Loss: 0.0772 | Val R2: 0.5148
No improvement. Patience: 6/15
--- Fold 5, Epoch 21/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.91it/s]


Epoch 21: Train Loss: 0.0559 | Val Loss: 0.0670 | Val R2: 0.6695
New best model for fold 5 saved with R2: 0.6695
--- Fold 5, Epoch 22/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.99it/s]


Epoch 22: Train Loss: 0.0617 | Val Loss: 0.0604 | Val R2: 0.6233
No improvement. Patience: 1/15
--- Fold 5, Epoch 23/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.74it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 23: Train Loss: 0.0590 | Val Loss: 0.0716 | Val R2: 0.4563
No improvement. Patience: 2/15
--- Fold 5, Epoch 24/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.97it/s]


Epoch 24: Train Loss: 0.0571 | Val Loss: 0.0750 | Val R2: 0.5822
No improvement. Patience: 3/15
--- Fold 5, Epoch 25/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 25: Train Loss: 0.0561 | Val Loss: 0.0710 | Val R2: 0.6347
No improvement. Patience: 4/15
--- Fold 5, Epoch 26/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 26: Train Loss: 0.0567 | Val Loss: 0.0698 | Val R2: 0.5127
No improvement. Patience: 5/15
--- Fold 5, Epoch 27/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.75it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 27: Train Loss: 0.0567 | Val Loss: 0.0680 | Val R2: 0.6630
No improvement. Patience: 6/15
--- Fold 5, Epoch 28/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 28: Train Loss: 0.0562 | Val Loss: 0.0630 | Val R2: 0.6810
New best model for fold 5 saved with R2: 0.6810
--- Fold 5, Epoch 29/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.92it/s]


Epoch 29: Train Loss: 0.0649 | Val Loss: 0.0696 | Val R2: 0.5320
No improvement. Patience: 1/15
--- Fold 5, Epoch 30/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 30: Train Loss: 0.0518 | Val Loss: 0.0657 | Val R2: 0.6367
No improvement. Patience: 2/15
--- Fold 5, Epoch 31/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.98it/s]


Epoch 31: Train Loss: 0.0546 | Val Loss: 0.0733 | Val R2: 0.5379
No improvement. Patience: 3/15
--- Fold 5, Epoch 32/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 32: Train Loss: 0.0600 | Val Loss: 0.0791 | Val R2: 0.5932
No improvement. Patience: 4/15
--- Fold 5, Epoch 33/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 33: Train Loss: 0.0559 | Val Loss: 0.0727 | Val R2: 0.6444
No improvement. Patience: 5/15
--- Fold 5, Epoch 34/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 34: Train Loss: 0.0520 | Val Loss: 0.0647 | Val R2: 0.6341
No improvement. Patience: 6/15
--- Fold 5, Epoch 35/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.73it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.92it/s]


Epoch 35: Train Loss: 0.0504 | Val Loss: 0.0583 | Val R2: 0.6571
No improvement. Patience: 7/15
--- Fold 5, Epoch 36/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.68it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.87it/s]


Epoch 36: Train Loss: 0.0515 | Val Loss: 0.0786 | Val R2: 0.5599
No improvement. Patience: 8/15
--- Fold 5, Epoch 37/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]


Epoch 37: Train Loss: 0.0545 | Val Loss: 0.0670 | Val R2: 0.6007
No improvement. Patience: 9/15
--- Fold 5, Epoch 38/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.72it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 38: Train Loss: 0.0577 | Val Loss: 0.0650 | Val R2: 0.6279
No improvement. Patience: 10/15
--- Fold 5, Epoch 39/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.94it/s]


Epoch 39: Train Loss: 0.0563 | Val Loss: 0.0704 | Val R2: 0.5895
No improvement. Patience: 11/15
--- Fold 5, Epoch 40/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.71it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.83it/s]


Epoch 40: Train Loss: 0.0440 | Val Loss: 0.0747 | Val R2: 0.5951
No improvement. Patience: 12/15
--- Fold 5, Epoch 41/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


Epoch 41: Train Loss: 0.0462 | Val Loss: 0.0714 | Val R2: 0.5757
No improvement. Patience: 13/15
--- Fold 5, Epoch 42/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.68it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch 42: Train Loss: 0.0498 | Val Loss: 0.0719 | Val R2: 0.5323
No improvement. Patience: 14/15
--- Fold 5, Epoch 43/150 ---


Distilling: 100%|██████████| 18/18 [00:10<00:00,  1.70it/s]
Validating Student: 100%|██████████| 5/5 [00:02<00:00,  1.96it/s]

Epoch 43: Train Loss: 0.0652 | Val Loss: 0.0881 | Val R2: 0.4755
No improvement. Patience: 15/15
--- Early stopping triggered at epoch 43 ---
Fold 5 complete. Best Validation R2: 0.6810


--- Student K-Fold Cross-Validation Complete ---
R2 scores for each fold: [0.6994709968566895, 0.7902425527572632, 0.7762737274169922, 0.7286093235015869, 0.6809571981430054]
Average R2: 0.7351
Std Dev R2: 0.0424





In [10]:
# --- 5. [New] Final training function (no KFold, no validation) ---
def main_final_training(args):
    # 1. 设置
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # 2. 加载数据
    df = pd.read_csv(args.data_csv, index_col='image_path')
    
    # 获取类别数量 (用于初始化 Teacher)
    num_states = df['State_encoded'].nunique()
    num_species = df['Species_encoded'].nunique()
    print(f"Found {num_states} states and {num_species} species.")
    print(f"--- Starting FINAL STUDENT training using ALL {len(df)} samples ---")

    # 3. [关键] 加载训练好的 Teacher Model (加载一次)
    print(f"Loading trained Teacher model from: {args.teacher_model_path}")
    teacher_model = TeacherModel(num_states, num_species).to(device)
    teacher_model.load_state_dict(torch.load(args.teacher_model_path))
    teacher_model.eval() # 永久设置为评估模式
    print("Teacher model loaded successfully.")

    # 4. 图像预处理 (只使用训练增强)
    train_transforms = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(90),
        transforms.RandomAffine(degrees=0, translate=(0.15, 0.15), shear=15),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # 5. 创建完整的数据集 (不再有 train/val 拆分)
    final_train_dataset = PastureDataset(df, args.img_dir, train_transforms, args.img_size)
    
    # 适当增大 Batch Size (例如 16 -> 24)
    final_batch_size = int(args.batch_size * 1.5)
    print(f"Using final batch size: {final_batch_size}")
    
    final_train_loader = DataLoader(
        final_train_dataset, 
        batch_size=final_batch_size, 
        shuffle=True, 
        num_workers=args.num_workers
    )

    # 6. 初始化 Student 模型
    student_model = StudentModel().to(device) # (确保这是 V3 架构)
    
    # 7. 初始化损失函数
    criterion_train = StudentLoss(alpha=args.alpha) # (只剩训练损失)

    # 8. 设置差分学习率 (使用 V3 架构的模块名)
    head_param_names = [
        'patch_projector',
        'query_tokens',
        'transformer_decoder',
        'prediction_head'
    ]
    head_params = []
    backbone_params = []

    for name, param in student_model.named_parameters():
        if not param.requires_grad:
            continue
        is_head = any(name.startswith(head_name) for head_name in head_param_names)
        if is_head:
            head_params.append(param)
        else:
            backbone_params.append(param)
            
    param_groups = [
        {'params': backbone_params, 'lr': args.lr}, # e.g., 1e-4
        {'params': head_params, 'lr': args.lr * 10} # e.g., 1e-3
    ]

    optimizer = optim.AdamW(param_groups, lr=args.lr, weight_decay=1e-3)
    
    # 9. [关键] 设置 Warmup + Cosine 调度器
    from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR
    
    # 1. 预热调度器：在 warmup_epochs 轮内从 0.1*LR 升到 LR
    scheduler_warmup = LinearLR(optimizer, start_factor=0.1, total_iters=args.warmup_epochs)
    
    # 2. 余弦调度器：在 (total - warmup) 轮内从 LR 降到 0
    cosine_epochs = args.final_epochs - args.warmup_epochs
    scheduler_cosine = CosineAnnealingLR(optimizer, T_max=cosine_epochs, eta_min=1e-7)

    # 3. 组合它们
    scheduler = SequentialLR(optimizer, schedulers=[scheduler_warmup, scheduler_cosine], milestones=[args.warmup_epochs])
    
    print(f"Training for {args.final_epochs} epochs ({args.warmup_epochs} warmup + {cosine_epochs} cosine)...")
    
    # 10. 最终训练循环
    for epoch in range(args.final_epochs):
        print(f"--- Final Epoch {epoch+1}/{args.final_epochs} ---")
        
        # [注意] 我们只调用 train_one_epoch_student
        train_loss = train_one_epoch_student(
            student_model, teacher_model, final_train_loader, criterion_train, optimizer, device
        )
        
        # 更新学习率
        scheduler.step()
        
        current_lr_head = optimizer.param_groups[1]['lr']
        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f} | Head LR: {current_lr_head:.6f}")

    # 11. 保存最终模型
    save_path = os.path.join(args.output_dir, "FINAL_student_model.pth")
    torch.save(student_model.state_dict(), save_path)
    print(f"\n--- Final Student Training Complete ---")
    print(f"Final StudentModel saved to: {save_path}")

In [11]:
# --- 5. [Modified] Runner (Student version) ---
# Make the cloned repository importable, then parse the default configuration
# and launch the final (no validation) student training run.
project_root = 'CSIRO---Image2Biomass-Prediction'
if project_root not in sys.path:
    sys.path.append(project_root)

from KnowledgeDistillation.teacher_model import TeacherModel
from KnowledgeDistillation.student_model import StudentModel
from KnowledgeDistillation.loss import WeightedMSELoss, StudentLoss

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train Student Model via Distillation")

    # --- Paths (unchanged) ---
    # All path options are plain strings rooted at the cloned repository;
    # they are registered in the order listed here.
    output_path = os.path.join(project_root, 'KnowledgeDistillation/student_model_output')
    for flag, default_path in (
        ('--data_csv',
         os.path.join(project_root, 'csiro-biomass/preprocessing_output/train_processed.csv')),
        ('--img_dir',
         os.path.join(project_root, 'csiro-biomass/train')),
        ('--output_dir',
         output_path),
        ('--teacher_model_path',
         os.path.join(project_root, 'KnowledgeDistillation/teacher_model_output/FINAL_teacher_model.pth')),
    ):
        parser.add_argument(flag, type=str, default=default_path)

    # --- Training hyperparameters ---
    parser.add_argument('--img_size', default=260, type=int)
    parser.add_argument('--lr', default=1e-4, type=float,
                        help='Base learning rate (Backbone)')
    parser.add_argument('--batch_size', default=16, type=int,
                        help='Base batch size (will be increased by 1.5x for final training)')
    parser.add_argument('--num_workers', default=2, type=int)

    # --- [New] Final training length (early stopping is no longer used) ---
    parser.add_argument('--final_epochs', default=47, type=int,
                        help='Total epochs for final training')
    parser.add_argument('--warmup_epochs', default=5, type=int,
                        help='Epochs for LR warmup')

    # --- [New] Distillation hyperparameter (use the alpha you consider best) ---
    parser.add_argument('--alpha', default=0.6, type=float,
                        help='Weight for Hard Loss (Student vs Labels). Soft Loss = (1-alpha)')

    # --- [Removed] K-Fold and early-stopping options ---
    # '--epochs' (default 150) was replaced by '--final_epochs';
    # '--early_stopping_patience' (default 15) was dropped.

    # ------------------------
    # An empty argv keeps argparse from reading the notebook kernel's own
    # command line, so every option takes its default.
    args = parser.parse_args([])
    os.makedirs(args.output_dir, exist_ok=True)
    print(f"Student models will be saved to: {args.output_dir}")
    print(f"Reading data from: {args.data_csv}")

    # [Switch] The K-Fold entry point main(args) is intentionally not called;
    # the final full-data training function runs instead.
    main_final_training(args)

Student models will be saved to: CSIRO---Image2Biomass-Prediction/KnowledgeDistillation/student_model_output
Reading data from: CSIRO---Image2Biomass-Prediction/csiro-biomass/preprocessing_output/train_processed.csv
Using device: cuda
Found 4 states and 15 species.
--- Starting FINAL STUDENT training using ALL 357 samples ---
Loading trained Teacher model from: CSIRO---Image2Biomass-Prediction/KnowledgeDistillation/teacher_model_output/FINAL_teacher_model.pth
Teacher model loaded successfully.
Using final batch size: 24
Training for 47 epochs (5 warmup + 42 cosine)...
--- Final Epoch 1/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s]


Epoch 1: Train Loss: 0.7699 | Head LR: 0.000280
--- Final Epoch 2/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.10it/s]


Epoch 2: Train Loss: 0.1758 | Head LR: 0.000460
--- Final Epoch 3/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.10it/s]


Epoch 3: Train Loss: 0.1502 | Head LR: 0.000640
--- Final Epoch 4/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 4: Train Loss: 0.1545 | Head LR: 0.000820
--- Final Epoch 5/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 5: Train Loss: 0.1216 | Head LR: 0.001000
--- Final Epoch 6/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 6: Train Loss: 0.1178 | Head LR: 0.000999
--- Final Epoch 7/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s]


Epoch 7: Train Loss: 0.1081 | Head LR: 0.000994
--- Final Epoch 8/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 8: Train Loss: 0.0920 | Head LR: 0.000987
--- Final Epoch 9/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.13it/s]


Epoch 9: Train Loss: 0.1029 | Head LR: 0.000978
--- Final Epoch 10/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s]


Epoch 10: Train Loss: 0.1306 | Head LR: 0.000965
--- Final Epoch 11/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s]


Epoch 11: Train Loss: 0.0972 | Head LR: 0.000950
--- Final Epoch 12/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.10it/s]


Epoch 12: Train Loss: 0.0776 | Head LR: 0.000933
--- Final Epoch 13/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 13: Train Loss: 0.0985 | Head LR: 0.000913
--- Final Epoch 14/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.13it/s]


Epoch 14: Train Loss: 0.0851 | Head LR: 0.000891
--- Final Epoch 15/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s]


Epoch 15: Train Loss: 0.0945 | Head LR: 0.000867
--- Final Epoch 16/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 16: Train Loss: 0.0713 | Head LR: 0.000840
--- Final Epoch 17/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s]


Epoch 17: Train Loss: 0.0634 | Head LR: 0.000812
--- Final Epoch 18/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 18: Train Loss: 0.0880 | Head LR: 0.000782
--- Final Epoch 19/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 19: Train Loss: 0.0723 | Head LR: 0.000750
--- Final Epoch 20/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s]


Epoch 20: Train Loss: 0.0588 | Head LR: 0.000717
--- Final Epoch 21/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.13it/s]


Epoch 21: Train Loss: 0.0600 | Head LR: 0.000683
--- Final Epoch 22/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.12it/s]


Epoch 22: Train Loss: 0.0775 | Head LR: 0.000647
--- Final Epoch 23/47 ---


Distilling: 100%|██████████| 15/15 [00:12<00:00,  1.16it/s]


Epoch 23: Train Loss: 0.0542 | Head LR: 0.000611
--- Final Epoch 24/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.15it/s]


Epoch 24: Train Loss: 0.0618 | Head LR: 0.000575
--- Final Epoch 25/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.14it/s]


Epoch 25: Train Loss: 0.0679 | Head LR: 0.000537
--- Final Epoch 26/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.15it/s]


Epoch 26: Train Loss: 0.0503 | Head LR: 0.000500
--- Final Epoch 27/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.15it/s]


Epoch 27: Train Loss: 0.0494 | Head LR: 0.000463
--- Final Epoch 28/47 ---


Distilling: 100%|██████████| 15/15 [00:12<00:00,  1.16it/s]


Epoch 28: Train Loss: 0.0516 | Head LR: 0.000426
--- Final Epoch 29/47 ---


Distilling: 100%|██████████| 15/15 [00:12<00:00,  1.16it/s]


Epoch 29: Train Loss: 0.0493 | Head LR: 0.000389
--- Final Epoch 30/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.14it/s]


Epoch 30: Train Loss: 0.0727 | Head LR: 0.000353
--- Final Epoch 31/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.15it/s]


Epoch 31: Train Loss: 0.0584 | Head LR: 0.000317
--- Final Epoch 32/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.14it/s]


Epoch 32: Train Loss: 0.0504 | Head LR: 0.000283
--- Final Epoch 33/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.13it/s]


Epoch 33: Train Loss: 0.0505 | Head LR: 0.000250
--- Final Epoch 34/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.15it/s]


Epoch 34: Train Loss: 0.0474 | Head LR: 0.000218
--- Final Epoch 35/47 ---


Distilling: 100%|██████████| 15/15 [00:12<00:00,  1.16it/s]


Epoch 35: Train Loss: 0.0473 | Head LR: 0.000188
--- Final Epoch 36/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.14it/s]


Epoch 36: Train Loss: 0.0400 | Head LR: 0.000160
--- Final Epoch 37/47 ---


Distilling: 100%|██████████| 15/15 [00:12<00:00,  1.17it/s]


Epoch 37: Train Loss: 0.0402 | Head LR: 0.000134
--- Final Epoch 38/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.11it/s]


Epoch 38: Train Loss: 0.0430 | Head LR: 0.000109
--- Final Epoch 39/47 ---


Distilling: 100%|██████████| 15/15 [00:12<00:00,  1.17it/s]


Epoch 39: Train Loss: 0.0393 | Head LR: 0.000087
--- Final Epoch 40/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.15it/s]


Epoch 40: Train Loss: 0.0399 | Head LR: 0.000067
--- Final Epoch 41/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.14it/s]


Epoch 41: Train Loss: 0.0404 | Head LR: 0.000050
--- Final Epoch 42/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.14it/s]


Epoch 42: Train Loss: 0.0486 | Head LR: 0.000035
--- Final Epoch 43/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.13it/s]


Epoch 43: Train Loss: 0.0482 | Head LR: 0.000022
--- Final Epoch 44/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.14it/s]


Epoch 44: Train Loss: 0.0386 | Head LR: 0.000013
--- Final Epoch 45/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.13it/s]


Epoch 45: Train Loss: 0.0371 | Head LR: 0.000006
--- Final Epoch 46/47 ---


Distilling: 100%|██████████| 15/15 [00:12<00:00,  1.16it/s]


Epoch 46: Train Loss: 0.0428 | Head LR: 0.000001
--- Final Epoch 47/47 ---


Distilling: 100%|██████████| 15/15 [00:13<00:00,  1.15it/s]

Epoch 47: Train Loss: 0.0399 | Head LR: 0.000000

--- Final Student Training Complete ---
Final StudentModel saved to: CSIRO---Image2Biomass-Prediction/KnowledgeDistillation/student_model_output/FINAL_student_model.pth



