In [2]:
import os
import csv
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.transforms import v2 as T
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                             f1_score, confusion_matrix)
import seaborn as sns


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Sử dụng thiết bị: {device}")

class DynamicGraphAttention(nn.Module):
    def __init__(self, in_channels, num_nodes=32, heads=8, dropout=0.1):  # Tăng num_nodes/heads mặc định
        super().__init__()
        self.num_nodes = num_nodes
        self.heads = heads
        self.head_dim = in_channels // heads
        self.dropout = nn.Dropout(dropout)

        self.query = nn.Linear(in_channels, in_channels)
        self.key = nn.Linear(in_channels, in_channels)
        self.value = nn.Linear(in_channels, in_channels)
        self.out = nn.Linear(in_channels, in_channels)
        self.scale = nn.Parameter(torch.tensor(self.head_dim**-0.5))

        # Positional encoding (simple learnable)
        self.pos_enc = nn.Parameter(torch.randn(1, num_nodes, in_channels))

    def forward(self, x):
        B, C, H, W = x.size()
        x_flat = x.flatten(2).transpose(1, 2)  # (B, H*W, C)
        
        num_pixels = H * W
        N = min(self.num_nodes, num_pixels)
        
        # Importance sampling: Chọn top-k pixels dựa trên L2 norm (thay vì random/linspace)
        norms = torch.norm(x_flat, dim=-1)  # (B, H*W)
        _, indices = torch.topk(norms, N, dim=-1)  # Top-k indices
        indices = indices.sort(dim=-1).indices  # Sort để ổn định
        x_nodes = torch.gather(x_flat, 1, indices.unsqueeze(-1).expand(-1, -1, C))  # (B, N, C)
        
        # Thêm positional encoding
        x_nodes = x_nodes + self.pos_enc.expand(B, -1, -1)
        
        q = self.query(x_nodes).view(B, N, self.heads, self.head_dim).transpose(1, 2)
        k = self.key(x_nodes).view(B, N, self.heads, self.head_dim).transpose(1, 2)
        v = self.value(x_nodes).view(B, N, self.heads, self.head_dim).transpose(1, 2)
        
        attn_weights = torch.matmul(q, k.transpose(-2, -1)) * self.scale
        attn_weights = F.softmax(attn_weights, dim=-1)
        attn_weights = self.dropout(attn_weights)  # Thêm dropout
        attn_output = torch.matmul(attn_weights, v)
        attn_output = attn_output.transpose(1, 2).contiguous().view(B, N, C)
        
        out_nodes = self.out(attn_output)
        
        out_flat = torch.zeros_like(x_flat)
        out_flat.scatter_(1, indices.unsqueeze(-1).expand(-1, -1, C), out_nodes)
        out = out_flat.transpose(1, 2).view(B, C, H, W)
        
        return x + out  # Residual connection

class TemporalAttentionAggregator(nn.Module):
    def __init__(self, in_channels, num_heads=4):
        super().__init__()
        self.attention_net = nn.MultiheadAttention(embed_dim=in_channels, num_heads=num_heads)  # Thay MLP bằng MultiheadAttention
        self.norm = nn.LayerNorm(in_channels)

    def forward(self, x):
        if len(x.shape) == 4:  # (B, C, H, W) -> Giả sử single frame, flatten spatial
            B, C, H, W = x.size()
            x_flat = x.flatten(2).transpose(1, 2)  # (B, H*W, C)
        elif len(x.shape) == 5:  # (B, T, C, H, W) -> Flatten temporal-spatial
            B, T, C, H, W = x.size()
            x_flat = x.flatten(3).transpose(2, 3).reshape(B, T * H * W, C)  # (B, T*H*W, C)
        else:
            raise ValueError("Input must be 4D or 5D tensor")
        
        # Temporal-spatial attention
        x_flat = self.norm(x_flat)
        attn_input = x_flat.permute(1, 0, 2)  # (S, B, E)
        attn_output, _ = self.attention_net(attn_input, attn_input, attn_input)
        attn_output = attn_output.permute(1, 0, 2)  # (B, S, E)
        
        # Aggregate
        aggregated_vector = torch.mean(attn_output, dim=1)  # Mean pooling thay sum để ổn định
        return aggregated_vector

class InvertedResidual(nn.Module):
    def __init__(self, in_channels, out_channels, stride, expansion=6):  # Tăng expansion mặc định
        super().__init__()
        hidden_dim = in_channels * expansion
        self.use_residual = in_channels == out_channels and stride == 1
        
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, hidden_dim, 1, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.SiLU(inplace=True),  # Thay ReLU6 bằng SiLU (Swish)
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.SiLU(inplace=True),
            nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
        )
        
        # Thêm SE block
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(out_channels, out_channels // 16, 1),
            nn.SiLU(),
            nn.Conv2d(out_channels // 16, out_channels, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        out = self.conv(x)
        out = out * self.se(out)  # Áp dụng SE
        return x + out if self.use_residual else out

class PestLarvaNet_Massive(nn.Module):
    def __init__(self, num_classes, input_channels=1):
        super().__init__()
        self.config = [[1, 40, 3, 1], [6, 64, 4, 2], [6, 96, 6, 2], [6, 160, 5, 2], [6, 256, 4, 1]]
        self.stem = nn.Sequential(
            nn.Conv2d(input_channels, 40, 3, 2, 1, bias=False),
            nn.BatchNorm2d(40), nn.SiLU(inplace=True))
        
        layers = []
        in_channels = 40
        for t, c, n, s in self.config:
            for i in range(n):
                stride = s if i == 0 else 1
                layers.append(InvertedResidual(in_channels, c, stride, expansion=t))
                in_channels = c
            if c == 64:
                layers.append(DynamicGraphAttention(in_channels, num_nodes=48, heads=8))
        self.body = nn.Sequential(*layers)
        
        self.head_conv = nn.Conv2d(256, 768, 1, bias=False)
        self.aggregator = TemporalAttentionAggregator(768, num_heads=12)
        self.classifier = nn.Sequential(
            nn.Linear(768, 384), nn.SiLU(inplace=True), nn.Dropout(0.5),
            nn.Linear(384, num_classes))
            
    def forward(self, x):
        x = self.stem(x); x = self.body(x); x = self.head_conv(x)
        x = self.aggregator(x); x = self.classifier(x); return x

def count_parameters(model):
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total, trainable

class InsectSpectrogramDataset(Dataset):
    """Dataset tùy chỉnh để đọc các file spectrogram .pt"""
    def __init__(self, metadata_file, data_dir, transform=None):
        self.metadata = pd.read_csv(metadata_file)
        self.data_dir = data_dir
        self.transform = transform
        self.file_column = 'file_name'
        self.label_column = 'label'
        
        self.class_names = sorted(self.metadata[self.label_column].unique())
        self.class_to_id = {name: i for i, name in enumerate(self.class_names)}
        self.id_to_class = {i: name for name, i in self.class_to_id.items()}

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        file_name = self.metadata.iloc[idx][self.file_column]
        file_path = os.path.join(self.data_dir, file_name)
        label_name = self.metadata.iloc[idx][self.label_column]
        label = self.class_to_id[label_name]
        
        try:
            spectrogram = torch.load(file_path)
            if spectrogram.dim() == 2:
                spectrogram = spectrogram.unsqueeze(0)
            if self.transform:
                spectrogram = self.transform(spectrogram)
            return spectrogram, label
        except Exception as e:
            print(f"Lỗi khi tải file {file_path}: {e}")
            return torch.zeros(1, 128, 259), -1 # Trả về mẫu lỗi

METADATA_PATH = r'F:/metadata_Mel.csv' 
DATA_DIR = r'F:\MelSpectrograms'

class TimeShift(torch.nn.Module):
    """ Dịch chuyển spectrogram theo trục thời gian một cách ngẫu nhiên. """
    def __init__(self, p=0.5, max_shift_percent=0.4):
        super().__init__()
        self.p = p
        self.max_shift_percent = max_shift_percent

    def forward(self, spec):
        if torch.rand(1) < self.p:
            time_steps = spec.shape[-1]
            # Chọn một lượng dịch chuyển ngẫu nhiên
            shift_amount = int(torch.rand(1) * self.max_shift_percent * time_steps)
            # Dịch ngẫu nhiên sang trái hoặc phải
            if torch.rand(1) < 0.5:
                shift_amount = -shift_amount
            return torch.roll(spec, shifts=shift_amount, dims=-1)
        return spec

class SpecNoise(torch.nn.Module):
    """ Thêm nhiễu Gaussian trực tiếp vào spectrogram. """
    def __init__(self, p=0.5, noise_level=0.05):
        super().__init__()
        self.p = p
        self.noise_level = noise_level

    def forward(self, spec):
        if torch.rand(1) < self.p:
            noise = torch.randn_like(spec) * self.noise_level
            return spec + noise
        return spec

class MinMaxNormalize(torch.nn.Module):
    def forward(self, spec):
        min_val = spec.min()
        max_val = spec.max()
        if max_val - min_val != 0:
            return (spec - min_val) / (max_val - min_val)
        return spec

train_transform = T.Compose([
    # Kỹ thuật 1: SpecAugment (Time & Frequency Masking)
    torchaudio.transforms.TimeMasking(time_mask_param=80),
    torchaudio.transforms.FrequencyMasking(freq_mask_param=25),
    
    # Kỹ thuật 2: Time Shifting
    TimeShift(p=0.5, max_shift_percent=0.4),
    
    # Kỹ thuật 3: Spectrogram Noise
    SpecNoise(p=0.3, noise_level=0.03),

    # Normalize
    MinMaxNormalize(),
])

# Tập validation không sử dụng augmentation
val_transform = T.Compose([
    MinMaxNormalize(),
])

print("Đã định nghĩa xong pipeline Augmentation cho Spectrogram.")

def mixup_data(x, y, alpha=0.4, use_cuda=True):
    """
    Trộn dữ liệu trong một batch.
    Trả về input đã trộn, và hai bộ nhãn gốc cùng với hệ số trộn lambda.
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """
    Hàm tính loss đặc biệt cho Mixup.
    """
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

if not os.path.exists(METADATA_PATH) or not os.path.isdir(DATA_DIR):
    print("!!! LỖI: Vui lòng kiểm tra lại đường dẫn METADATA_PATH và DATA_DIR.")
else:
    full_dataset = InsectSpectrogramDataset(METADATA_PATH, DATA_DIR)
    num_classes = len(full_dataset.class_names)
    print(f"Tải dữ liệu thành công: {len(full_dataset)} mẫu, {num_classes} lớp.")

    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    generator = torch.Generator().manual_seed(42)
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size], generator)

    train_dataset.dataset.transform = train_transform
    val_dataset.dataset.transform = val_transform

    BATCH_SIZE = 16
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

EPOCHS = 100
LEARNING_RATE = 0.0001
WEIGHT_DECAY = 1e-4

CHECKPOINT_DIR = r'F:/checkpoints_massive'
LOG_CSV_PATH = r'F:/training_log_massive.csv'

os.makedirs(CHECKPOINT_DIR, exist_ok=True)

model = PestLarvaNet_Massive(num_classes=num_classes).to(device)
total_params, _ = count_parameters(model)
print(f"Khởi tạo mô hình PestLarvaNet-GMA với {total_params/1e6:.2f}M tham số.")

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
log_header = [
    'epoch', 'train_loss', 'val_loss', 'learning_rate', 'accuracy',
    'micro_f1', 'micro_precision', 'micro_recall'
]
with open(LOG_CSV_PATH, 'w', newline='', encoding='utf-8') as f:
    csv.writer(f).writerow(log_header)

print(f"\nBắt đầu huấn luyện... Checkpoints sẽ được lưu tại '{CHECKPOINT_DIR}' và log tại '{LOG_CSV_PATH}'")

# --- Vòng lặp huấn luyện chính ---
best_val_f1 = 0.0

for epoch in range(EPOCHS):
    # ---------------------------
    # ----- TRAINING PHASE ------
    # ---------------------------
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Training]")
    
    for inputs, labels in progress_bar:
        if -1 in labels: continue # Bỏ qua các mẫu bị lỗi
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Áp dụng Mixup với xác suất 50%
        if torch.rand(1).item() < 0.5:
            inputs, targets_a, targets_b, lam = mixup_data(inputs, labels, alpha=0.4)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        else:
            # Chạy như bình thường nếu không dùng Mixup
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)
        progress_bar.set_postfix(loss=f"{loss.item():.4f}")

    # ---------------------------
    # ---- VALIDATION PHASE -----
    # ---------------------------
    model.eval()
    val_loss = 0.0
    val_preds, val_labels = [], []
    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Validation]"):
            if -1 in labels: continue
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            
            _, predicted = torch.max(outputs.data, 1)
            val_preds.extend(predicted.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())
            
    # --- TÍNH TOÁN LOSS VÀ METRICS ---
    epoch_train_loss = running_loss / len(train_dataset)
    epoch_val_loss = val_loss / len(val_dataset)
    
    # Chỉ tính các chỉ số micro
    accuracy = accuracy_score(val_labels, val_preds)
    micro_f1 = f1_score(val_labels, val_preds, average='micro', zero_division=0)
    micro_precision = precision_score(val_labels, val_preds, average='micro', zero_division=0)
    micro_recall = recall_score(val_labels, val_preds, average='micro', zero_division=0)
    
    current_lr = scheduler.get_last_lr()[0]
    scheduler.step()

    # --- IN KẾT QUẢ VÀ LƯU LOG ---
    # In kết quả ra màn hình với micro F1-score
    print(f"\n--- Epoch {epoch+1}/{EPOCHS} Summary ---")
    print(f"  Train Loss: {epoch_train_loss:.4f} | Val Loss: {epoch_val_loss:.4f}")
    print(f"  Accuracy: {accuracy:.4f} | Micro F1: {micro_f1:.4f}")
    print(f"  Learning Rate: {current_lr:.6f}")

    # Cập nhật log_data để khớp với log_header mới
    log_data = [
        epoch + 1, epoch_train_loss, epoch_val_loss, current_lr, accuracy,
        micro_f1, micro_precision, micro_recall
    ]
    with open(LOG_CSV_PATH, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(log_data)
        
    # --- LƯU CHECKPOINT ---
    # Lưu Checkpoint mỗi epoch
    checkpoint_path = os.path.join(CHECKPOINT_DIR, f'checkpoint_epoch_{epoch+1}.pth')
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, checkpoint_path)

    # Lưu lại model tốt nhất (dựa trên micro F1-score)
    if micro_f1 > best_val_f1:
        best_val_f1 = micro_f1
        best_model_path = os.path.join(CHECKPOINT_DIR, 'best_model.pth')
        torch.save(model.state_dict(), best_model_path)
        print(f"  => 🎉 New best model saved to '{best_model_path}' (Micro F1: {best_val_f1:.4f})\n")

print("\n✅ Hoàn tất huấn luyện!")

Sử dụng thiết bị: cuda
Đã định nghĩa xong pipeline Augmentation cho Spectrogram.
Tải dữ liệu thành công: 165982 mẫu, 12 lớp.
Khởi tạo mô hình PestLarvaNet-GMA với 8.07M tham số.

Bắt đầu huấn luyện... Checkpoints sẽ được lưu tại 'F:/checkpoints_massive' và log tại 'F:/training_log_massive.csv'


Epoch 1/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 1/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 1/100 Summary ---
  Train Loss: 1.1867 | Val Loss: 0.7176
  Accuracy: 0.7608 | Micro F1: 0.7608
  Learning Rate: 0.000100
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.7608)



Epoch 2/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 2/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 2/100 Summary ---
  Train Loss: 0.8919 | Val Loss: 0.6340
  Accuracy: 0.7804 | Micro F1: 0.7804
  Learning Rate: 0.000100
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.7804)



Epoch 3/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 3/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 3/100 Summary ---
  Train Loss: 0.7686 | Val Loss: 0.5160
  Accuracy: 0.8203 | Micro F1: 0.8203
  Learning Rate: 0.000100
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8203)



Epoch 4/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 4/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 4/100 Summary ---
  Train Loss: 0.7111 | Val Loss: 0.5175
  Accuracy: 0.8176 | Micro F1: 0.8176
  Learning Rate: 0.000100


Epoch 5/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 5/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 5/100 Summary ---
  Train Loss: 0.6547 | Val Loss: 0.4381
  Accuracy: 0.8558 | Micro F1: 0.8558
  Learning Rate: 0.000100
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8558)



Epoch 6/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 6/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 6/100 Summary ---
  Train Loss: 0.6212 | Val Loss: 0.3988
  Accuracy: 0.8655 | Micro F1: 0.8655
  Learning Rate: 0.000099
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8655)



Epoch 7/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 7/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 7/100 Summary ---
  Train Loss: 0.5667 | Val Loss: 0.3814
  Accuracy: 0.8702 | Micro F1: 0.8702
  Learning Rate: 0.000099
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8702)



Epoch 8/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 8/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 8/100 Summary ---
  Train Loss: 0.5458 | Val Loss: 0.4075
  Accuracy: 0.8689 | Micro F1: 0.8689
  Learning Rate: 0.000099


Epoch 9/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 9/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 9/100 Summary ---
  Train Loss: 0.5131 | Val Loss: 0.3820
  Accuracy: 0.8766 | Micro F1: 0.8766
  Learning Rate: 0.000098
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8766)



Epoch 10/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 10/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 10/100 Summary ---
  Train Loss: 0.4967 | Val Loss: 0.4119
  Accuracy: 0.8697 | Micro F1: 0.8697
  Learning Rate: 0.000098


Epoch 11/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 11/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 11/100 Summary ---
  Train Loss: 0.4683 | Val Loss: 0.4007
  Accuracy: 0.8716 | Micro F1: 0.8716
  Learning Rate: 0.000098


Epoch 12/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 12/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 12/100 Summary ---
  Train Loss: 0.4420 | Val Loss: 0.4030
  Accuracy: 0.8738 | Micro F1: 0.8738
  Learning Rate: 0.000097


Epoch 13/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 13/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 13/100 Summary ---
  Train Loss: 0.4329 | Val Loss: 0.4117
  Accuracy: 0.8711 | Micro F1: 0.8711
  Learning Rate: 0.000096


Epoch 14/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 14/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 14/100 Summary ---
  Train Loss: 0.4222 | Val Loss: 0.4187
  Accuracy: 0.8739 | Micro F1: 0.8739
  Learning Rate: 0.000096


Epoch 15/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 15/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 15/100 Summary ---
  Train Loss: 0.4048 | Val Loss: 0.4259
  Accuracy: 0.8685 | Micro F1: 0.8685
  Learning Rate: 0.000095


Epoch 16/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 16/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 16/100 Summary ---
  Train Loss: 0.4020 | Val Loss: 0.4465
  Accuracy: 0.8756 | Micro F1: 0.8756
  Learning Rate: 0.000095


Epoch 17/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 17/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 17/100 Summary ---
  Train Loss: 0.3969 | Val Loss: 0.4357
  Accuracy: 0.8741 | Micro F1: 0.8741
  Learning Rate: 0.000094


Epoch 18/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 18/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 18/100 Summary ---
  Train Loss: 0.3782 | Val Loss: 0.4493
  Accuracy: 0.8747 | Micro F1: 0.8747
  Learning Rate: 0.000093


Epoch 19/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 19/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 19/100 Summary ---
  Train Loss: 0.3714 | Val Loss: 0.4643
  Accuracy: 0.8789 | Micro F1: 0.8789
  Learning Rate: 0.000092
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8789)



Epoch 20/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 20/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 20/100 Summary ---
  Train Loss: 0.3654 | Val Loss: 0.5155
  Accuracy: 0.8747 | Micro F1: 0.8747
  Learning Rate: 0.000091


Epoch 21/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 21/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 21/100 Summary ---
  Train Loss: 0.3572 | Val Loss: 0.4613
  Accuracy: 0.8743 | Micro F1: 0.8743
  Learning Rate: 0.000090


Epoch 22/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 22/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 22/100 Summary ---
  Train Loss: 0.3607 | Val Loss: 0.4906
  Accuracy: 0.8776 | Micro F1: 0.8776
  Learning Rate: 0.000090


Epoch 23/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 23/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 23/100 Summary ---
  Train Loss: 0.3578 | Val Loss: 0.5000
  Accuracy: 0.8715 | Micro F1: 0.8715
  Learning Rate: 0.000089


Epoch 24/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 24/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 24/100 Summary ---
  Train Loss: 0.3463 | Val Loss: 0.4576
  Accuracy: 0.8786 | Micro F1: 0.8786
  Learning Rate: 0.000088


Epoch 25/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 25/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 25/100 Summary ---
  Train Loss: 0.3470 | Val Loss: 0.4366
  Accuracy: 0.8822 | Micro F1: 0.8822
  Learning Rate: 0.000086
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8822)



Epoch 26/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 26/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 26/100 Summary ---
  Train Loss: 0.3417 | Val Loss: 0.4767
  Accuracy: 0.8836 | Micro F1: 0.8836
  Learning Rate: 0.000085
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8836)



Epoch 27/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 27/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 27/100 Summary ---
  Train Loss: 0.3366 | Val Loss: 0.4346
  Accuracy: 0.8865 | Micro F1: 0.8865
  Learning Rate: 0.000084
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8865)



Epoch 28/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 28/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 28/100 Summary ---
  Train Loss: 0.3380 | Val Loss: 0.4546
  Accuracy: 0.8747 | Micro F1: 0.8747
  Learning Rate: 0.000083


Epoch 29/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 29/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 29/100 Summary ---
  Train Loss: 0.3328 | Val Loss: 0.4600
  Accuracy: 0.8838 | Micro F1: 0.8838
  Learning Rate: 0.000082


Epoch 30/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 30/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 30/100 Summary ---
  Train Loss: 0.3299 | Val Loss: 0.4512
  Accuracy: 0.8825 | Micro F1: 0.8825
  Learning Rate: 0.000081


Epoch 31/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 31/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 31/100 Summary ---
  Train Loss: 0.3191 | Val Loss: 0.4715
  Accuracy: 0.8840 | Micro F1: 0.8840
  Learning Rate: 0.000079


Epoch 32/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 32/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 32/100 Summary ---
  Train Loss: 0.3251 | Val Loss: 0.4862
  Accuracy: 0.8829 | Micro F1: 0.8829
  Learning Rate: 0.000078


Epoch 33/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 33/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 33/100 Summary ---
  Train Loss: 0.3147 | Val Loss: 0.4794
  Accuracy: 0.8864 | Micro F1: 0.8864
  Learning Rate: 0.000077


Epoch 34/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 34/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 34/100 Summary ---
  Train Loss: 0.3155 | Val Loss: 0.4579
  Accuracy: 0.8855 | Micro F1: 0.8855
  Learning Rate: 0.000075


Epoch 35/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 35/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 35/100 Summary ---
  Train Loss: 0.3121 | Val Loss: 0.5355
  Accuracy: 0.8875 | Micro F1: 0.8875
  Learning Rate: 0.000074
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8875)



Epoch 36/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 36/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 36/100 Summary ---
  Train Loss: 0.3119 | Val Loss: 0.4515
  Accuracy: 0.8880 | Micro F1: 0.8880
  Learning Rate: 0.000073
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8880)



Epoch 37/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 37/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 37/100 Summary ---
  Train Loss: 0.3063 | Val Loss: 0.4864
  Accuracy: 0.8833 | Micro F1: 0.8833
  Learning Rate: 0.000071


Epoch 38/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 38/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 38/100 Summary ---
  Train Loss: 0.3133 | Val Loss: 0.4806
  Accuracy: 0.8873 | Micro F1: 0.8873
  Learning Rate: 0.000070


Epoch 39/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 39/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 39/100 Summary ---
  Train Loss: 0.3077 | Val Loss: 0.4280
  Accuracy: 0.8869 | Micro F1: 0.8869
  Learning Rate: 0.000068


Epoch 40/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 40/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 40/100 Summary ---
  Train Loss: 0.3049 | Val Loss: 0.4582
  Accuracy: 0.8888 | Micro F1: 0.8888
  Learning Rate: 0.000067
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8888)



Epoch 41/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 41/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 41/100 Summary ---
  Train Loss: 0.2951 | Val Loss: 0.4860
  Accuracy: 0.8867 | Micro F1: 0.8867
  Learning Rate: 0.000065


Epoch 42/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 42/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 42/100 Summary ---
  Train Loss: 0.2941 | Val Loss: 0.5042
  Accuracy: 0.8798 | Micro F1: 0.8798
  Learning Rate: 0.000064


Epoch 43/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 43/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 43/100 Summary ---
  Train Loss: 0.2958 | Val Loss: 0.5065
  Accuracy: 0.8860 | Micro F1: 0.8860
  Learning Rate: 0.000062


Epoch 44/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 44/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 44/100 Summary ---
  Train Loss: 0.2920 | Val Loss: 0.4686
  Accuracy: 0.8905 | Micro F1: 0.8905
  Learning Rate: 0.000061
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8905)



Epoch 45/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 45/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 45/100 Summary ---
  Train Loss: 0.2951 | Val Loss: 0.5117
  Accuracy: 0.8853 | Micro F1: 0.8853
  Learning Rate: 0.000059


Epoch 46/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 46/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 46/100 Summary ---
  Train Loss: 0.2841 | Val Loss: 0.4780
  Accuracy: 0.8901 | Micro F1: 0.8901
  Learning Rate: 0.000058


Epoch 47/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 47/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 47/100 Summary ---
  Train Loss: 0.2830 | Val Loss: 0.4677
  Accuracy: 0.8864 | Micro F1: 0.8864
  Learning Rate: 0.000056


Epoch 48/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 48/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 48/100 Summary ---
  Train Loss: 0.2878 | Val Loss: 0.4853
  Accuracy: 0.8892 | Micro F1: 0.8892
  Learning Rate: 0.000055


Epoch 49/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 49/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 49/100 Summary ---
  Train Loss: 0.2904 | Val Loss: 0.5189
  Accuracy: 0.8904 | Micro F1: 0.8904
  Learning Rate: 0.000053


Epoch 50/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 50/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 50/100 Summary ---
  Train Loss: 0.2915 | Val Loss: 0.5209
  Accuracy: 0.8874 | Micro F1: 0.8874
  Learning Rate: 0.000052


Epoch 51/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 51/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 51/100 Summary ---
  Train Loss: 0.2886 | Val Loss: 0.4922
  Accuracy: 0.8899 | Micro F1: 0.8899
  Learning Rate: 0.000050


Epoch 52/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 52/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 52/100 Summary ---
  Train Loss: 0.2817 | Val Loss: 0.4797
  Accuracy: 0.8933 | Micro F1: 0.8933
  Learning Rate: 0.000048
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8933)



Epoch 53/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 53/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 53/100 Summary ---
  Train Loss: 0.2880 | Val Loss: 0.4812
  Accuracy: 0.8934 | Micro F1: 0.8934
  Learning Rate: 0.000047
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8934)



Epoch 54/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 54/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 54/100 Summary ---
  Train Loss: 0.2849 | Val Loss: 0.5191
  Accuracy: 0.8910 | Micro F1: 0.8910
  Learning Rate: 0.000045


Epoch 55/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 55/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 55/100 Summary ---
  Train Loss: 0.2825 | Val Loss: 0.4824
  Accuracy: 0.8918 | Micro F1: 0.8918
  Learning Rate: 0.000044


Epoch 56/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 56/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 56/100 Summary ---
  Train Loss: 0.2805 | Val Loss: 0.4966
  Accuracy: 0.8902 | Micro F1: 0.8902
  Learning Rate: 0.000042


Epoch 57/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 57/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 57/100 Summary ---
  Train Loss: 0.2768 | Val Loss: 0.5177
  Accuracy: 0.8923 | Micro F1: 0.8923
  Learning Rate: 0.000041


Epoch 58/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 58/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 58/100 Summary ---
  Train Loss: 0.2765 | Val Loss: 0.5750
  Accuracy: 0.8942 | Micro F1: 0.8942
  Learning Rate: 0.000039
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8942)



Epoch 59/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 59/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 59/100 Summary ---
  Train Loss: 0.2811 | Val Loss: 0.5146
  Accuracy: 0.8906 | Micro F1: 0.8906
  Learning Rate: 0.000038


Epoch 60/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 60/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 60/100 Summary ---
  Train Loss: 0.2758 | Val Loss: 0.4934
  Accuracy: 0.8905 | Micro F1: 0.8905
  Learning Rate: 0.000036


Epoch 61/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 61/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 61/100 Summary ---
  Train Loss: 0.2717 | Val Loss: 0.4798
  Accuracy: 0.8939 | Micro F1: 0.8939
  Learning Rate: 0.000035


Epoch 62/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 62/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 62/100 Summary ---
  Train Loss: 0.2611 | Val Loss: 0.5396
  Accuracy: 0.8947 | Micro F1: 0.8947
  Learning Rate: 0.000033
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8947)



Epoch 63/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 63/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 63/100 Summary ---
  Train Loss: 0.2654 | Val Loss: 0.4766
  Accuracy: 0.8923 | Micro F1: 0.8923
  Learning Rate: 0.000032


Epoch 64/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 64/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 64/100 Summary ---
  Train Loss: 0.2668 | Val Loss: 0.5204
  Accuracy: 0.8925 | Micro F1: 0.8925
  Learning Rate: 0.000030


Epoch 65/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 65/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 65/100 Summary ---
  Train Loss: 0.2770 | Val Loss: 0.4931
  Accuracy: 0.8960 | Micro F1: 0.8960
  Learning Rate: 0.000029
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8960)



Epoch 66/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 66/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 66/100 Summary ---
  Train Loss: 0.2727 | Val Loss: 0.4980
  Accuracy: 0.8957 | Micro F1: 0.8957
  Learning Rate: 0.000027


Epoch 67/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 67/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 67/100 Summary ---
  Train Loss: 0.2632 | Val Loss: 0.5538
  Accuracy: 0.8973 | Micro F1: 0.8973
  Learning Rate: 0.000026
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8973)



Epoch 68/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 68/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 68/100 Summary ---
  Train Loss: 0.2572 | Val Loss: 0.5366
  Accuracy: 0.8967 | Micro F1: 0.8967
  Learning Rate: 0.000025


Epoch 69/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 69/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 69/100 Summary ---
  Train Loss: 0.2654 | Val Loss: 0.5016
  Accuracy: 0.8981 | Micro F1: 0.8981
  Learning Rate: 0.000023
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8981)



Epoch 70/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 70/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 70/100 Summary ---
  Train Loss: 0.2686 | Val Loss: 0.5042
  Accuracy: 0.8982 | Micro F1: 0.8982
  Learning Rate: 0.000022
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8982)



Epoch 71/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 71/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 71/100 Summary ---
  Train Loss: 0.2600 | Val Loss: 0.5016
  Accuracy: 0.8972 | Micro F1: 0.8972
  Learning Rate: 0.000021


Epoch 72/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 72/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 72/100 Summary ---
  Train Loss: 0.2636 | Val Loss: 0.4960
  Accuracy: 0.8940 | Micro F1: 0.8940
  Learning Rate: 0.000019


Epoch 73/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 73/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 73/100 Summary ---
  Train Loss: 0.2561 | Val Loss: 0.5476
  Accuracy: 0.8970 | Micro F1: 0.8970
  Learning Rate: 0.000018


Epoch 74/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 74/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 74/100 Summary ---
  Train Loss: 0.2592 | Val Loss: 0.5052
  Accuracy: 0.8988 | Micro F1: 0.8988
  Learning Rate: 0.000017
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.8988)



Epoch 75/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 75/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 75/100 Summary ---
  Train Loss: 0.2606 | Val Loss: 0.5128
  Accuracy: 0.8968 | Micro F1: 0.8968
  Learning Rate: 0.000016


Epoch 76/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 76/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 76/100 Summary ---
  Train Loss: 0.2541 | Val Loss: 0.5275
  Accuracy: 0.9003 | Micro F1: 0.9003
  Learning Rate: 0.000015
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.9003)



Epoch 77/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 77/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 77/100 Summary ---
  Train Loss: 0.2599 | Val Loss: 0.4909
  Accuracy: 0.8992 | Micro F1: 0.8992
  Learning Rate: 0.000014


Epoch 78/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 78/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 78/100 Summary ---
  Train Loss: 0.2659 | Val Loss: 0.5225
  Accuracy: 0.9007 | Micro F1: 0.9007
  Learning Rate: 0.000012
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.9007)



Epoch 79/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 79/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 79/100 Summary ---
  Train Loss: 0.2535 | Val Loss: 0.5310
  Accuracy: 0.9027 | Micro F1: 0.9027
  Learning Rate: 0.000011
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.9027)



Epoch 80/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 80/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 80/100 Summary ---
  Train Loss: 0.2519 | Val Loss: 0.5382
  Accuracy: 0.8996 | Micro F1: 0.8996
  Learning Rate: 0.000010


Epoch 81/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 81/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 81/100 Summary ---
  Train Loss: 0.2662 | Val Loss: 0.4976
  Accuracy: 0.9022 | Micro F1: 0.9022
  Learning Rate: 0.000010


Epoch 82/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 82/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 82/100 Summary ---
  Train Loss: 0.2551 | Val Loss: 0.5236
  Accuracy: 0.9016 | Micro F1: 0.9016
  Learning Rate: 0.000009


Epoch 83/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 83/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 83/100 Summary ---
  Train Loss: 0.2531 | Val Loss: 0.4798
  Accuracy: 0.9003 | Micro F1: 0.9003
  Learning Rate: 0.000008


Epoch 84/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 84/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 84/100 Summary ---
  Train Loss: 0.2467 | Val Loss: 0.5077
  Accuracy: 0.9019 | Micro F1: 0.9019
  Learning Rate: 0.000007


Epoch 85/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 85/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 85/100 Summary ---
  Train Loss: 0.2524 | Val Loss: 0.5620
  Accuracy: 0.9004 | Micro F1: 0.9004
  Learning Rate: 0.000006


Epoch 86/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 86/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 86/100 Summary ---
  Train Loss: 0.2567 | Val Loss: 0.5106
  Accuracy: 0.8998 | Micro F1: 0.8998
  Learning Rate: 0.000005


Epoch 87/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 87/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 87/100 Summary ---
  Train Loss: 0.2553 | Val Loss: 0.5168
  Accuracy: 0.9024 | Micro F1: 0.9024
  Learning Rate: 0.000005


Epoch 88/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 88/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 88/100 Summary ---
  Train Loss: 0.2565 | Val Loss: 0.5131
  Accuracy: 0.9017 | Micro F1: 0.9017
  Learning Rate: 0.000004


Epoch 89/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 89/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 89/100 Summary ---
  Train Loss: 0.2481 | Val Loss: 0.4929
  Accuracy: 0.9018 | Micro F1: 0.9018
  Learning Rate: 0.000004


Epoch 90/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 90/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 90/100 Summary ---
  Train Loss: 0.2551 | Val Loss: 0.5144
  Accuracy: 0.9026 | Micro F1: 0.9026
  Learning Rate: 0.000003


Epoch 91/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 91/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 91/100 Summary ---
  Train Loss: 0.2569 | Val Loss: 0.5302
  Accuracy: 0.8999 | Micro F1: 0.8999
  Learning Rate: 0.000002


Epoch 92/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 92/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 92/100 Summary ---
  Train Loss: 0.2482 | Val Loss: 0.5221
  Accuracy: 0.9021 | Micro F1: 0.9021
  Learning Rate: 0.000002


Epoch 93/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 93/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 93/100 Summary ---
  Train Loss: 0.2531 | Val Loss: 0.5528
  Accuracy: 0.9026 | Micro F1: 0.9026
  Learning Rate: 0.000002


Epoch 94/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 94/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 94/100 Summary ---
  Train Loss: 0.2485 | Val Loss: 0.5098
  Accuracy: 0.9008 | Micro F1: 0.9008
  Learning Rate: 0.000001


Epoch 95/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 95/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 95/100 Summary ---
  Train Loss: 0.2453 | Val Loss: 0.5203
  Accuracy: 0.9010 | Micro F1: 0.9010
  Learning Rate: 0.000001


Epoch 96/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 96/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 96/100 Summary ---
  Train Loss: 0.2417 | Val Loss: 0.5142
  Accuracy: 0.9027 | Micro F1: 0.9027
  Learning Rate: 0.000001
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.9027)



Epoch 97/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 97/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 97/100 Summary ---
  Train Loss: 0.2534 | Val Loss: 0.5416
  Accuracy: 0.8992 | Micro F1: 0.8992
  Learning Rate: 0.000000


Epoch 98/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 98/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 98/100 Summary ---
  Train Loss: 0.2521 | Val Loss: 0.5379
  Accuracy: 0.9018 | Micro F1: 0.9018
  Learning Rate: 0.000000


Epoch 99/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 99/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 99/100 Summary ---
  Train Loss: 0.2538 | Val Loss: 0.5044
  Accuracy: 0.9029 | Micro F1: 0.9029
  Learning Rate: 0.000000
  => 🎉 New best model saved to 'F:/checkpoints_massive\best_model.pth' (Micro F1: 0.9029)



Epoch 100/100 [Training]:   0%|          | 0/8300 [00:00<?, ?it/s]

Epoch 100/100 [Validation]:   0%|          | 0/2075 [00:00<?, ?it/s]


--- Epoch 100/100 Summary ---
  Train Loss: 0.2511 | Val Loss: 0.5172
  Accuracy: 0.9011 | Micro F1: 0.9011
  Learning Rate: 0.000000

✅ Hoàn tất huấn luyện!
