In [1]:
# Clone InsightFace into the working directory; the import cell pulls the
# iresnet50 backbone from insightface/recognition/arcface_torch/backbones.
!git clone https://github.com/deepinsight/insightface.git

Cloning into 'insightface'...
remote: Enumerating objects: 12592, done.[K
remote: Counting objects: 100% (148/148), done.[K
remote: Compressing objects: 100% (59/59), done.[K
remote: Total 12592 (delta 104), reused 89 (delta 89), pack-reused 12444 (from 3)[K
Receiving objects: 100% (12592/12592), 58.40 MiB | 37.40 MiB/s, done.
Resolving deltas: 100% (6532/6532), done.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
import math
from tqdm import tqdm
import numpy as np
import itertools
from collections import defaultdict
import os
from PIL import Image
from pathlib import Path

from insightface.recognition.arcface_torch.backbones.iresnet import iresnet50
import sys
sys.path.append("/kaggle/input/scores")
from torch.cuda.amp import autocast, GradScaler
import arc_scores

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [5]:
# Side length of the square network input; matches the 112x112 resize used by the transforms.
IMAGE_SIZE = 112
# Batch size handed to both the training and the validation DataLoader.
BATCH_SIZE = 256
# Upper bound of the epoch loop in train(); early stopping can end training sooner.
NUM_EPOCHS = 25
# Passed to ArcMarginProduct as in_features.
# presumably equals the embedding width produced by iresnet50 — TODO confirm
FEATURE_DIM = 512

# Starting learning rates of the two optimizer parameter groups
# (group 0 = backbone, group 1 = margin head); step_lr() scales both.
base_lr_backbone = 0.1
base_lr_margin = 0.5
# Weight-decay coefficient (same 5e-4 value the SGD optimizer is built with).
weight_decay = 5e-4

# 0-based epoch indices from which step_lr() multiplies the learning rates by
# step_gamma; the factor is applied once per milestone already reached.
step_milestones = [10, 15, 20]
step_gamma = 0.1

In [6]:
class FastImageFolder(Dataset):
    """Dataset over a ``root/<class_name>/<image file>`` directory layout.

    The directory tree is scanned once in ``__init__`` and the resulting
    ``(path, class_index)`` list is cached in ``self.samples``; images are only
    opened lazily in ``__getitem__``.

    Args:
        root: Directory whose immediate sub-directories are the classes.
        transform: Optional callable applied to the RGB ``PIL.Image``.
        extensions: File-name endings that count as images. Matching is
            case-insensitive, so ``photo.JPG`` is accepted for ``'.jpg'``.

    Attributes:
        classes: Sorted list of class (sub-directory) names.
        class_to_idx: Mapping from class name to its integer label.
        samples: List of ``(str path, int label)`` tuples, grouped by class and
            sorted by path inside each class so the order is reproducible.
    """

    def __init__(self, root, transform=None, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
        self.root = Path(root)
        self.transform = transform
        self.extensions = extensions

        # Compare lower-cased names so upper-case extensions are not silently dropped.
        suffixes = tuple(ext.lower() for ext in extensions)

        self.classes = sorted(d.name for d in self.root.iterdir() if d.is_dir())
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

        # Fast scan with cached paths: one directory listing per class instead of
        # one glob per extension. Sorting removes the dependence on the
        # filesystem's arbitrary listing order.
        print(f"Scanning {root}...")
        self.samples = []
        for class_name in self.classes:
            class_idx = self.class_to_idx[class_name]
            image_paths = sorted(
                str(entry)
                for entry in (self.root / class_name).iterdir()
                if entry.name.lower().endswith(suffixes)
            )
            self.samples.extend((path, class_idx) for path in image_paths)

        print(f"Found {len(self.samples)} images in {len(self.classes)} classes")

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_index)``."""
        path, target = self.samples[idx]

        # Image.open is lazy and keeps the file open; the context manager closes
        # the handle deterministically once convert() has produced its own copy.
        with Image.open(path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, target

In [7]:
# =============================================================================
# DATA AUGMENTATION
# =============================================================================
# Both pipelines resize to the configured square input size, convert to a
# tensor and normalise each channel with mean 0.5 / std 0.5. The resize reads
# IMAGE_SIZE so the config cell stays the single source of truth.
# Training additionally applies a random horizontal flip.
train_transforms = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Evaluation pipeline: deterministic, no augmentation.
test_transforms = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [16]:
# =============================================================================
# DATASET & DATALOADER
# =============================================================================
# Index the training folder first, then the validation folder (each scan prints
# its own progress lines).
train_folder = '/kaggle/input/train-ds/train'
train_dataset = FastImageFolder(train_folder, transform=train_transforms)

test_folder = '/kaggle/input/val-ds/val'
test_dataset = FastImageFolder(test_folder, transform=test_transforms)

# Settings shared by both loaders; only the shuffle flag differs.
_loader_options = dict(
    batch_size=BATCH_SIZE,
    num_workers=4,
    pin_memory=True,
    persistent_workers=True,
)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

# The classifier head needs one output per training identity.
NUM_CLASSES = len(train_dataset.classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpus = torch.cuda.device_count()

# Human-readable summary of the run configuration.
banner = "=" * 60
print(banner)
print("MULTI-GPU TRAINING SETUP")
print(banner)
print(f"GPUs available: {n_gpus}")
for gpu_id in range(n_gpus):
    print(f"  GPU {gpu_id}: {torch.cuda.get_device_name(gpu_id)}")
print(f"Dataset: {NUM_CLASSES} classes, {len(train_dataset)} images")
print(f"Batch size: {BATCH_SIZE} (effective: {BATCH_SIZE})")
print(f"Epochs: {NUM_EPOCHS}")
print(f"Steps per epoch: {len(train_loader)}")
print(banner)

Scanning /kaggle/input/train-ds/train...
Found 1119807 images in 5115 classes
Scanning /kaggle/input/val-ds/val...
Found 114964 images in 555 classes
MULTI-GPU TRAINING SETUP
GPUs available: 2
  GPU 0: Tesla T4
  GPU 1: Tesla T4
Dataset: 5115 classes, 1119807 images
Batch size: 256 (effective: 256)
Epochs: 25
Steps per epoch: 4375


In [9]:
# =============================================================================
# ARCFACE MARGIN
# =============================================================================
class ArcMarginProduct(nn.Module):
    """Cosine classifier head with an additive angular margin on the target class.

    Args:
        in_features: Size of each input embedding.
        out_features: Number of classes (rows of the weight matrix).
        s: Scale applied to the final logits.
        m: Angular margin (radians) added to the target-class angle.
    """

    def __init__(self, in_features, out_features, s=64.0, m=0.5):
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        self.s, self.m = s, m

        # One weight row per class, Xavier-uniform initialised.
        self.weight = nn.Parameter(
            torch.empty(out_features, in_features, dtype=torch.float32)
        )
        nn.init.xavier_uniform_(self.weight)

        # Constants for cos(theta + m) and for the fallback used once
        # theta + m would exceed pi.
        self.cos_m, self.sin_m = math.cos(m), math.sin(m)
        boundary = math.pi - m
        self.th = math.cos(boundary)
        self.mm = math.sin(boundary) * m

    def forward(self, inputs, labels):
        """Return scaled logits; the margin is applied only at ``labels``.

        Args:
            inputs: 2-D tensor of embeddings (normalised again along dim 1 here).
            labels: Integer class indices, one per row of ``inputs``.
        """
        unit_inputs = F.normalize(inputs, p=2, dim=1)
        unit_weight = F.normalize(self.weight, p=2, dim=1)
        cosine = F.linear(unit_inputs, unit_weight)

        # sin(theta), with 1 - cos^2 clamped away from zero before the sqrt.
        sine = (1.0 - torch.pow(cosine, 2)).clamp(1e-9, 1).sqrt()

        # cos(theta + m) where cosine > th, otherwise the linear fallback cosine - mm.
        with_margin = cosine * self.cos_m - sine * self.sin_m
        phi = torch.where(cosine > self.th, with_margin, cosine - self.mm)

        # 1.0 in the target-class column of every row, 0.0 elsewhere.
        target_mask = torch.zeros_like(cosine).scatter_(1, labels.view(-1, 1), 1.0)

        blended = target_mask * phi + (1.0 - target_mask) * cosine
        return blended.mul_(self.s)

In [19]:
# =============================================================================
# EVALUATION FUNCTIONS
# =============================================================================
def generate_balanced_pairs(labels, max_per_class=None, random_state=42):
    """Build a shuffled list of same-class and different-class index pairs.

    All within-class index combinations are used as positives (optionally after
    sub-sampling each class to ``max_per_class`` items), and the same number of
    negatives is drawn by sampling a class pair and one item from each class.

    Args:
        labels: Sequence of hashable class labels; position ``i`` is sample ``i``.
        max_per_class: If truthy, cap on the items per class used for positives.
        random_state: Seed for the ``numpy.random.RandomState`` driving all draws.

    Returns:
        list: ``(i, j, target)`` tuples with ``target`` 1 for positives and 0 for
        negatives, in shuffled order. Empty when no class has two samples.

    Raises:
        ValueError: If positives exist but fewer than two classes are present,
            so no negative pair can be formed.
    """
    rng = np.random.RandomState(random_state)

    label2idx = defaultdict(list)
    for i, lb in enumerate(labels):
        label2idx[lb].append(i)

    pos_pairs = []
    for idxs in label2idx.values():
        if len(idxs) < 2:
            continue

        idxs = np.array(idxs)
        if max_per_class and len(idxs) > max_per_class:
            idxs = rng.choice(idxs, max_per_class, replace=False)

        pos_pairs.extend(itertools.combinations(idxs, 2))

    n_pos = len(pos_pairs)
    labels_unique = list(label2idx.keys())
    n_classes = len(labels_unique)

    if n_pos > 0 and n_classes < 2:
        raise ValueError(
            "generate_balanced_pairs needs at least two classes to sample negative pairs"
        )

    neg_pairs = []
    if n_pos > 0:
        # Class pairs are addressed by their rank in the lexicographic order of
        # itertools.combinations(labels_unique, 2) instead of materialising that
        # O(C^2) list. row_starts[a] is the rank of the first pair (a, a + 1).
        n_class_pairs = n_classes * (n_classes - 1) // 2
        row_lengths = np.arange(n_classes - 1, 0, -1)
        row_starts = np.concatenate(([0], np.cumsum(row_lengths)[:-1]))

        for _ in range(n_pos):
            # Same three draws per negative as sampling from the explicit list,
            # so results for a given seed are unchanged.
            rank = rng.randint(n_class_pairs)
            a = int(np.searchsorted(row_starts, rank, side="right")) - 1
            b = a + 1 + (rank - int(row_starts[a]))
            i = rng.choice(label2idx[labels_unique[a]])
            j = rng.choice(label2idx[labels_unique[b]])
            neg_pairs.append((i, j))

    pairs = [(i, j, 1) for (i, j) in pos_pairs] + \
            [(i, j, 0) for (i, j) in neg_pairs]

    rng.shuffle(pairs)
    return pairs

In [18]:
def evaluate(embs, labels, max_per_class=50, n_linspace=1000, epsilon=1e-6, random_state=42):
    """Score embeddings on a balanced set of verification pairs.

    Pair similarity is the dot product of the two embeddings, so the embeddings
    are expected to be L2-normalised by the caller if cosine similarity is wanted.

    Args:
        embs: List of embedding tensors (one per batch), concatenated along dim 0.
        labels: List of label tensors aligned with ``embs``.
        max_per_class: Per-class cap forwarded to ``generate_balanced_pairs``.
        n_linspace: Number of thresholds swept for the best-accuracy search.
        epsilon: Margin added around the min/max score for the threshold range.
        random_state: Seed forwarded to ``generate_balanced_pairs``.

    Returns:
        dict: ``accuracy`` (best over thresholds), ``roc_auc``, ``tar_far``,
        ``threshold`` (the best-accuracy threshold) and ``pos_samples`` /
        ``neg_samples`` (half the pair count each).

    Raises:
        ValueError: If no pairs can be generated (no class has two samples).
    """
    embs = torch.cat(embs).cpu()
    labels = torch.cat(labels).cpu().numpy()

    # Forward the seed; it was previously accepted but never used.
    pairs = generate_balanced_pairs(labels, max_per_class, random_state=random_state)
    if len(pairs) == 0:
        raise ValueError("evaluate: no verification pairs could be generated from the labels")
    pairs = np.array(pairs)

    idx_a = pairs[:, 0].astype(int)
    idx_b = pairs[:, 1].astype(int)
    similarity_scores = torch.sum(embs[idx_a] * embs[idx_b], dim=1).numpy()

    targets = pairs[:, 2].astype(int)

    # Best accuracy over a threshold sweep. One threshold at a time keeps peak
    # memory at O(n_pairs) instead of an (n_linspace x n_pairs) boolean matrix.
    thresholds = np.linspace(
        similarity_scores.min() - epsilon,
        similarity_scores.max() + epsilon,
        n_linspace
    )
    accs = np.array([
        ((similarity_scores >= th) == targets).mean() for th in thresholds
    ])
    best_acc = accs.max()
    best_th = thresholds[accs.argmax()]

    # ROC AUC and TAR at the helper's default FAR operating point
    # (arc_scores is an external module; its exact definitions are not visible here).
    roc_auc = arc_scores.compute_roc_auc(similarity_scores, targets)["auc"]
    tar_far = arc_scores.tar_at_far(similarity_scores, targets)

    return {
        "accuracy": float(best_acc),
        "roc_auc": float(roc_auc),
        "tar_far": float(tar_far),
        "threshold": float(best_th),
        "pos_samples": len(pairs) // 2,
        "neg_samples": len(pairs) // 2
    }

In [11]:
def step_lr(optimizer, base_lr_backbone, base_lr_margin, epoch,
            milestones=[10, 15], gamma=0.1):
    """Apply step decay to the first two parameter groups of ``optimizer``.

    The base rates are multiplied by ``gamma`` once for every milestone that
    ``epoch`` has reached (``epoch >= milestone``).

    Args:
        optimizer: Object exposing ``param_groups``; group 0 receives the
            backbone rate and group 1 the margin rate.
        base_lr_backbone: Undecayed learning rate for group 0.
        base_lr_margin: Undecayed learning rate for group 1.
        epoch: Current epoch index.
        milestones: Epoch indices at which another decay step kicks in.
        gamma: Multiplicative decay per milestone.

    Returns:
        tuple: ``(lr_backbone, lr_margin)`` as written into the optimizer.
    """
    reached = sum(1 for boundary in milestones if epoch >= boundary)

    # Repeated multiplication (not a power) keeps the floating-point result stable.
    decay = 1.0
    for _ in range(reached):
        decay *= gamma

    lr_backbone = base_lr_backbone * decay
    lr_margin = base_lr_margin * decay

    groups = optimizer.param_groups
    groups[0]["lr"], groups[1]["lr"] = lr_backbone, lr_margin

    return lr_backbone, lr_margin

In [12]:
# =============================================================================
# EARLY STOPPING
# =============================================================================
class EarlyStopping:
    """Track the best validation score, checkpoint on improvement, stop on stagnation.

    Args:
        patience: Number of consecutive non-improving ``step`` calls after which
            ``should_stop`` is set.
        epsilon: Minimum increase over the best score that counts as improvement.
        save_path: File the best checkpoint is written to.

    Attributes:
        best_acc: Best score seen so far (starts at -1).
        counter: Consecutive ``step`` calls without improvement.
        should_stop: True once ``counter`` has reached ``patience``.
    """

    def __init__(self, patience=5, epsilon=0.001, save_path="best.pt"):
        self.patience = patience
        self.save_path = save_path
        self.epsilon = epsilon
        self.best_acc = -1
        self.counter = 0
        self.should_stop = False

    def step(self, val_acc, model, margin):
        """Record one validation score and save the models if it improved.

        Args:
            val_acc: Validation score for the finished epoch (higher is better).
            model: Backbone module, optionally wrapped (e.g. ``nn.DataParallel``).
            margin: Margin head module, optionally wrapped.
        """
        if val_acc > self.best_acc + self.epsilon:
            self.best_acc = val_acc
            self.counter = 0

            # Save the unwrapped modules so the keys carry no 'module.' prefix
            # and load into a plain or a wrapped model alike.
            model_to_save = model.module if hasattr(model, 'module') else model
            margin_to_save = margin.module if hasattr(margin, 'module') else margin

            checkpoint = {
                'model_state_dict': model_to_save.state_dict(),
                'margin_state_dict': margin_to_save.state_dict(),
                'best_tar_far': self.best_acc
            }
            torch.save(checkpoint, self.save_path)
            print(f"✓ Saved best model: TAR@FAR={val_acc:.4f}")
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.should_stop = True
                print("⚠ Early stopping triggered!")

In [13]:
# =============================================================================
# MODEL & OPTIMIZER
# =============================================================================
# Backbone and margin head are moved to the target device before any wrapping.
model = iresnet50().to(device)
margin = ArcMarginProduct(
    in_features=FEATURE_DIM,
    out_features=NUM_CLASSES,
    s=64.0,
    m=0.3
).to(device)

# With several GPUs, each batch is split across replicas of both modules.
# The wrapped modules already live on `device`, so no further .to() is needed.
if n_gpus > 1:
    print(f"Using DataParallel with {n_gpus} GPUs")
    model = nn.DataParallel(model)
    margin = nn.DataParallel(margin)

criterion = nn.CrossEntropyLoss()

# Group order matters: step_lr() writes the backbone rate into group 0 and the
# margin rate into group 1. Weight decay comes from the config constant.
optimizer = optim.SGD([
    {"params": model.parameters(), "lr": base_lr_backbone},
    {"params": margin.parameters(), "lr": base_lr_margin}
], momentum=0.9, weight_decay=weight_decay)

steps_per_epoch = len(train_loader)

scaler = GradScaler()

Using DataParallel with 2 GPUs


  scaler = GradScaler()


In [16]:
def train():
    """Train the backbone and margin head from epoch 0 with per-epoch validation.

    Each epoch applies the step learning-rate schedule, runs one pass over
    ``train_loader`` (mixed precision when running on CUDA), embeds
    ``test_loader``, evaluates pair-verification metrics, feeds TAR@FAR to early
    stopping and writes ``checkpoint_epoch_<n>.pt``.

    Relies on the notebook globals ``model``, ``margin``, ``optimizer``,
    ``criterion``, ``device``, ``train_loader``, ``test_loader`` and the
    schedule/config constants.

    Returns:
        tuple: ``(train_losses, accs, rocs, tfs)`` — lists with one entry per
        completed epoch (mean training loss, best accuracy, ROC AUC, TAR@FAR).
    """
    train_losses = []
    accs = []
    rocs = []
    tfs = []

    early = EarlyStopping(patience=5, epsilon=0.001)

    # Decide once whether mixed precision is used. With enabled=False both
    # autocast and GradScaler become pass-throughs, so a single code path serves
    # AMP and plain FP32 and no catch-all fallback is needed; genuine errors
    # (and Ctrl-C) now surface instead of triggering a second forward/backward.
    use_amp = device.type == "cuda"
    amp_scaler = GradScaler(enabled=use_amp)

    print(f"\nStarting training...")
    print(f"Total epochs: {NUM_EPOCHS}")
    print(f"Using {n_gpus} GPU(s)")
    print(f"LR schedule: Step decay at {step_milestones}")
    print(f"Batch size: {BATCH_SIZE} (per GPU: {BATCH_SIZE // n_gpus if n_gpus > 1 else BATCH_SIZE})\n")

    for epoch in range(NUM_EPOCHS):
        model.train()
        margin.train()

        train_loss = 0.0

        # Write this epoch's learning rates into the optimizer (also shown in the bar).
        lr_backbone, _ = step_lr(
            optimizer, base_lr_backbone, base_lr_margin,
            epoch, milestones=step_milestones, gamma=step_gamma
        )
        pbar = tqdm(
            train_loader,
            total=len(train_loader),
            desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [LR: {lr_backbone:.6f}]"
        )

        for inputs, targets in pbar:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                outputs = model(inputs)
                outputs = F.normalize(outputs, p=2, dim=1)
                logits = margin(outputs, targets)
                loss = criterion(logits, targets)

            # Unscale before clipping so the 5.0 threshold applies to true gradients.
            amp_scaler.scale(loss).backward()
            amp_scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(margin.parameters(), 5.0)
            amp_scaler.step(optimizer)
            amp_scaler.update()

            # One .item() per step: each call forces a device synchronisation.
            batch_loss = loss.item()
            train_loss += batch_loss
            pbar.set_postfix({'loss': f'{batch_loss:.4f}'})

        avg_train_loss = train_loss / len(train_loader)

        # ==================== EVALUATION ====================
        model.eval()
        margin.eval()

        embs = []
        labels_list = []

        with torch.no_grad():
            for inputs, targets in tqdm(test_loader, desc="Evaluating"):
                inputs = inputs.to(device, non_blocking=True)

                outputs = model(inputs)
                outputs = F.normalize(outputs, p=2, dim=1)

                embs.append(outputs.cpu())
                # Labels are only consumed on the CPU by evaluate().
                labels_list.append(targets.cpu())

        eval_res = evaluate(embs, labels_list, max_per_class=50, n_linspace=1000)
        tar_far = eval_res["tar_far"]

        # Append
        train_losses.append(avg_train_loss)
        accs.append(eval_res['accuracy'])
        rocs.append(eval_res['roc_auc'])
        tfs.append(tar_far)

        # Get current learning rates
        current_lr_backbone = optimizer.param_groups[0]['lr']
        current_lr_margin = optimizer.param_groups[1]['lr']

        print(f"\n{'='*60}")
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}")
        print(f"Learning Rate: Backbone={current_lr_backbone:.6f}, Margin={current_lr_margin:.6f}")
        print(f"Train Loss: {avg_train_loss:.4f}")
        print(f"Eval Metrics:")
        print(f"  - Accuracy: {eval_res['accuracy']:.4f}")
        print(f"  - ROC AUC: {eval_res['roc_auc']:.4f}")
        print(f"  - TAR@FAR: {eval_res['tar_far']:.4f}")
        print(f"  - Threshold: {eval_res['threshold']:.4f}")
        print(f"{'='*60}\n")

        # Early stopping is driven by TAR@FAR and saves the best model itself.
        early.step(tar_far, model, margin)

        # Save a resumable checkpoint after every epoch (unwrap DataParallel so
        # the state-dict keys carry no 'module.' prefix).
        model_to_save = model.module if hasattr(model, 'module') else model
        margin_to_save = margin.module if hasattr(margin, 'module') else margin

        torch.save({
            'epoch': epoch,
            'model_state_dict': model_to_save.state_dict(),
            'margin_state_dict': margin_to_save.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': avg_train_loss,
            'tar_far': tar_far,
            'n_gpus': n_gpus
        }, f'checkpoint_epoch_{epoch+1}.pt')
        print(f"✓ Saved checkpoint at epoch {epoch+1}")

        if early.should_stop:
            print("⚠ Training stopped early.")
            break

    print(f"\n✓ Training completed! Best TAR@FAR: {early.best_acc:.4f}")

    return train_losses, accs, rocs, tfs

In [None]:
# Launch training and keep the per-epoch histories returned by train()
# (mean training loss, best accuracy, ROC AUC, TAR@FAR).
train_losses, accs, rocs, tfs = train()


Starting training...
Total epochs: 10
Using 2 GPU(s)
LR schedule: Step decay at [10, 15, 25]
Batch size: 256 (per GPU: 128)



  train._scaler = GradScaler()
  with autocast():
  with torch.cuda.amp.autocast(self.fp16):
Epoch 1/10 [LR: 0.100000]: 100%|██████████| 4375/4375 [1:41:04<00:00,  1.39s/it, loss=10.6098]
Evaluating: 100%|██████████| 450/450 [05:03<00:00,  1.48it/s]



Epoch 1/10
Learning Rate: Backbone=0.100000, Margin=0.500000
Train Loss: 17.3763
Eval Metrics:
  - Accuracy: 0.9199
  - ROC AUC: 0.9739
  - TAR@FAR: 0.5662
  - Threshold: 0.3203

✓ Saved best model: TAR@FAR=0.5662
✓ Saved checkpoint at epoch 1


  with autocast():
  with torch.cuda.amp.autocast(self.fp16):
Epoch 2/10 [LR: 0.100000]: 100%|██████████| 4375/4375 [1:40:42<00:00,  1.38s/it, loss=10.0458]
Evaluating: 100%|██████████| 450/450 [02:47<00:00,  2.68it/s]



Epoch 2/10
Learning Rate: Backbone=0.100000, Margin=0.500000
Train Loss: 8.8618
Eval Metrics:
  - Accuracy: 0.9333
  - ROC AUC: 0.9804
  - TAR@FAR: 0.6546
  - Threshold: 0.2955

✓ Saved best model: TAR@FAR=0.6546
✓ Saved checkpoint at epoch 2


  with autocast():
  with torch.cuda.amp.autocast(self.fp16):
Epoch 3/10 [LR: 0.100000]:  73%|███████▎  | 3200/4375 [1:13:42<27:07,  1.39s/it, loss=7.5558]

In [14]:
def load_checkpoint(path):
    """Restore model, margin head and optimizer state from a training checkpoint.

    The state dicts are loaded into the unwrapped modules (``.module`` when the
    globals ``model`` / ``margin`` are wrapped), matching how the per-epoch
    checkpoints are saved.

    Args:
        path: Checkpoint file written by the training loop.

    Returns:
        int: Epoch index to resume from (stored epoch + 1), or 0 when ``path``
        does not exist and nothing was loaded.
    """
    if not os.path.exists(path):
        # Make the fallback visible: silently returning 0 would restart
        # training from scratch while the caller believes it resumed.
        print(f"==> WARNING: checkpoint not found at {path}; starting from epoch 0")
        return 0

    print(f"==> Loading checkpoint from {path}")
    # NOTE(review): weights_only=False unpickles arbitrary objects; only load
    # checkpoint files that come from a trusted source.
    checkpoint = torch.load(path, map_location=device, weights_only=False)

    model_to_load = model.module if hasattr(model, 'module') else model
    margin_to_load = margin.module if hasattr(margin, 'module') else margin

    model_to_load.load_state_dict(checkpoint['model_state_dict'])
    margin_to_load.load_state_dict(checkpoint['margin_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    start_epoch = checkpoint['epoch'] + 1
    print(f"==> Resuming from epoch {start_epoch}")

    return start_epoch

In [None]:
def train():
    """Resume training from a saved checkpoint with per-epoch validation.

    Restores model, margin head and optimizer state via ``load_checkpoint`` and
    continues from the epoch after the stored one. Each epoch applies the step
    learning-rate schedule, runs one pass over ``train_loader`` (mixed precision
    when running on CUDA), embeds ``test_loader``, evaluates pair-verification
    metrics, feeds TAR@FAR to early stopping and writes
    ``checkpoint_epoch_<n>.pt``.

    Relies on the notebook globals ``model``, ``margin``, ``optimizer``,
    ``criterion``, ``device``, ``train_loader``, ``test_loader`` and the
    schedule/config constants.

    Returns:
        tuple: ``(train_losses, accs, rocs, tfs)`` — lists with one entry per
        epoch completed in this call (mean training loss, best accuracy,
        ROC AUC, TAR@FAR).
    """
    train_losses = []
    accs = []
    rocs = []
    tfs = []

    # NOTE(review): early-stopping state is not stored in the checkpoint, so the
    # best score starts over at -1 after a resume.
    early = EarlyStopping(patience=5, epsilon=0.001)
    start_epoch = load_checkpoint("/kaggle/input/checkpoints/checkpoint_epoch_2.pt")

    # Decide once whether mixed precision is used. With enabled=False both
    # autocast and GradScaler become pass-throughs, so a single code path serves
    # AMP and plain FP32 and no catch-all fallback is needed; genuine errors
    # (and Ctrl-C) now surface instead of triggering a second forward/backward.
    use_amp = device.type == "cuda"
    amp_scaler = GradScaler(enabled=use_amp)

    print(f"\nStarting training...")
    print(f"Total epochs: {NUM_EPOCHS}")
    print(f"Using {n_gpus} GPU(s)")
    print(f"LR schedule: Step decay at {step_milestones}")
    print(f"Batch size: {BATCH_SIZE} (per GPU: {BATCH_SIZE // n_gpus if n_gpus > 1 else BATCH_SIZE})\n")

    for epoch in range(start_epoch, NUM_EPOCHS):
        model.train()
        margin.train()

        train_loss = 0.0

        # Write this epoch's learning rates into the optimizer (also shown in the bar).
        lr_backbone, _ = step_lr(
            optimizer, base_lr_backbone, base_lr_margin,
            epoch, milestones=step_milestones, gamma=step_gamma
        )
        pbar = tqdm(
            train_loader,
            total=len(train_loader),
            desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [LR: {lr_backbone:.6f}]"
        )

        for inputs, targets in pbar:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                outputs = model(inputs)
                outputs = F.normalize(outputs, p=2, dim=1)
                logits = margin(outputs, targets)
                loss = criterion(logits, targets)

            # Unscale before clipping so the 5.0 threshold applies to true gradients.
            amp_scaler.scale(loss).backward()
            amp_scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(margin.parameters(), 5.0)
            amp_scaler.step(optimizer)
            amp_scaler.update()

            # One .item() per step: each call forces a device synchronisation.
            batch_loss = loss.item()
            train_loss += batch_loss
            pbar.set_postfix({'loss': f'{batch_loss:.4f}'})

        avg_train_loss = train_loss / len(train_loader)

        # ==================== EVALUATION ====================
        model.eval()
        margin.eval()

        embs = []
        labels_list = []

        with torch.no_grad():
            for inputs, targets in tqdm(test_loader, desc="Evaluating"):
                inputs = inputs.to(device, non_blocking=True)

                outputs = model(inputs)
                outputs = F.normalize(outputs, p=2, dim=1)

                embs.append(outputs.cpu())
                # Labels are only consumed on the CPU by evaluate().
                labels_list.append(targets.cpu())

        eval_res = evaluate(embs, labels_list, max_per_class=50, n_linspace=1000)
        tar_far = eval_res["tar_far"]

        # Append
        train_losses.append(avg_train_loss)
        accs.append(eval_res['accuracy'])
        rocs.append(eval_res['roc_auc'])
        tfs.append(tar_far)

        # Get current learning rates
        current_lr_backbone = optimizer.param_groups[0]['lr']
        current_lr_margin = optimizer.param_groups[1]['lr']

        print(f"\n{'='*60}")
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}")
        print(f"Learning Rate: Backbone={current_lr_backbone:.6f}, Margin={current_lr_margin:.6f}")
        print(f"Train Loss: {avg_train_loss:.4f}")
        print(f"Eval Metrics:")
        print(f"  - Accuracy: {eval_res['accuracy']:.4f}")
        print(f"  - ROC AUC: {eval_res['roc_auc']:.4f}")
        print(f"  - TAR@FAR: {eval_res['tar_far']:.4f}")
        print(f"  - Threshold: {eval_res['threshold']:.4f}")
        print(f"{'='*60}\n")

        # Early stopping is driven by TAR@FAR and saves the best model itself.
        early.step(tar_far, model, margin)

        # Save a resumable checkpoint after every epoch (unwrap DataParallel so
        # the state-dict keys carry no 'module.' prefix).
        model_to_save = model.module if hasattr(model, 'module') else model
        margin_to_save = margin.module if hasattr(margin, 'module') else margin

        torch.save({
            'epoch': epoch,
            'model_state_dict': model_to_save.state_dict(),
            'margin_state_dict': margin_to_save.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': avg_train_loss,
            'tar_far': tar_far,
            'n_gpus': n_gpus
        }, f'checkpoint_epoch_{epoch+1}.pt')
        print(f"✓ Saved checkpoint at epoch {epoch+1}")

        if early.should_stop:
            print("⚠ Training stopped early.")
            break

    print(f"\n✓ Training completed! Best TAR@FAR: {early.best_acc:.4f}")

    return train_losses, accs, rocs, tfs

train()

==> Loading checkpoint from /kaggle/input/checkpoints/checkpoint_epoch_2.pt
==> Resuming from epoch 2

Starting training...
Total epochs: 25
Using 2 GPU(s)
LR schedule: Step decay at [10, 15, 20]
Batch size: 256 (per GPU: 128)



  train._scaler = GradScaler()
  with autocast():
Epoch 3/25 [LR: 0.100000]: 100%|██████████| 4375/4375 [1:41:37<00:00,  1.39s/it, loss=9.3661]
Evaluating: 100%|██████████| 450/450 [02:50<00:00,  2.63it/s]



Epoch 3/25
Learning Rate: Backbone=0.100000, Margin=0.500000
Train Loss: 7.8022
Eval Metrics:
  - Accuracy: 0.9320
  - ROC AUC: 0.9783
  - TAR@FAR: 0.6743
  - Threshold: 0.2669

✓ Saved best model: TAR@FAR=0.6743
✓ Saved checkpoint at epoch 3


  with autocast():
  with torch.cuda.amp.autocast(self.fp16):
Epoch 4/25 [LR: 0.100000]: 100%|██████████| 4375/4375 [1:41:40<00:00,  1.39s/it, loss=7.4795]
Evaluating: 100%|██████████| 450/450 [02:50<00:00,  2.63it/s]



Epoch 4/25
Learning Rate: Backbone=0.100000, Margin=0.500000
Train Loss: 7.3660
Eval Metrics:
  - Accuracy: 0.9397
  - ROC AUC: 0.9822
  - TAR@FAR: 0.6933
  - Threshold: 0.2959

✓ Saved best model: TAR@FAR=0.6933
✓ Saved checkpoint at epoch 4


  with autocast():
  with torch.cuda.amp.autocast(self.fp16):
Epoch 5/25 [LR: 0.100000]: 100%|██████████| 4375/4375 [1:41:44<00:00,  1.40s/it, loss=7.7111]
Evaluating: 100%|██████████| 450/450 [02:50<00:00,  2.64it/s]



Epoch 5/25
Learning Rate: Backbone=0.100000, Margin=0.500000
Train Loss: 7.1085
Eval Metrics:
  - Accuracy: 0.9354
  - ROC AUC: 0.9806
  - TAR@FAR: 0.6844
  - Threshold: 0.2904

✓ Saved checkpoint at epoch 5


  with autocast():
  with torch.cuda.amp.autocast(self.fp16):
Epoch 6/25 [LR: 0.100000]: 100%|██████████| 4375/4375 [1:41:45<00:00,  1.40s/it, loss=5.8907]
Evaluating: 100%|██████████| 450/450 [02:51<00:00,  2.63it/s]



Epoch 6/25
Learning Rate: Backbone=0.100000, Margin=0.500000
Train Loss: 6.9227
Eval Metrics:
  - Accuracy: 0.9375
  - ROC AUC: 0.9812
  - TAR@FAR: 0.6882
  - Threshold: 0.2754

✓ Saved checkpoint at epoch 6


  with autocast():
  with torch.cuda.amp.autocast(self.fp16):
Epoch 7/25 [LR: 0.100000]: 100%|██████████| 4375/4375 [1:41:44<00:00,  1.40s/it, loss=6.8666]
Evaluating: 100%|██████████| 450/450 [02:50<00:00,  2.64it/s]



Epoch 7/25
Learning Rate: Backbone=0.100000, Margin=0.500000
Train Loss: 6.7780
Eval Metrics:
  - Accuracy: 0.9365
  - ROC AUC: 0.9794
  - TAR@FAR: 0.7053
  - Threshold: 0.2937

✓ Saved best model: TAR@FAR=0.7053
✓ Saved checkpoint at epoch 7


  with autocast():
  with torch.cuda.amp.autocast(self.fp16):
Epoch 8/25 [LR: 0.100000]:  63%|██████▎   | 2777/4375 [1:04:30<37:10,  1.40s/it, loss=5.8560]