In [1]:
!pip install -q /kaggle/input/facenet-pytorch-vggface2/facenet_pytorch-2.2.7-py3-none-any.whl

In [2]:
import os
import sys
import glob
from PIL import Image
import torch
from torch.utils.data import Dataset, Sampler, DataLoader
import random
from collections import defaultdict
from facenet_pytorch import InceptionResnetV1
import torch.nn as nn
import numpy as np
from torchvision import transforms
import torch.nn.functional as F
from tqdm import tqdm
import time
from sklearn.metrics import recall_score, precision_score
import itertools
import copy
import matplotlib.pyplot as plt
from torch.cuda.amp import autocast, GradScaler
SCORE_DIR = "/kaggle/input/arc-scores"
if SCORE_DIR not in sys.path:
    sys.path.append(SCORE_DIR)
import arc_scores
HELPER_DIR = "/kaggle/input/helper-py"
if HELPER_DIR not in sys.path:
    sys.path.append(HELPER_DIR)
import helper
EVULATE_DIR = "/kaggle/input/evulate"
if EVULATE_DIR not in sys.path:
    sys.path.append(EVULATE_DIR)
import evaluate

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
class FaceFolderDataset(Dataset):
    def __init__(self, root, transform=None):
        self.root = root # directory
        self.transform = transform
        self.samples = []   # (img_path, label)
        self.labels = []

        persons = sorted(os.listdir(root))
        for label, person in enumerate(persons):
            self.labels.append(label)
            folder = os.path.join(root, person)
            if not os.path.isdir(folder):
                continue
            imgs = glob.glob(os.path.join(folder, "*"))
            for img_path in imgs:
                self.samples.append((img_path, label))

    def __len__(self):
        return len(self.samples)

    # allow to use [] to access the index
    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = Image.open(path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, label

In [4]:
class PKSampler(Sampler):
    def __init__(self, dataset, P, K, samples_per_class_per_epoch=5):
        """
        dataset: FaceFolderDataset object
        P: số classes mỗi batch
        K: số samples mỗi class
        """
        self.P = P
        self.K = K
        self.samples_per_class = samples_per_class_per_epoch
        self.label2indices = defaultdict(list)

        # lấy labels từ samples
        for idx, (_, label) in enumerate(dataset.samples):
            self.label2indices[label].append(idx)

        self.labels = list(self.label2indices.keys())
        self.n_classes = len(self.labels)

        # Tính số batches
        self.batches_per_epoch = (self.n_classes * self.samples_per_class + P - 1) // P

        total_images = sum(len(indices) for indices in self.label2indices.values())

        print(f"\n{'='*60}")
        print(f"PKSampler Info:")
        print(f"{'='*60}")
        print(f"Total images: {total_images}")
        print(f"Total classes: {self.n_classes}")
        print(f"Avg images/class: {total_images/self.n_classes:.1f}")
        print(f"Samples per class per epoch: {self.samples_per_class}")
        print(f"Batches per epoch: {self.batches_per_epoch}")
        print(f"Total samples per epoch: {self.batches_per_epoch * P * K}")
        print(f"Coverage: {(self.batches_per_epoch * P * K) / total_images * 100:.1f}%")
        print(f"Expected time per epoch: ~{self.batches_per_epoch * 0.15:.1f}s (assuming 0.15s/batch)")
        print(f"{'='*60}\n")

    def __iter__(self):
        # Mỗi class lặp lại samples_per_class lần
        class_pool = []
        for _ in range(self.samples_per_class):
            shuffled = self.labels.copy()
            random.shuffle(shuffled)
            class_pool.extend(shuffled)

        random.shuffle(class_pool)

        for batch_idx in range(self.batches_per_epoch):
            start = batch_idx * self.P
            batch_labels = class_pool[start:start + self.P]

            if len(batch_labels) < self.P:
                break

            batch = []
            for label in batch_labels:
                indices = self.label2indices[label]

                if len(indices) >= self.K:
                    selected = random.sample(indices, k=self.K)
                else:
                    selected = random.choices(indices, k=self.K)

                batch.extend(selected)

            yield from batch

    def __len__(self):
        return self.batches_per_epoch * self.P * self.K

In [16]:
class BatchHardTripletLoss(nn.Module):
    def __init__(self, margin=0.2, mode='semi_hard'):
        super().__init__()
        self.margin = margin
        self.mode = mode

    def update_strategy(self, mode):
        self.mode = mode

    def forward(self, embs, labels):
        dist = torch.cdist(embs, embs)

        labels = labels.view(-1, 1)
        pos_mask = labels.eq(labels.t())
        neg_mask = ~pos_mask

        eye = torch.eye(len(labels), dtype=torch.bool, device=labels.device)
        pos_mask[eye] = False

        # Kiểm tra có positive không (mỗi sample cần ít nhất 1 positive)
        valid_samples = pos_mask.any(dim=1)

        if self.mode == "semi_hard":
            # Dùng CLOSEST positive (theo paper gốc FaceNet)
            pos_dist = dist.clone()
            pos_dist[~pos_mask] = 1e9
            closest_pos = pos_dist.min(dim=1)[0]  # MIN - positive gần nhất

            neg_dist = dist.clone()
            neg_dist[~neg_mask] = 1e9

            # Semi-hard negative: d(anchor, pos) < d(anchor, neg) < d(anchor, pos) + margin
            semi_mask = (neg_dist > closest_pos.unsqueeze(1)) & \
                        (neg_dist < (closest_pos + self.margin).unsqueeze(1))

            semi_neg = neg_dist.clone()
            semi_neg[~semi_mask] = 1e9
            semi_neg = semi_neg.min(dim=1)[0]

            # Fallback: nếu không tìm được semi-hard, dùng hardest negative
            hardest_neg = neg_dist.min(dim=1)[0]
            semi_neg = torch.where(semi_neg >= 1e6, hardest_neg, semi_neg)

            loss = torch.relu(closest_pos - semi_neg + self.margin)

            # Chỉ tính loss cho samples có positive
            if valid_samples.all():
                return loss.mean()
            else:
                return loss[valid_samples].mean() if valid_samples.any() else torch.tensor(0.0, device=embs.device)

        # Batch-hard mode: dùng hardest positive và hardest negative
        # Hardest positive: positive xa nhất
        pos_dist = dist.clone()
        pos_dist[~pos_mask] = -1e9
        hardest_pos = pos_dist.max(dim=1)[0]

        # Hardest negative: negative gần nhất
        neg_dist = dist.clone()
        neg_dist[~neg_mask] = 1e9
        hardest_neg = neg_dist.min(dim=1)[0]

        loss = torch.relu(hardest_pos - hardest_neg + self.margin)

        # Chỉ tính loss cho samples có positive
        if valid_samples.all():
            return loss.mean()
        else:
            return loss[valid_samples].mean() if valid_samples.any() else torch.tensor(0.0, device=embs.device)

In [5]:
train_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])
val_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])

In [6]:
train_path = '/kaggle/input/train-ds/train'
val_path = '/kaggle/input/val-ds/val'

P, K = 256, 4

train_dataset = FaceFolderDataset(train_path, transform=train_transform)
val_dataset = FaceFolderDataset(val_path, transform=val_transform)

# CÁCH 1: Truyền dataset object
train_sampler = PKSampler(
    dataset=train_dataset,
    P=P,
    K=K,
    samples_per_class_per_epoch=10  # Mỗi class sample 5 lần/epoch
)


train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=P*K,
    sampler=train_sampler,
    num_workers=2,
    pin_memory=True,
    persistent_workers=True
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=512,
    num_workers=2,
    shuffle=False,
    pin_memory=True,
    persistent_workers=True
)


PKSampler Info:
Total images: 1119807
Total classes: 5115
Avg images/class: 218.9
Samples per class per epoch: 10
Batches per epoch: 200
Total samples per epoch: 204800
Coverage: 18.3%
Expected time per epoch: ~30.0s (assuming 0.15s/batch)



In [7]:
class EarlyStopping:
    def __init__(self, patience=5, epsilon=0.001, save_path="best.pt"):
        self.patience = patience
        self.save_path = save_path
        self.epsilon = epsilon
        self.best_acc = -1
        self.counter = 0
        self.should_stop = False
        self.phase = 1

    def step(self, val_acc, model):
        if val_acc > self.best_acc + self.epsilon:
            self.best_acc = val_acc
            self.counter = 0

            model_to_save = model.module if hasattr(model, 'module') else model

            checkpoint = {
                'model_state_dict': model_to_save.state_dict(),
                'best_tar_far': self.best_acc
            }
            torch.save(checkpoint, self.save_path)
            print(f"Saved best model: TAR@FAR1e-3={val_acc:.4f}")
        else:
            self.counter += 1
            if self.counter >= self.patience:
                if self.phase == 1:
                    print("!!! UPDATE STRATEGY TO HARD MINING !!!")
                    self.phase = 2
                    self.counter = 0
                    self.patience = 5
                elif self.phase == 2:
                    self.should_stop = True
                    print("Early stopping triggered!")

In [18]:
NUM_EPOCHS = 30
learning_rate = 5e-6

model = InceptionResnetV1(pretrained='casia-webface')
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4, betas=(0.9, 0.999))
criterion = BatchHardTripletLoss(margin=0.3, mode='semi_hard')

In [19]:
def train():
    train_losses = []
    test_losses = []
    tarfar3s = []
    tarfar4s = []
    accs = []
    rocs = []

    # early stopping
    early = EarlyStopping(patience=3, save_path="best.pt")
    is_update = False

    # FP16 scaler
    scaler = GradScaler()

    for epoch in range(NUM_EPOCHS):
        model.train()
        train_loss = []

        pbar = tqdm(
            enumerate(train_loader),
            total=len(train_loader),
            desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [LR: {learning_rate:.6f}]"
        )

        for step, (inputs, targets) in pbar:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()

            # ===========================
            #       FP16 TRAINING
            # ===========================
            with autocast(dtype=torch.float16):
                outputs = model(inputs)
                outputs = F.normalize(outputs, p=2, dim=1)
                loss = criterion(outputs, targets)

            # Scaler backward
            scaler.scale(loss).backward()

            # Clip grad norm (có scale)
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)

            # Step optimizer
            scaler.step(optimizer)
            scaler.update()

            train_loss.append(loss.item())
            pbar.set_postfix({'loss': f'{loss.item():.4f}'})

        train_loss = np.mean(train_loss)

        # ==================== EVALUATION ====================
        model.eval()
        test_loss = []
        embs = []
        labels = []

        with torch.no_grad():
            # Evaluation không cần autocast
            for inputs, targets in tqdm(val_loader):
                inputs, targets = inputs.to(device), targets.to(device)

                with autocast(dtype=torch.float16):
                    outputs = model(inputs)
                    outputs = F.normalize(outputs, p=2, dim=1)
                    loss = criterion(outputs, targets)

                test_loss.append(loss.item())
                embs.append(outputs.cpu())
                labels.append(targets.cpu())

        test_loss = np.mean(test_loss)

        eval_res = evaluate.evaluate(embs, labels, max_per_class=50, n_linspace=1000)

        # early stopping
        early.step(eval_res["tar_far_3"], model)

        # save ckpt + print
        helper.save_checkpoint(model, epoch, optimizer, train_loss,
                               eval_res["tar_far_3"], eval_res["tar_far_4"])
        helper.print_results(optimizer, epoch, NUM_EPOCHS, train_loss, eval_res)

        # save scores
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        tarfar3s.append(eval_res["tar_far_3"])
        tarfar4s.append(eval_res["tar_far_4"])
        accs.append(eval_res["accuracy"])
        rocs.append(eval_res["roc_auc"])

        # Switch to HARD mining
        if early.phase == 2 and not is_update:
            is_update = True
            criterion.update_strategy("hard")
            print("Switch loss to HARD mining!")

        if early.should_stop:
            break

    print(f"\nTraining completed! Best TAR@FAR1e-3: {early.best_acc:.4f}")

    return train_losses, test_losses, tarfar3s, tarfar4s, accs, rocs

In [20]:
train_losses, test_losses, tarfar3s, tarfar4s, accs, rocs = train()

  scaler = GradScaler()
  with autocast(dtype=torch.float16):
Epoch 1/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:44<00:03,  3.84s/it, loss=0.1444]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [06:26<00:00,  1.72s/it]


Saved best model: TAR@FAR1e-3=0.6980
Saved checkpoint at epoch 1

Epoch 1/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1559
Eval Metrics:
  - Accuracy: 0.9464
  - ROC AUC: 0.9856
  - TAR@FAR1e-3: 0.6980
  - TAR@FAR1e-4: 0.4892
  - Threshold: 0.2405



  with autocast(dtype=torch.float16):
Epoch 2/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [13:44<00:04,  4.14s/it, loss=0.1292]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:22<00:00,  1.58it/s]


Saved best model: TAR@FAR1e-3=0.7214
Saved checkpoint at epoch 2

Epoch 2/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1415
Eval Metrics:
  - Accuracy: 0.9493
  - ROC AUC: 0.9869
  - TAR@FAR1e-3: 0.7214
  - TAR@FAR1e-4: 0.5279
  - Threshold: 0.2442



  with autocast(dtype=torch.float16):
Epoch 3/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:31<00:03,  3.78s/it, loss=0.1263]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:23<00:00,  1.57it/s]


Saved best model: TAR@FAR1e-3=0.7318
Saved checkpoint at epoch 3

Epoch 3/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1339
Eval Metrics:
  - Accuracy: 0.9508
  - ROC AUC: 0.9874
  - TAR@FAR1e-3: 0.7318
  - TAR@FAR1e-4: 0.5423
  - Threshold: 0.2481



  with autocast(dtype=torch.float16):
Epoch 4/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:21<00:03,  3.73s/it, loss=0.1265]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:22<00:00,  1.58it/s]


Saved best model: TAR@FAR1e-3=0.7393
Saved checkpoint at epoch 4

Epoch 4/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1305
Eval Metrics:
  - Accuracy: 0.9521
  - ROC AUC: 0.9878
  - TAR@FAR1e-3: 0.7393
  - TAR@FAR1e-4: 0.5535
  - Threshold: 0.2520



  with autocast(dtype=torch.float16):
Epoch 5/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:18<00:03,  3.71s/it, loss=0.1225]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:43<00:00,  1.38it/s]


Saved best model: TAR@FAR1e-3=0.7443
Saved checkpoint at epoch 5

Epoch 5/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1270
Eval Metrics:
  - Accuracy: 0.9525
  - ROC AUC: 0.9879
  - TAR@FAR1e-3: 0.7443
  - TAR@FAR1e-4: 0.5670
  - Threshold: 0.2578



  with autocast(dtype=torch.float16):
Epoch 6/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [13:19<00:04,  4.02s/it, loss=0.1134]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:26<00:00,  1.53it/s]


Saved best model: TAR@FAR1e-3=0.7474
Saved checkpoint at epoch 6

Epoch 6/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1250
Eval Metrics:
  - Accuracy: 0.9531
  - ROC AUC: 0.9882
  - TAR@FAR1e-3: 0.7474
  - TAR@FAR1e-4: 0.5681
  - Threshold: 0.2591



  with autocast(dtype=torch.float16):
Epoch 7/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:26<00:03,  3.75s/it, loss=0.1352]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:47<00:00,  1.34it/s]


Saved best model: TAR@FAR1e-3=0.7525
Saved checkpoint at epoch 7

Epoch 7/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1229
Eval Metrics:
  - Accuracy: 0.9534
  - ROC AUC: 0.9883
  - TAR@FAR1e-3: 0.7525
  - TAR@FAR1e-4: 0.5784
  - Threshold: 0.2587



  with autocast(dtype=torch.float16):
Epoch 8/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:26<00:03,  3.75s/it, loss=0.1236]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:09<00:00,  1.19it/s]


Saved best model: TAR@FAR1e-3=0.7552
Saved checkpoint at epoch 8

Epoch 8/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1205
Eval Metrics:
  - Accuracy: 0.9539
  - ROC AUC: 0.9885
  - TAR@FAR1e-3: 0.7552
  - TAR@FAR1e-4: 0.5811
  - Threshold: 0.2637



  with autocast(dtype=torch.float16):
Epoch 9/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:24<00:03,  3.74s/it, loss=0.1125]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:59<00:00,  1.26it/s]


Saved checkpoint at epoch 9

Epoch 9/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1198
Eval Metrics:
  - Accuracy: 0.9541
  - ROC AUC: 0.9886
  - TAR@FAR1e-3: 0.7559
  - TAR@FAR1e-4: 0.5826
  - Threshold: 0.2610



  with autocast(dtype=torch.float16):
Epoch 10/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:33<00:03,  3.79s/it, loss=0.1199]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:08<00:00,  1.19it/s]


Saved best model: TAR@FAR1e-3=0.7594
Saved checkpoint at epoch 10

Epoch 10/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1181
Eval Metrics:
  - Accuracy: 0.9545
  - ROC AUC: 0.9887
  - TAR@FAR1e-3: 0.7594
  - TAR@FAR1e-4: 0.5908
  - Threshold: 0.2623



  with autocast(dtype=torch.float16):
Epoch 11/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:28<00:03,  3.76s/it, loss=0.1061]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:05<00:00,  1.21it/s]


Saved best model: TAR@FAR1e-3=0.7629
Saved checkpoint at epoch 11

Epoch 11/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1173
Eval Metrics:
  - Accuracy: 0.9545
  - ROC AUC: 0.9887
  - TAR@FAR1e-3: 0.7629
  - TAR@FAR1e-4: 0.5917
  - Threshold: 0.2639



  with autocast(dtype=torch.float16):
Epoch 12/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:31<00:03,  3.78s/it, loss=0.1156]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:57<00:00,  1.27it/s]


Saved best model: TAR@FAR1e-3=0.7643
Saved checkpoint at epoch 12

Epoch 12/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1152
Eval Metrics:
  - Accuracy: 0.9548
  - ROC AUC: 0.9888
  - TAR@FAR1e-3: 0.7643
  - TAR@FAR1e-4: 0.5967
  - Threshold: 0.2619



  with autocast(dtype=torch.float16):
Epoch 13/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:28<00:03,  3.76s/it, loss=0.1063]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:57<00:00,  1.27it/s]


Saved checkpoint at epoch 13

Epoch 13/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1153
Eval Metrics:
  - Accuracy: 0.9551
  - ROC AUC: 0.9888
  - TAR@FAR1e-3: 0.7646
  - TAR@FAR1e-4: 0.6016
  - Threshold: 0.2670



  with autocast(dtype=torch.float16):
Epoch 14/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:29<00:03,  3.77s/it, loss=0.1100]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:58<00:00,  1.26it/s]


Saved best model: TAR@FAR1e-3=0.7671
Saved checkpoint at epoch 14

Epoch 14/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1138
Eval Metrics:
  - Accuracy: 0.9552
  - ROC AUC: 0.9889
  - TAR@FAR1e-3: 0.7671
  - TAR@FAR1e-4: 0.6017
  - Threshold: 0.2605



  with autocast(dtype=torch.float16):
Epoch 15/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:30<00:03,  3.77s/it, loss=0.1128]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:58<00:00,  1.26it/s]


Saved checkpoint at epoch 15

Epoch 15/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1122
Eval Metrics:
  - Accuracy: 0.9553
  - ROC AUC: 0.9889
  - TAR@FAR1e-3: 0.7675
  - TAR@FAR1e-4: 0.6025
  - Threshold: 0.2681



  with autocast(dtype=torch.float16):
Epoch 16/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:32<00:03,  3.78s/it, loss=0.1154]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:05<00:00,  1.21it/s]


Saved best model: TAR@FAR1e-3=0.7702
Saved checkpoint at epoch 16

Epoch 16/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1119
Eval Metrics:
  - Accuracy: 0.9554
  - ROC AUC: 0.9889
  - TAR@FAR1e-3: 0.7702
  - TAR@FAR1e-4: 0.6163
  - Threshold: 0.2682



  with autocast(dtype=torch.float16):
Epoch 17/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:42<00:03,  3.83s/it, loss=0.1139]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:59<00:00,  1.25it/s]


Saved checkpoint at epoch 17

Epoch 17/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1109
Eval Metrics:
  - Accuracy: 0.9555
  - ROC AUC: 0.9890
  - TAR@FAR1e-3: 0.7701
  - TAR@FAR1e-4: 0.6067
  - Threshold: 0.2684



  with autocast(dtype=torch.float16):
Epoch 18/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:23<00:03,  3.73s/it, loss=0.1134]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:02<00:00,  1.23it/s]


Saved best model: TAR@FAR1e-3=0.7714
Saved checkpoint at epoch 18

Epoch 18/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1102
Eval Metrics:
  - Accuracy: 0.9557
  - ROC AUC: 0.9891
  - TAR@FAR1e-3: 0.7714
  - TAR@FAR1e-4: 0.6134
  - Threshold: 0.2709



  with autocast(dtype=torch.float16):
Epoch 19/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:20<00:03,  3.72s/it, loss=0.1042]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:04<00:00,  1.22it/s]


Saved checkpoint at epoch 19

Epoch 19/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1096
Eval Metrics:
  - Accuracy: 0.9556
  - ROC AUC: 0.9890
  - TAR@FAR1e-3: 0.7722
  - TAR@FAR1e-4: 0.6078
  - Threshold: 0.2711



  with autocast(dtype=torch.float16):
Epoch 20/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:20<00:03,  3.72s/it, loss=0.1077]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:00<00:00,  1.25it/s]


Saved best model: TAR@FAR1e-3=0.7727
Saved checkpoint at epoch 20

Epoch 20/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1089
Eval Metrics:
  - Accuracy: 0.9558
  - ROC AUC: 0.9891
  - TAR@FAR1e-3: 0.7727
  - TAR@FAR1e-4: 0.6158
  - Threshold: 0.2689



  with autocast(dtype=torch.float16):
Epoch 21/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:32<00:03,  3.78s/it, loss=0.1030]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:47<00:00,  1.34it/s]


Saved checkpoint at epoch 21

Epoch 21/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1078
Eval Metrics:
  - Accuracy: 0.9562
  - ROC AUC: 0.9892
  - TAR@FAR1e-3: 0.7733
  - TAR@FAR1e-4: 0.6105
  - Threshold: 0.2699



  with autocast(dtype=torch.float16):
Epoch 22/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:24<00:03,  3.74s/it, loss=0.1039]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:53<00:00,  1.29it/s]


Saved best model: TAR@FAR1e-3=0.7752
Saved checkpoint at epoch 22

Epoch 22/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1079
Eval Metrics:
  - Accuracy: 0.9562
  - ROC AUC: 0.9891
  - TAR@FAR1e-3: 0.7752
  - TAR@FAR1e-4: 0.6132
  - Threshold: 0.2714



  with autocast(dtype=torch.float16):
Epoch 23/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [17:09<00:05,  5.17s/it, loss=0.0992]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:26<00:00,  1.54it/s]


Saved checkpoint at epoch 23

Epoch 23/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1064
Eval Metrics:
  - Accuracy: 0.9563
  - ROC AUC: 0.9893
  - TAR@FAR1e-3: 0.7762
  - TAR@FAR1e-4: 0.6174
  - Threshold: 0.2723



  with autocast(dtype=torch.float16):
Epoch 24/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:32<00:03,  3.78s/it, loss=0.1061]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:22<00:00,  1.58it/s]


Saved best model: TAR@FAR1e-3=0.7768
Saved checkpoint at epoch 24

Epoch 24/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1064
Eval Metrics:
  - Accuracy: 0.9562
  - ROC AUC: 0.9892
  - TAR@FAR1e-3: 0.7768
  - TAR@FAR1e-4: 0.6129
  - Threshold: 0.2699



  with autocast(dtype=torch.float16):
Epoch 25/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:20<00:03,  3.72s/it, loss=0.0984]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:54<00:00,  1.29it/s]


Saved checkpoint at epoch 25

Epoch 25/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1057
Eval Metrics:
  - Accuracy: 0.9563
  - ROC AUC: 0.9892
  - TAR@FAR1e-3: 0.7763
  - TAR@FAR1e-4: 0.6182
  - Threshold: 0.2707



  with autocast(dtype=torch.float16):
Epoch 26/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:23<00:03,  3.74s/it, loss=0.0936]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:03<00:00,  1.23it/s]


Saved checkpoint at epoch 26

Epoch 26/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1051
Eval Metrics:
  - Accuracy: 0.9565
  - ROC AUC: 0.9893
  - TAR@FAR1e-3: 0.7767
  - TAR@FAR1e-4: 0.6192
  - Threshold: 0.2718



  with autocast(dtype=torch.float16):
Epoch 27/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:26<00:03,  3.75s/it, loss=0.1059]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:58<00:00,  1.26it/s]


Saved best model: TAR@FAR1e-3=0.7787
Saved checkpoint at epoch 27

Epoch 27/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1048
Eval Metrics:
  - Accuracy: 0.9565
  - ROC AUC: 0.9892
  - TAR@FAR1e-3: 0.7787
  - TAR@FAR1e-4: 0.6169
  - Threshold: 0.2690



  with autocast(dtype=torch.float16):
Epoch 28/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:15<00:03,  3.69s/it, loss=0.0998]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:18<00:00,  1.13it/s]


Saved checkpoint at epoch 28

Epoch 28/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1039
Eval Metrics:
  - Accuracy: 0.9567
  - ROC AUC: 0.9893
  - TAR@FAR1e-3: 0.7782
  - TAR@FAR1e-4: 0.6114
  - Threshold: 0.2667



  with autocast(dtype=torch.float16):
Epoch 29/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:28<00:03,  3.76s/it, loss=0.1028]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:01<00:00,  1.24it/s]


Saved best model: TAR@FAR1e-3=0.7799
Saved checkpoint at epoch 29

Epoch 29/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1036
Eval Metrics:
  - Accuracy: 0.9570
  - ROC AUC: 0.9894
  - TAR@FAR1e-3: 0.7799
  - TAR@FAR1e-4: 0.6212
  - Threshold: 0.2705



  with autocast(dtype=torch.float16):
Epoch 30/30 [LR: 0.000005]: 100%|█████████▉| 199/200 [12:36<00:03,  3.80s/it, loss=0.1032]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:59<00:00,  1.25it/s]


Saved best model: TAR@FAR1e-3=0.7820
Saved checkpoint at epoch 30

Epoch 30/30
Learning Rate: Backbone=0.000005
Train Loss: 0.1027
Eval Metrics:
  - Accuracy: 0.9568
  - ROC AUC: 0.9893
  - TAR@FAR1e-3: 0.7820
  - TAR@FAR1e-4: 0.6306
  - Threshold: 0.2706


Training completed! Best TAR@FAR1e-3: 0.7820
