In [1]:
!pip install -q /kaggle/input/facenet-pytorch-vggface2/facenet_pytorch-2.2.7-py3-none-any.whl

In [2]:
import os
import sys
import glob
from PIL import Image
import torch
from torch.utils.data import Dataset, Sampler, DataLoader
import random
from collections import defaultdict
from facenet_pytorch import InceptionResnetV1
import torch.nn as nn
import numpy as np
from torchvision import transforms
import torch.nn.functional as F
from tqdm import tqdm
import time
from sklearn.metrics import recall_score, precision_score
import itertools
import copy
import matplotlib.pyplot as plt
from torch.cuda.amp import autocast, GradScaler
SCORE_DIR = "/kaggle/input/arc-scores"
if SCORE_DIR not in sys.path:
    sys.path.append(SCORE_DIR)
import arc_scores
HELPER_DIR = "/kaggle/input/helper-py"
if HELPER_DIR not in sys.path:
    sys.path.append(HELPER_DIR)
import helper
EVULATE_DIR = "/kaggle/input/evulate"
if EVULATE_DIR not in sys.path:
    sys.path.append(EVULATE_DIR)
import evaluate

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
class FaceFolderDataset(Dataset):
    def __init__(self, root, transform=None):
        self.root = root # directory
        self.transform = transform
        self.samples = []   # (img_path, label)
        self.labels = []

        persons = sorted(os.listdir(root))
        for label, person in enumerate(persons):
            self.labels.append(label)
            folder = os.path.join(root, person)
            if not os.path.isdir(folder):
                continue
            imgs = glob.glob(os.path.join(folder, "*"))
            for img_path in imgs:
                self.samples.append((img_path, label))

    def __len__(self):
        return len(self.samples)

    # allow to use [] to access the index
    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = Image.open(path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, label

In [5]:
class PKSampler(Sampler):
    def __init__(self, dataset, P, K, samples_per_class_per_epoch=5):
        """
        dataset: FaceFolderDataset object
        P: số classes mỗi batch
        K: số samples mỗi class
        """
        self.P = P
        self.K = K
        self.samples_per_class = samples_per_class_per_epoch
        self.label2indices = defaultdict(list)

        # lấy labels từ samples
        for idx, (_, label) in enumerate(dataset.samples):
            self.label2indices[label].append(idx)

        self.labels = list(self.label2indices.keys())
        self.n_classes = len(self.labels)

        # Tính số batches
        self.batches_per_epoch = (self.n_classes * self.samples_per_class + P - 1) // P

        total_images = sum(len(indices) for indices in self.label2indices.values())

        print(f"\n{'='*60}")
        print(f"PKSampler Info:")
        print(f"{'='*60}")
        print(f"Total images: {total_images}")
        print(f"Total classes: {self.n_classes}")
        print(f"Avg images/class: {total_images/self.n_classes:.1f}")
        print(f"Samples per class per epoch: {self.samples_per_class}")
        print(f"Batches per epoch: {self.batches_per_epoch}")
        print(f"Total samples per epoch: {self.batches_per_epoch * P * K}")
        print(f"Coverage: {(self.batches_per_epoch * P * K) / total_images * 100:.1f}%")
        print(f"Expected time per epoch: ~{self.batches_per_epoch * 0.15:.1f}s (assuming 0.15s/batch)")
        print(f"{'='*60}\n")

    def __iter__(self):
        # Mỗi class lặp lại samples_per_class lần
        class_pool = []
        for _ in range(self.samples_per_class):
            shuffled = self.labels.copy()
            random.shuffle(shuffled)
            class_pool.extend(shuffled)

        random.shuffle(class_pool)

        for batch_idx in range(self.batches_per_epoch):
            start = batch_idx * self.P
            batch_labels = class_pool[start:start + self.P]

            if len(batch_labels) < self.P:
                break

            batch = []
            for label in batch_labels:
                indices = self.label2indices[label]

                if len(indices) >= self.K:
                    selected = random.sample(indices, k=self.K)
                else:
                    selected = random.choices(indices, k=self.K)

                batch.extend(selected)

            yield from batch

    def __len__(self):
        return self.batches_per_epoch * self.P * self.K

In [6]:
class BatchHardTripletLoss(nn.Module):
    def __init__(self, margin=0.2, mode='semi_hard'):
        super().__init__()
        self.margin = margin
        self.mode = mode

    def update_strategy(self, mode):
        self.mode = mode

    def forward(self, embs, labels):
        dist = torch.cdist(embs, embs)

        labels = labels.view(-1, 1)
        pos_mask = labels.eq(labels.t())
        neg_mask = ~pos_mask

        eye = torch.eye(len(labels), dtype=torch.bool, device=labels.device)
        pos_mask[eye] = False

        # Kiểm tra có positive không (mỗi sample cần ít nhất 1 positive)
        valid_samples = pos_mask.any(dim=1)

        if self.mode == "semi_hard":
            # Dùng CLOSEST positive (theo paper gốc FaceNet)
            pos_dist = dist.clone()
            pos_dist[~pos_mask] = 1e9
            closest_pos = pos_dist.min(dim=1)[0]  # MIN - positive gần nhất

            neg_dist = dist.clone()
            neg_dist[~neg_mask] = 1e9

            # Semi-hard negative: d(anchor, pos) < d(anchor, neg) < d(anchor, pos) + margin
            semi_mask = (neg_dist > closest_pos.unsqueeze(1)) & \
                        (neg_dist < (closest_pos + self.margin).unsqueeze(1))

            semi_neg = neg_dist.clone()
            semi_neg[~semi_mask] = 1e9
            semi_neg = semi_neg.min(dim=1)[0]

            # Fallback: nếu không tìm được semi-hard, dùng hardest negative
            hardest_neg = neg_dist.min(dim=1)[0]
            semi_neg = torch.where(semi_neg >= 1e6, hardest_neg, semi_neg)

            loss = torch.relu(closest_pos - semi_neg + self.margin)

            # Chỉ tính loss cho samples có positive
            if valid_samples.all():
                return loss.mean()
            else:
                return loss[valid_samples].mean() if valid_samples.any() else torch.tensor(0.0, device=embs.device)

        # Batch-hard mode: dùng hardest positive và hardest negative
        # Hardest positive: positive xa nhất
        pos_dist = dist.clone()
        pos_dist[~pos_mask] = -1e9
        hardest_pos = pos_dist.max(dim=1)[0]

        # Hardest negative: negative gần nhất
        neg_dist = dist.clone()
        neg_dist[~neg_mask] = 1e9
        hardest_neg = neg_dist.min(dim=1)[0]

        loss = torch.relu(hardest_pos - hardest_neg + self.margin)

        # Chỉ tính loss cho samples có positive
        if valid_samples.all():
            return loss.mean()
        else:
            return loss[valid_samples].mean() if valid_samples.any() else torch.tensor(0.0, device=embs.device)

In [7]:
# def pairwise_distance_torch(embeddings, device):
#     """Computes the pairwise distance matrix with numerical stability.
#     output[i, j] = || feature[i, :] - feature[j, :] ||_2
#     Args:
#       embeddings: 2-D Tensor of size [number of data, feature dimension].
#     Returns:
#       pairwise_distances: 2-D Tensor of size [number of data, number of data].
#     """

#     # pairwise distance matrix with precise embeddings
#     precise_embeddings = embeddings.to(dtype=torch.float32)

#     c1 = torch.pow(precise_embeddings, 2).sum(axis=-1)
#     c2 = torch.pow(precise_embeddings.transpose(0, 1), 2).sum(axis=0)
#     c3 = precise_embeddings @ precise_embeddings.transpose(0, 1)

#     c1 = c1.reshape((c1.shape[0], 1))
#     c2 = c2.reshape((1, c2.shape[0]))
#     c12 = c1 + c2
#     pairwise_distances_squared = c12 - 2.0 * c3

#     # Deal with numerical inaccuracies. Set small negatives to zero.
#     pairwise_distances_squared = torch.max(pairwise_distances_squared, torch.tensor([0.]).to(device))
#     # Get the mask where the zero distances are at.
#     error_mask = pairwise_distances_squared.clone()
#     error_mask[error_mask > 0.0] = 1.
#     error_mask[error_mask <= 0.0] = 0.

#     pairwise_distances = torch.mul(pairwise_distances_squared, error_mask)

#     # Explicitly set diagonals to zero.
#     mask_offdiagonals = torch.ones((pairwise_distances.shape[0], pairwise_distances.shape[1])) - torch.diag(torch.ones(pairwise_distances.shape[0]))
#     pairwise_distances = torch.mul(pairwise_distances.to(device), mask_offdiagonals.to(device))
#     return pairwise_distances

# def TripletSemiHardLoss(y_true, y_pred, device, margin=0.5):
#     """Computes the triplet loss_functions with semi-hard negative mining.
#        The loss_functions encourages the positive distances (between a pair of embeddings
#        with the same labels) to be smaller than the minimum negative distance
#        among which are at least greater than the positive distance plus the
#        margin constant (called semi-hard negative) in the mini-batch.
#        If no such negative exists, uses the largest negative distance instead.
#        See: https://arxiv.org/abs/1503.03832.
#        We expect labels `y_true` to be provided as 1-D integer `Tensor` with shape
#        [batch_size] of multi-class integer labels. And embeddings `y_pred` must be
#        2-D float `Tensor` of l2 normalized embedding vectors.
#        Args:
#          margin: Float, margin term in the loss_functions definition. Default value is 1.0.
#          name: Optional name for the op.
#        """

#     labels, embeddings = y_true, y_pred

#     # Reshape label tensor to [batch_size, 1].
#     lshape = labels.shape
#     labels = torch.reshape(labels, [lshape[0], 1])

#     pdist_matrix = pairwise_distance_torch(embeddings, device)

#     # Build pairwise binary adjacency matrix.
#     adjacency = torch.eq(labels, labels.transpose(0, 1))
#     # Invert so we can select negatives only.
#     adjacency_not = adjacency.logical_not()

#     batch_size = labels.shape[0]

#     # Compute the mask.
#     pdist_matrix_tile = pdist_matrix.repeat(batch_size, 1)
#     adjacency_not_tile = adjacency_not.repeat(batch_size, 1)

#     transpose_reshape = pdist_matrix.transpose(0, 1).reshape(-1, 1)
#     greater = pdist_matrix_tile > transpose_reshape

#     mask = adjacency_not_tile & greater

#     # final mask
#     mask_step = mask.to(dtype=torch.float32)
#     mask_step = mask_step.sum(axis=1)
#     mask_step = mask_step > 0.0
#     mask_final = mask_step.reshape(batch_size, batch_size)
#     mask_final = mask_final.transpose(0, 1)

#     adjacency_not = adjacency_not.to(dtype=torch.float32)
#     mask = mask.to(dtype=torch.float32)

#     # negatives_outside: smallest D_an where D_an > D_ap.
#     axis_maximums = torch.max(pdist_matrix_tile, dim=1, keepdim=True)
#     masked_minimums = torch.min(torch.mul(pdist_matrix_tile - axis_maximums[0], mask), dim=1, keepdim=True)[0] + axis_maximums[0]
#     negatives_outside = masked_minimums.reshape([batch_size, batch_size])
#     negatives_outside = negatives_outside.transpose(0, 1)

#     # negatives_inside: largest D_an.
#     axis_minimums = torch.min(pdist_matrix, dim=1, keepdim=True)
#     masked_maximums = torch.max(torch.mul(pdist_matrix - axis_minimums[0], adjacency_not), dim=1, keepdim=True)[0] + axis_minimums[0]
#     negatives_inside = masked_maximums.repeat(1, batch_size)

#     semi_hard_negatives = torch.where(mask_final, negatives_outside, negatives_inside)

#     loss_mat = margin + pdist_matrix - semi_hard_negatives

#     mask_positives = adjacency.to(dtype=torch.float32) - torch.diag(torch.ones(batch_size)).to(device)
#     num_positives = mask_positives.sum()

#     triplet_loss = (torch.max(torch.mul(loss_mat, mask_positives), torch.tensor([0.]).to(device))).sum() / num_positives
#     triplet_loss = triplet_loss.to(dtype=embeddings.dtype)
#     return triplet_loss


# class TripletLoss(nn.Module):
#     def __init__(self, device):
#         super().__init__()
#         self.device = device

#     def forward(self, input, target, **kwargs):
#         return TripletSemiHardLoss(target, input, self.device)

In [8]:
train_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])
val_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225])
])

In [9]:
train_path = '/kaggle/input/train-ds/train'
val_path = '/kaggle/input/val-ds/val'

P, K = 256, 4

train_dataset = FaceFolderDataset(train_path, transform=train_transform)
val_dataset = FaceFolderDataset(val_path, transform=val_transform)

# CÁCH 1: Truyền dataset object
train_sampler = PKSampler(
    dataset=train_dataset,
    P=P,
    K=K,
    samples_per_class_per_epoch=10  # Mỗi class sample 5 lần/epoch
)


train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=P*K,
    sampler=train_sampler,
    num_workers=2,
    pin_memory=True,
    persistent_workers=True
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=512,
    num_workers=2,
    shuffle=False,
    pin_memory=True,
    persistent_workers=True
)


PKSampler Info:
Total images: 1119807
Total classes: 5115
Avg images/class: 218.9
Samples per class per epoch: 10
Batches per epoch: 200
Total samples per epoch: 204800
Coverage: 18.3%
Expected time per epoch: ~30.0s (assuming 0.15s/batch)



In [10]:
class EarlyStopping:
    def __init__(self, patience=5, epsilon=0.001, save_path="best.pt"):
        self.patience = patience
        self.save_path = save_path
        self.epsilon = epsilon
        self.best_acc = -1
        self.counter = 0
        self.should_stop = False
        self.phase = 1

    def step(self, val_acc, model):
        if val_acc > self.best_acc + self.epsilon:
            self.best_acc = val_acc
            self.counter = 0

            model_to_save = model.module if hasattr(model, 'module') else model

            checkpoint = {
                'model_state_dict': model_to_save.state_dict(),
                'best_tar_far': self.best_acc
            }
            torch.save(checkpoint, self.save_path)
            print(f"Saved best model: TAR@FAR1e-3={val_acc:.4f}")
        else:
            self.counter += 1
            if self.counter >= self.patience:
                if self.phase == 1:
                    print("!!! UPDATE STRATEGY TO HARD MINING !!!")
                    self.phase = 2
                    self.counter = 0
                    self.patience = 5
                elif self.phase == 2:
                    self.should_stop = True
                    print("Early stopping triggered!")

In [11]:
class CNN(nn.Module):
    def __init__(self, embedding_dim=4096):
        super().__init__()

        # Phần CNN trích feature map
        self.cnn = nn.Sequential(
            # 160 -> 151
            nn.Conv2d(3, 64, kernel_size=10),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 151 -> 75

            # 75 -> 69
            nn.Conv2d(64, 128, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 69 -> 34

            # 34 -> 31
            nn.Conv2d(128, 128, kernel_size=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 31 -> 15

            # 15 -> 12
            nn.Conv2d(128, 256, kernel_size=4),
            nn.ReLU(inplace=True)
        )

        # Tính kích thước sau CNN để gán cho FC
        # Với input 160x160 -> output 256x12x12
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 12 * 12, embedding_dim),
            nn.PReLU(),  # tốt hơn Sigmoid với face embedding
            nn.Linear(embedding_dim, embedding_dim),
        )

    def forward(self, x):
        x = self.cnn(x)
        x = self.fc(x)
        # Chuẩn hóa L2 để tính khoảng cách trong không gian metric
        return F.normalize(x, p=2, dim=1)


In [12]:
NUM_EPOCHS = 60
learning_rate = 1e-4

# model = InceptionResnetV1(pretrained='casia-webface')
model = CNN(embedding_dim=512).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4, betas=(0.9, 0.999))
criterion = BatchHardTripletLoss(margin=0.5, mode='semi_hard')

In [13]:
def train():
    train_losses = []
    test_losses = []
    tarfar3s = []
    tarfar4s = []
    accs = []
    rocs = []

    # early stopping
    early = EarlyStopping(patience=3, save_path="best.pt")
    is_update = False

    # FP16 scaler
    scaler = GradScaler()

    for epoch in range(NUM_EPOCHS):
        model.train()
        train_loss = []

        pbar = tqdm(
            enumerate(train_loader),
            total=len(train_loader),
            desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [LR: {learning_rate:.6f}]"
        )

        for step, (inputs, targets) in pbar:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()

            # ===========================
            #       FP16 TRAINING
            # ===========================
            with autocast(dtype=torch.float16):
                outputs = model(inputs)
                outputs = F.normalize(outputs, p=2, dim=1)
                loss = criterion(outputs, targets)

            # Scaler backward
            scaler.scale(loss).backward()

            # Clip grad norm (có scale)
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)

            # Step optimizer
            scaler.step(optimizer)
            scaler.update()

            train_loss.append(loss.item())
            pbar.set_postfix({'loss': f'{loss.item():.4f}'})

        train_loss = np.mean(train_loss)

        # ==================== EVALUATION ====================
        model.eval()
        test_loss = []
        embs = []
        labels = []

        with torch.no_grad():
            # Evaluation không cần autocast
            for inputs, targets in tqdm(val_loader):
                inputs, targets = inputs.to(device), targets.to(device)

                with autocast(dtype=torch.float16):
                    outputs = model(inputs)
                    outputs = F.normalize(outputs, p=2, dim=1)
                    loss = criterion(outputs, targets)

                test_loss.append(loss.item())
                embs.append(outputs.cpu())
                labels.append(targets.cpu())

        test_loss = np.mean(test_loss)

        eval_res = evaluate.evaluate(embs, labels, max_per_class=50, n_linspace=1000)

        # early stopping
        early.step(eval_res["tar_far_3"], model)

        # save ckpt + print
        helper.save_checkpoint(model, epoch, optimizer, train_loss,
                               eval_res["tar_far_3"], eval_res["tar_far_4"])
        helper.print_results(optimizer, epoch, NUM_EPOCHS, train_loss, eval_res)

        # save scores
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        tarfar3s.append(eval_res["tar_far_3"])
        tarfar4s.append(eval_res["tar_far_4"])
        accs.append(eval_res["accuracy"])
        rocs.append(eval_res["roc_auc"])

        # Switch to HARD mining
        if early.phase == 2 and not is_update:
            is_update = True
            criterion.update_strategy("hard")
            print("Switch loss to HARD mining!")

        if early.should_stop:
            break

    print(f"\nTraining completed! Best TAR@FAR1e-3: {early.best_acc:.4f}")

    return train_losses, test_losses, tarfar3s, tarfar4s, accs, rocs

In [14]:
train_losses, test_losses, tarfar3s, tarfar4s, accs, rocs = train()

  scaler = GradScaler()
  with autocast(dtype=torch.float16):
Epoch 1/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [12:17<00:03,  3.71s/it, loss=0.4702]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [07:02<00:00,  1.88s/it]


Saved best model: TAR@FAR1e-3=0.0921
Saved checkpoint at epoch 1

Epoch 1/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4819
Eval Metrics:
  - Accuracy: 0.7777
  - ROC AUC: 0.8563
  - TAR@FAR1e-3: 0.0921
  - TAR@FAR1e-4: 0.0297
  - Threshold: 0.3959



  with autocast(dtype=torch.float16):
Epoch 2/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [11:01<00:03,  3.32s/it, loss=0.4603]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:03<00:00,  1.23it/s]


Saved best model: TAR@FAR1e-3=0.1552
Saved checkpoint at epoch 2

Epoch 2/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4634
Eval Metrics:
  - Accuracy: 0.8195
  - ROC AUC: 0.9006
  - TAR@FAR1e-3: 0.1552
  - TAR@FAR1e-4: 0.0619
  - Threshold: 0.3438



  with autocast(dtype=torch.float16):
Epoch 3/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [10:33<00:03,  3.18s/it, loss=0.4427]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:11<00:00,  1.17it/s]


Saved best model: TAR@FAR1e-3=0.1816
Saved checkpoint at epoch 3

Epoch 3/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4507
Eval Metrics:
  - Accuracy: 0.8364
  - ROC AUC: 0.9161
  - TAR@FAR1e-3: 0.1816
  - TAR@FAR1e-4: 0.0745
  - Threshold: 0.3023



  with autocast(dtype=torch.float16):
Epoch 4/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [09:57<00:03,  3.00s/it, loss=0.4410]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:02<00:00,  1.23it/s]


Saved best model: TAR@FAR1e-3=0.2199
Saved checkpoint at epoch 4

Epoch 4/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4401
Eval Metrics:
  - Accuracy: 0.8485
  - ROC AUC: 0.9270
  - TAR@FAR1e-3: 0.2199
  - TAR@FAR1e-4: 0.0924
  - Threshold: 0.3021



  with autocast(dtype=torch.float16):
Epoch 5/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [08:47<00:02,  2.65s/it, loss=0.4299]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:03<00:00,  1.23it/s]


Saved best model: TAR@FAR1e-3=0.2479
Saved checkpoint at epoch 5

Epoch 5/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4307
Eval Metrics:
  - Accuracy: 0.8558
  - ROC AUC: 0.9331
  - TAR@FAR1e-3: 0.2479
  - TAR@FAR1e-4: 0.1044
  - Threshold: 0.2688



  with autocast(dtype=torch.float16):
Epoch 6/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [08:57<00:02,  2.70s/it, loss=0.4190]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:46<00:00,  1.01s/it]


Saved best model: TAR@FAR1e-3=0.2619
Saved checkpoint at epoch 6

Epoch 6/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4227
Eval Metrics:
  - Accuracy: 0.8636
  - ROC AUC: 0.9386
  - TAR@FAR1e-3: 0.2619
  - TAR@FAR1e-4: 0.1211
  - Threshold: 0.2863



  with autocast(dtype=torch.float16):
Epoch 7/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [09:12<00:02,  2.78s/it, loss=0.4179]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:09<00:00,  1.19it/s]


Saved best model: TAR@FAR1e-3=0.3008
Saved checkpoint at epoch 7

Epoch 7/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4155
Eval Metrics:
  - Accuracy: 0.8666
  - ROC AUC: 0.9412
  - TAR@FAR1e-3: 0.3008
  - TAR@FAR1e-4: 0.1392
  - Threshold: 0.2779



  with autocast(dtype=torch.float16):
Epoch 8/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [09:11<00:02,  2.77s/it, loss=0.3982]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:28<00:00,  1.08it/s]


Saved best model: TAR@FAR1e-3=0.3063
Saved checkpoint at epoch 8

Epoch 8/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4087
Eval Metrics:
  - Accuracy: 0.8685
  - ROC AUC: 0.9420
  - TAR@FAR1e-3: 0.3063
  - TAR@FAR1e-4: 0.1481
  - Threshold: 0.2816



  with autocast(dtype=torch.float16):
Epoch 9/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [09:00<00:02,  2.71s/it, loss=0.3940]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:39<00:00,  1.03it/s]


Saved best model: TAR@FAR1e-3=0.3335
Saved checkpoint at epoch 9

Epoch 9/60
Learning Rate: Backbone=0.000100
Train Loss: 0.4029
Eval Metrics:
  - Accuracy: 0.8762
  - ROC AUC: 0.9488
  - TAR@FAR1e-3: 0.3335
  - TAR@FAR1e-4: 0.1820
  - Threshold: 0.2728



  with autocast(dtype=torch.float16):
Epoch 10/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [09:16<00:02,  2.79s/it, loss=0.3984]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:30<00:00,  1.07it/s]


Saved best model: TAR@FAR1e-3=0.3451
Saved checkpoint at epoch 10

Epoch 10/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3967
Eval Metrics:
  - Accuracy: 0.8809
  - ROC AUC: 0.9507
  - TAR@FAR1e-3: 0.3451
  - TAR@FAR1e-4: 0.1704
  - Threshold: 0.2610



  with autocast(dtype=torch.float16):
Epoch 11/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [09:04<00:02,  2.74s/it, loss=0.3980]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:59<00:00,  1.25it/s]


Saved checkpoint at epoch 11

Epoch 11/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3914
Eval Metrics:
  - Accuracy: 0.8796
  - ROC AUC: 0.9490
  - TAR@FAR1e-3: 0.3348
  - TAR@FAR1e-4: 0.1525
  - Threshold: 0.2707



  with autocast(dtype=torch.float16):
Epoch 12/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [08:21<00:02,  2.52s/it, loss=0.3797]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:32<00:00,  1.48it/s]


Saved best model: TAR@FAR1e-3=0.3585
Saved checkpoint at epoch 12

Epoch 12/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3872
Eval Metrics:
  - Accuracy: 0.8849
  - ROC AUC: 0.9537
  - TAR@FAR1e-3: 0.3585
  - TAR@FAR1e-4: 0.1910
  - Threshold: 0.2595



  with autocast(dtype=torch.float16):
Epoch 13/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [08:09<00:02,  2.46s/it, loss=0.3903]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:57<00:00,  1.26it/s]


Saved best model: TAR@FAR1e-3=0.3667
Saved checkpoint at epoch 13

Epoch 13/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3823
Eval Metrics:
  - Accuracy: 0.8864
  - ROC AUC: 0.9538
  - TAR@FAR1e-3: 0.3667
  - TAR@FAR1e-4: 0.2003
  - Threshold: 0.2628



  with autocast(dtype=torch.float16):
Epoch 14/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [08:01<00:02,  2.42s/it, loss=0.3805]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:46<00:00,  1.35it/s]


Saved best model: TAR@FAR1e-3=0.3926
Saved checkpoint at epoch 14

Epoch 14/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3784
Eval Metrics:
  - Accuracy: 0.8889
  - ROC AUC: 0.9558
  - TAR@FAR1e-3: 0.3926
  - TAR@FAR1e-4: 0.2119
  - Threshold: 0.2550



  with autocast(dtype=torch.float16):
Epoch 15/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:39<00:02,  2.31s/it, loss=0.3790]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:28<00:00,  1.52it/s]


Saved best model: TAR@FAR1e-3=0.3968
Saved checkpoint at epoch 15

Epoch 15/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3748
Eval Metrics:
  - Accuracy: 0.8927
  - ROC AUC: 0.9587
  - TAR@FAR1e-3: 0.3968
  - TAR@FAR1e-4: 0.2293
  - Threshold: 0.2609



  with autocast(dtype=torch.float16):
Epoch 16/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:32<00:02,  2.27s/it, loss=0.3749]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:35<00:00,  1.45it/s]


Saved best model: TAR@FAR1e-3=0.4060
Saved checkpoint at epoch 16

Epoch 16/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3705
Eval Metrics:
  - Accuracy: 0.8947
  - ROC AUC: 0.9600
  - TAR@FAR1e-3: 0.4060
  - TAR@FAR1e-4: 0.2302
  - Threshold: 0.2529



  with autocast(dtype=torch.float16):
Epoch 17/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:28<00:02,  2.26s/it, loss=0.3777]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:32<00:00,  1.47it/s]


Saved best model: TAR@FAR1e-3=0.4201
Saved checkpoint at epoch 17

Epoch 17/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3673
Eval Metrics:
  - Accuracy: 0.8960
  - ROC AUC: 0.9606
  - TAR@FAR1e-3: 0.4201
  - TAR@FAR1e-4: 0.2251
  - Threshold: 0.2594



  with autocast(dtype=torch.float16):
Epoch 18/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:27<00:02,  2.25s/it, loss=0.3534]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:29<00:00,  1.50it/s]


Saved checkpoint at epoch 18

Epoch 18/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3632
Eval Metrics:
  - Accuracy: 0.8964
  - ROC AUC: 0.9600
  - TAR@FAR1e-3: 0.4085
  - TAR@FAR1e-4: 0.2140
  - Threshold: 0.2532



  with autocast(dtype=torch.float16):
Epoch 19/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:22<00:02,  2.22s/it, loss=0.3601]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:32<00:00,  1.48it/s]


Saved best model: TAR@FAR1e-3=0.4285
Saved checkpoint at epoch 19

Epoch 19/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3613
Eval Metrics:
  - Accuracy: 0.8951
  - ROC AUC: 0.9593
  - TAR@FAR1e-3: 0.4285
  - TAR@FAR1e-4: 0.2454
  - Threshold: 0.2429



  with autocast(dtype=torch.float16):
Epoch 20/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [08:16<00:02,  2.49s/it, loss=0.3616]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:34<00:00,  1.45it/s]


Saved best model: TAR@FAR1e-3=0.4383
Saved checkpoint at epoch 20

Epoch 20/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3574
Eval Metrics:
  - Accuracy: 0.8989
  - ROC AUC: 0.9622
  - TAR@FAR1e-3: 0.4383
  - TAR@FAR1e-4: 0.2587
  - Threshold: 0.2610



  with autocast(dtype=torch.float16):
Epoch 21/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:36<00:02,  2.29s/it, loss=0.3533]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:35<00:00,  1.45it/s]


Saved best model: TAR@FAR1e-3=0.4467
Saved checkpoint at epoch 21

Epoch 21/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3546
Eval Metrics:
  - Accuracy: 0.9005
  - ROC AUC: 0.9628
  - TAR@FAR1e-3: 0.4467
  - TAR@FAR1e-4: 0.2568
  - Threshold: 0.2554



  with autocast(dtype=torch.float16):
Epoch 22/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:12<00:02,  2.17s/it, loss=0.3464]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:08<00:00,  1.20it/s]


Saved checkpoint at epoch 22

Epoch 22/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3525
Eval Metrics:
  - Accuracy: 0.9000
  - ROC AUC: 0.9623
  - TAR@FAR1e-3: 0.4387
  - TAR@FAR1e-4: 0.2547
  - Threshold: 0.2441



  with autocast(dtype=torch.float16):
Epoch 23/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:21<00:02,  2.22s/it, loss=0.3556]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:05<00:00,  1.21it/s]


Saved best model: TAR@FAR1e-3=0.4522
Saved checkpoint at epoch 23

Epoch 23/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3503
Eval Metrics:
  - Accuracy: 0.9024
  - ROC AUC: 0.9635
  - TAR@FAR1e-3: 0.4522
  - TAR@FAR1e-4: 0.2537
  - Threshold: 0.2447



  with autocast(dtype=torch.float16):
Epoch 24/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:13<00:02,  2.18s/it, loss=0.3391]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:50<00:00,  1.32it/s]


Saved best model: TAR@FAR1e-3=0.4547
Saved checkpoint at epoch 24

Epoch 24/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3481
Eval Metrics:
  - Accuracy: 0.9002
  - ROC AUC: 0.9623
  - TAR@FAR1e-3: 0.4547
  - TAR@FAR1e-4: 0.2714
  - Threshold: 0.2475



  with autocast(dtype=torch.float16):
Epoch 25/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:14<00:02,  2.18s/it, loss=0.3530]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:28<00:00,  1.52it/s]


Saved best model: TAR@FAR1e-3=0.4635
Saved checkpoint at epoch 25

Epoch 25/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3455
Eval Metrics:
  - Accuracy: 0.9045
  - ROC AUC: 0.9645
  - TAR@FAR1e-3: 0.4635
  - TAR@FAR1e-4: 0.2855
  - Threshold: 0.2464



  with autocast(dtype=torch.float16):
Epoch 26/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:14<00:02,  2.19s/it, loss=0.3403]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:30<00:00,  1.50it/s]


Saved checkpoint at epoch 26

Epoch 26/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3429
Eval Metrics:
  - Accuracy: 0.9042
  - ROC AUC: 0.9642
  - TAR@FAR1e-3: 0.4585
  - TAR@FAR1e-4: 0.2661
  - Threshold: 0.2557



  with autocast(dtype=torch.float16):
Epoch 27/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:26<00:02,  2.25s/it, loss=0.3487]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:26<00:00,  1.54it/s]


Saved best model: TAR@FAR1e-3=0.4748
Saved checkpoint at epoch 27

Epoch 27/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3409
Eval Metrics:
  - Accuracy: 0.9058
  - ROC AUC: 0.9658
  - TAR@FAR1e-3: 0.4748
  - TAR@FAR1e-4: 0.2871
  - Threshold: 0.2502



  with autocast(dtype=torch.float16):
Epoch 28/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:13<00:02,  2.18s/it, loss=0.3327]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:25<00:00,  1.54it/s]


Saved checkpoint at epoch 28

Epoch 28/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3396
Eval Metrics:
  - Accuracy: 0.9054
  - ROC AUC: 0.9650
  - TAR@FAR1e-3: 0.4753
  - TAR@FAR1e-4: 0.2829
  - Threshold: 0.2583



  with autocast(dtype=torch.float16):
Epoch 29/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:35<00:02,  2.29s/it, loss=0.3319]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [03:17<00:00,  1.14it/s]


Saved checkpoint at epoch 29

Epoch 29/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3379
Eval Metrics:
  - Accuracy: 0.9050
  - ROC AUC: 0.9651
  - TAR@FAR1e-3: 0.4686
  - TAR@FAR1e-4: 0.2807
  - Threshold: 0.2566



  with autocast(dtype=torch.float16):
Epoch 30/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:04<00:02,  2.13s/it, loss=0.3472]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:24<00:00,  1.56it/s]


!!! UPDATE STRATEGY TO HARD MINING !!!
Saved checkpoint at epoch 30

Epoch 30/60
Learning Rate: Backbone=0.000100
Train Loss: 0.3356
Eval Metrics:
  - Accuracy: 0.9050
  - ROC AUC: 0.9645
  - TAR@FAR1e-3: 0.4743
  - TAR@FAR1e-4: 0.2833
  - Threshold: 0.2522

Switch loss to HARD mining!


  with autocast(dtype=torch.float16):
Epoch 31/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:04<00:02,  2.13s/it, loss=0.5012]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:58<00:00,  1.26it/s]


Saved checkpoint at epoch 31

Epoch 31/60
Learning Rate: Backbone=0.000100
Train Loss: 0.5096
Eval Metrics:
  - Accuracy: 0.8112
  - ROC AUC: 0.8941
  - TAR@FAR1e-3: 0.1508
  - TAR@FAR1e-4: 0.0600
  - Threshold: 1.0000



  with autocast(dtype=torch.float16):
Epoch 32/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:04<00:02,  2.13s/it, loss=0.5008]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:25<00:00,  1.55it/s]


Saved checkpoint at epoch 32

Epoch 32/60
Learning Rate: Backbone=0.000100
Train Loss: 0.5010
Eval Metrics:
  - Accuracy: 0.8028
  - ROC AUC: 0.8860
  - TAR@FAR1e-3: 0.1290
  - TAR@FAR1e-4: 0.0522
  - Threshold: 1.0000



  with autocast(dtype=torch.float16):
Epoch 33/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:06<00:02,  2.14s/it, loss=0.5007]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:48<00:00,  1.33it/s]


Saved checkpoint at epoch 33

Epoch 33/60
Learning Rate: Backbone=0.000100
Train Loss: 0.5008
Eval Metrics:
  - Accuracy: 0.8114
  - ROC AUC: 0.8949
  - TAR@FAR1e-3: 0.1360
  - TAR@FAR1e-4: 0.0507
  - Threshold: 1.0000



  with autocast(dtype=torch.float16):
Epoch 34/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:08<00:02,  2.15s/it, loss=0.5007]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:35<00:00,  1.45it/s]


Saved checkpoint at epoch 34

Epoch 34/60
Learning Rate: Backbone=0.000100
Train Loss: 0.5007
Eval Metrics:
  - Accuracy: 0.8190
  - ROC AUC: 0.9020
  - TAR@FAR1e-3: 0.1436
  - TAR@FAR1e-4: 0.0581
  - Threshold: 1.0000



  with autocast(dtype=torch.float16):
Epoch 35/60 [LR: 0.000100]: 100%|█████████▉| 199/200 [07:13<00:02,  2.18s/it, loss=0.5007]
  with autocast(dtype=torch.float16):
100%|██████████| 225/225 [02:28<00:00,  1.52it/s]


Early stopping triggered!
Saved checkpoint at epoch 35

Epoch 35/60
Learning Rate: Backbone=0.000100
Train Loss: 0.5007
Eval Metrics:
  - Accuracy: 0.8277
  - ROC AUC: 0.9102
  - TAR@FAR1e-3: 0.1528
  - TAR@FAR1e-4: 0.0623
  - Threshold: 1.0000


Training completed! Best TAR@FAR1e-3: 0.4748
