In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import timm  # For EfficientNet
from PIL import Image
import matplotlib.pyplot as plt
import random
from tqdm import tqdm
import time

# Seed every random number generator this script draws from so that
# repeated runs sample the same pairs/augmentations.
_SEED = 42
for _seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    _seed_rng(_SEED)

# Configuration
class Config:
    """Static settings read as class attributes by the rest of this script.

    Nothing instantiates this class; the values are looked up directly
    (e.g. ``Config.batch_size``) wherever they are needed.
    """
    # Paths (CSV manifests and the image folders they refer to)
    train_csv = "/kaggle/input/visual-product-recognition/train.csv"
    test_csv = "/kaggle/input/visual-product-recognition/test.csv"
    train_dir = "/kaggle/input/visual-product-recognition/train/train"
    test_dir = "/kaggle/input/visual-product-recognition/test/test"
    # Training parameters
    batch_size = 32  # used for both the train and the test DataLoader
    num_epochs = 10
    learning_rate = 0.001  # passed to optim.Adam
    embedding_dim = 128  # size of the vector produced by the encoder
    # Model parameters
    distance_metric = "euclidean"  # Options: "cosine", "euclidean", "manhattan"
    loss_type = "contrastive"   # Options: "contrastive", "triplet", "angular"
    margin = 1.0  # Margin for contrastive/triplet/angular loss
    # Any name accepted by Encoder: "resnet18/34/50/101/152",
    # "efficientnet_b0" ... "efficientnet_b8", "vgg16", "vgg19"
    backbone = "resnet101"
    # Image parameters
    img_size = 224  # images are resized to img_size x img_size
    # Checkpointing
    # NOTE: this f-string is evaluated once, while the class body runs, so
    # changing distance_metric/loss_type afterwards does not rename the file.
    checkpoint_path = f"siamese_model_{distance_metric}_{loss_type}.pth"
    # Evaluation
    top_k = 5  # For mAP@k calculation

# Custom Dataset
class ProductDataset(Dataset):
    """Product images served as singles, pairs or triplets depending on the loss.

    The CSV must provide ``name`` (image file, relative to ``img_dir``) and
    ``class`` columns; when a ``group`` column is present the label becomes
    ``"<class>_<group>"``, otherwise it is ``"<class>"``.

    What ``__getitem__`` returns depends on ``is_train`` and
    ``Config.loss_type``:

    * training with ``"triplet"`` or ``"angular"``:
      ``(anchor, positive, negative), (anchor_label, positive_label, negative_label)``
    * training with ``"contrastive"``:
      ``(img, img2), (label, label2, is_same)`` where ``is_same`` is 1 or 0
    * anything else (evaluation): ``img, label, img_name``

    Args:
        csv_file: path of the CSV manifest.
        img_dir: directory that contains the image files.
        transform: optional callable applied to every loaded PIL image.
        is_train: enables pair/triplet sampling when True.

    Raises:
        ValueError: when a negative sample is requested but the manifest
            contains fewer than two distinct labels.
    """

    # Losses whose training step consumes (anchor, positive, negative) triplets
    _TRIPLET_LOSSES = ("triplet", "angular")

    def __init__(self, csv_file, img_dir, transform=None, is_train=True):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.is_train = is_train

        # Combine class and group to create a unique label
        if 'group' in self.data.columns:
            self.data['label'] = (self.data['class'].astype(str) + '_'
                                  + self.data['group'].astype(str))
        else:
            self.data['label'] = self.data['class'].astype(str)

        self.labels = self.data['label'].unique()
        # Plain list used for sampling labels without touching array truthiness
        self._label_list = list(self.labels)
        # label -> array of row positions, built in a single pass instead of
        # scanning the whole label column once per label
        self.label_to_indices = dict(
            self.data.groupby('label', sort=False).indices)

        # Triplet-style losses train on a fixed list of
        # (anchor, positive, negative) row positions generated up front
        if self._uses_triplets():
            self.triplets = self._generate_triplets()

    def _uses_triplets(self):
        """Tell whether items are served as (anchor, positive, negative)."""
        return self.is_train and Config.loss_type in self._TRIPLET_LOSSES

    @staticmethod
    def _pick(seq):
        """Return a uniformly random element of a non-empty list or array."""
        # Indexing by a random position (rather than random.choice) avoids
        # evaluating the truth value of a NumPy array.
        return seq[random.randrange(len(seq))]

    def _random_other_label(self, label):
        """Return a label drawn uniformly from all labels except ``label``."""
        if len(self._label_list) < 2:
            raise ValueError(
                "Negative sampling needs at least two distinct labels")
        # Rejection sampling keeps the draw uniform over the other labels
        # without rebuilding a filtered label list for every sample.
        while True:
            candidate = self._pick(self._label_list)
            if candidate != label:
                return candidate

    def _generate_triplets(self):
        """Build one (anchor, positive, negative) triple per usable anchor.

        Labels with a single image are skipped because they cannot provide
        a positive partner.
        """
        triplets = []
        for label in self._label_list:
            members = self.label_to_indices[label]
            if len(members) < 2:
                continue

            for anchor_idx in members:
                positive_idx = self._pick(members[members != anchor_idx])
                # Select a negative from a different label
                negative_pool = self.label_to_indices[
                    self._random_other_label(label)]
                negative_idx = self._pick(negative_pool)
                triplets.append(
                    (int(anchor_idx), int(positive_idx), int(negative_idx)))

        return triplets

    def __len__(self):
        if self._uses_triplets():
            return len(self.triplets)
        return len(self.data)

    def __getitem__(self, idx):
        if self._uses_triplets():
            members = self.triplets[idx]
            images = tuple(self._load_image(i) for i in members)
            labels = tuple(self.data.iloc[i]['label'] for i in members)
            return images, labels

        img = self._load_image(idx)
        label = self.data.iloc[idx]['label']
        img_name = self.data.iloc[idx]['name']

        if Config.loss_type == "contrastive" and self.is_train:
            # 50% chance to get a positive pair (same label)
            if random.random() > 0.5:
                siblings = self.label_to_indices[label]
                others = siblings[siblings != idx]
                # A label with a single image is paired with itself
                idx2 = int(self._pick(others)) if len(others) else idx
                is_same = 1
            else:
                # Get a sample from a different label
                different_label = self._random_other_label(label)
                idx2 = int(self._pick(self.label_to_indices[different_label]))
                is_same = 0

            img2 = self._load_image(idx2)
            label2 = self.data.iloc[idx2]['label']

            return (img, img2), (label, label2, is_same)

        # For evaluation or when not using a pair/triplet loss
        return img, label, img_name

    def _load_image(self, idx):
        """Open row ``idx``'s image as RGB and apply ``self.transform`` if set."""
        img_name = self.data.iloc[idx]['name']
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image


# Define the CNN encoder with backbone options
class Encoder(nn.Module):
    """Pretrained CNN backbone followed by a small embedding head.

    The backbone's classification layer is dropped and replaced by
    Linear -> BatchNorm1d -> ReLU -> Linear; the output is L2-normalised so
    every embedding has unit length.

    Args:
        embedding_dim: size of the returned embedding vector.
        backbone: one of ``resnet18/34/50/101/152`` (torchvision),
            ``efficientnet_b0`` ... ``efficientnet_b8`` (timm) or
            ``vgg16``/``vgg19`` (torchvision).

    Raises:
        ValueError: if ``backbone`` is not one of the supported names.
    """

    _RESNETS = ("resnet18", "resnet34", "resnet50", "resnet101", "resnet152")
    _EFFICIENTNETS = tuple(f"efficientnet_b{i}" for i in range(9))
    _VGGS = ("vgg16", "vgg19")

    def __init__(self, embedding_dim=128, backbone="resnet50"):
        super().__init__()
        print(f"Using backbone: {backbone}")
        if backbone.startswith("resnet"):
            if backbone not in self._RESNETS:
                raise ValueError(f"Unsupported ResNet backbone: {backbone}")
            base_model = getattr(models, backbone)(
                progress=True, weights='DEFAULT')
            # Remove the final classification layer
            self.features = nn.Sequential(*list(base_model.children())[:-1])
            in_features = base_model.fc.in_features
        elif backbone.startswith("efficientnet"):
            if backbone not in self._EFFICIENTNETS:
                raise ValueError(
                    f"Unsupported EfficientNet backbone: {backbone}")
            base_model = timm.create_model(backbone, pretrained=True)
            # Remove the final classification layer
            self.features = nn.Sequential(*list(base_model.children())[:-1])
            in_features = base_model.classifier.in_features
        elif backbone.startswith("vgg"):
            if backbone not in self._VGGS:
                raise ValueError(f"Unsupported VGG backbone: {backbone}")
            # Load pretrained weights like every other backbone does
            base_model = getattr(models, backbone)(
                progress=True, weights='DEFAULT')
            # Keep the convolutional stack only. The adaptive pool pins the
            # feature map to 7x7 so the 512*7*7 head below is valid for any
            # input resolution; it has no parameters, so the state-dict keys
            # of the convolutional layers are unchanged.
            self.features = nn.Sequential(
                *base_model.features.children(),
                nn.AdaptiveAvgPool2d((7, 7)),
            )
            in_features = 512 * 7 * 7
        else:
            raise ValueError(f"Unsupported backbone: {backbone}")
        print("Done loading backbone")
        # Fully connected layers for embedding
        self.classifier = nn.Sequential(
            nn.Linear(in_features, embedding_dim),
            nn.BatchNorm1d(embedding_dim),
            nn.ReLU(inplace=True),
            nn.Linear(embedding_dim, embedding_dim)
        )

    def forward(self, x):
        """Map a batch of images to unit-length embedding vectors."""
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        # L2 normalize embeddings
        x = nn.functional.normalize(x, p=2, dim=1)
        return x


# Siamese Network
class SiameseNetwork(nn.Module):
    """Runs one shared Encoder over one, two or three image batches."""

    def __init__(self, embedding_dim=128, backbone="resnet50"):
        super().__init__()
        self.encoder = Encoder(embedding_dim, backbone)

    def forward_one(self, x):
        """Embed a single batch with the shared encoder."""
        return self.encoder(x)

    def forward(self, x1, x2=None, x3=None):
        """Embed the given batches with the same weights.

        Returns a single embedding when only ``x1`` is usable, a pair when
        ``x2`` is given, and a triple when both ``x2`` and ``x3`` are given.
        ``x3`` is ignored when ``x2`` is None.
        """
        first = self.forward_one(x1)
        if x2 is None:
            # Single input (for inference)
            return first

        second = self.forward_one(x2)
        if x3 is None:
            # Pair input
            return first, second

        # Triplet input
        third = self.forward_one(x3)
        return first, second, third

# Loss Functions


class ContrastiveLoss(nn.Module):
    """Pairwise contrastive loss over a selectable distance.

    Pairs with ``target == 1`` are penalised by their squared distance;
    pairs with ``target == 0`` are penalised by the squared amount by which
    their distance falls short of ``margin``.

    Args:
        margin: distance beyond which dissimilar pairs stop contributing.
        distance_metric: ``"cosine"``, ``"euclidean"`` or ``"manhattan"``.
    """

    def __init__(self, margin=1.0, distance_metric="cosine"):
        super().__init__()
        self.margin = margin
        self.distance_metric = distance_metric

    def _distance(self, left, right):
        """Return the per-pair distance under the configured metric."""
        metric = self.distance_metric
        if metric == "cosine":
            # 1 - cos turns the similarity into a distance
            return 1.0 - nn.functional.cosine_similarity(left, right)
        if metric == "euclidean":
            return nn.functional.pairwise_distance(left, right, p=2)
        if metric == "manhattan":
            return nn.functional.pairwise_distance(left, right, p=1)
        raise ValueError(
            f"Unsupported distance metric: {self.distance_metric}")

    def forward(self, output1, output2, target):
        """Return the mean contrastive loss of the batch."""
        distance = self._distance(output1, output2)
        # Similar pairs are pulled together ...
        attract = target * torch.pow(distance, 2)
        # ... dissimilar pairs are pushed apart until they clear the margin
        shortfall = torch.clamp(self.margin - distance, min=0.0)
        repel = (1 - target) * torch.pow(shortfall, 2)
        return (attract + repel).mean()


class TripletLoss(nn.Module):
    """Triplet margin loss over a selectable distance.

    Each triplet contributes ``max(d(a, p) - d(a, n) + margin, 0)``.

    Args:
        margin: required gap between negative and positive distances.
        distance_metric: ``"cosine"``, ``"euclidean"`` or ``"manhattan"``.
    """

    def __init__(self, margin=1.0, distance_metric="cosine"):
        super().__init__()
        self.margin = margin
        self.distance_metric = distance_metric

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss of the batch."""
        metric = self.distance_metric
        if metric == "cosine":
            # Convert cosine similarity to distance
            def measure(other):
                return 1.0 - nn.functional.cosine_similarity(anchor, other)
        elif metric == "euclidean":
            def measure(other):
                return nn.functional.pairwise_distance(anchor, other, p=2)
        elif metric == "manhattan":
            def measure(other):
                return nn.functional.pairwise_distance(anchor, other, p=1)
        else:
            raise ValueError(
                f"Unsupported distance metric: {self.distance_metric}")

        pos_dist = measure(positive)
        neg_dist = measure(negative)
        # The positive must be closer than the negative by at least margin
        violation = pos_dist - neg_dist + self.margin
        return torch.clamp(violation, min=0.0).mean()


class AngularLoss(nn.Module):
    """Hinge loss on anchor/positive/negative distances with an angular weight.

    With ``ap`` and ``an`` the Euclidean anchor-positive and anchor-negative
    distances, each triplet contributes
    ``max(ap^2 - 4 * an^2 * cos^2(angle) + margin, 0)`` where
    ``angle = atan(sqrt((an^2 + ap^2) / (4 * ap^2))) - alpha``.

    NOTE(review): this expression does not look like the midpoint-based
    formulation usually cited for angular loss -- confirm it is intended.

    Args:
        margin: constant added inside the hinge.
        alpha: angle offset in degrees.
    """

    def __init__(self, margin=1.0, alpha=45):
        super().__init__()
        self.margin = margin
        # Stored in radians because it is subtracted from an atan() result
        self.alpha = alpha * (np.pi / 180)

    def forward(self, anchor, positive, negative):
        """Return the mean loss of the batch."""
        ap = nn.functional.pairwise_distance(anchor, positive, p=2)
        an = nn.functional.pairwise_distance(anchor, negative, p=2)
        ap_sq = ap ** 2
        an_sq = an ** 2

        ratio = (an_sq + ap_sq) / (4 * ap_sq)
        angle = torch.atan(torch.sqrt(ratio)) - self.alpha
        cos_sq = torch.cos(angle) ** 2

        hinge = torch.clamp(ap_sq - 4 * an_sq * cos_sq + self.margin, min=0.0)
        return hinge.mean()


# Data Transformations
# Both pipelines end with the same resize target and per-channel
# normalisation; only the training one adds random augmentation.
# NOTE(review): the mean/std values are presumably the statistics the
# pretrained backbones were trained with -- confirm.
_target_size = (Config.img_size, Config.img_size)
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

_train_pipeline = transforms.Compose([
    transforms.Resize(_target_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(
        brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
])

_test_pipeline = transforms.Compose([
    transforms.Resize(_target_size),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
])

data_transforms = {'train': _train_pipeline, 'test': _test_pipeline}


# Create datasets: the training split samples pairs/triplets, the test
# split yields single images for embedding extraction.
train_dataset = ProductDataset(
    Config.train_csv,
    Config.train_dir,
    transform=data_transforms['train'],
    is_train=True,
)
test_dataset = ProductDataset(
    Config.test_csv,
    Config.test_dir,
    transform=data_transforms['test'],
    is_train=False,
)

# Create dataloaders; both share batch size, worker count and pinned memory
_loader_options = dict(
    batch_size=Config.batch_size,
    num_workers=4,
    pin_memory=True,
)
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    persistent_workers=True,  # Keep workers alive for faster loading
    **_loader_options,
)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

# Initialize model, optimizer, and loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize model
model = SiameseNetwork(
    embedding_dim=Config.embedding_dim, backbone=Config.backbone).to(device)
print("Model initialized successfully.")
optimizer = optim.Adam(model.parameters(), lr=Config.learning_rate)

# One factory per supported loss; only the selected one is instantiated
_criterion_factories = {
    "contrastive": lambda: ContrastiveLoss(
        margin=Config.margin, distance_metric=Config.distance_metric),
    "triplet": lambda: TripletLoss(
        margin=Config.margin, distance_metric=Config.distance_metric),
    "angular": lambda: AngularLoss(margin=Config.margin),
}
if Config.loss_type not in _criterion_factories:
    raise ValueError(f"Unsupported loss type: {Config.loss_type}")
criterion = _criterion_factories[Config.loss_type]().to(device)

# Training function
def train_epoch(model, dataloader, optimizer, criterion, epoch):
    """Run one optimisation pass over ``dataloader``.

    The batch layout is chosen by ``Config.loss_type``:

    * ``"triplet"`` / ``"angular"``: ``((anchor, positive, negative), labels)``
    * ``"contrastive"``: ``((img1, img2), (label1, label2, is_same))``

    Args:
        model: network called with two or three image batches.
        dataloader: iterable of batches in the layout described above.
        optimizer: optimizer stepping ``model``'s parameters.
        criterion: loss matching ``Config.loss_type``.
        epoch: zero-based epoch number, used for the progress-bar label.

    Returns:
        Mean loss over the processed batches (0.0 if there were none).

    Raises:
        ValueError: if ``Config.loss_type`` is not a supported loss.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    with tqdm(dataloader, desc=f"Epoch {epoch+1}/{Config.num_epochs}") as pbar:
        for data in pbar:
            optimizer.zero_grad()
            if Config.loss_type in ("triplet", "angular"):
                # Both losses consume anchor, positive and negative images
                images, _ = data
                anchor_img, positive_img, negative_img = (
                    img.to(device) for img in images)
                anchor_emb, positive_emb, negative_emb = model(
                    anchor_img, positive_img, negative_img)
                loss = criterion(anchor_emb, positive_emb, negative_emb)
            elif Config.loss_type == "contrastive":
                # Pairs of images plus a flag telling whether they match
                (img1, img2), (_, _, labels) = data
                img1 = img1.to(device)
                img2 = img2.to(device)
                labels = labels.to(device)
                output1, output2 = model(img1, img2)
                loss = criterion(output1, output2, labels)
            else:
                raise ValueError(f"Unsupported loss type: {Config.loss_type}")
            # Backpropagation and optimization
            loss.backward()
            optimizer.step()
            # Update running loss and progress bar
            running_loss += loss.item()
            num_batches += 1
            pbar.set_postfix(loss=running_loss / num_batches)
    # Guard against an empty loader instead of dividing by zero
    return running_loss / num_batches if num_batches else 0.0

# Function to extract embeddings
def extract_embeddings(dataloader, model):
    """Embed every image yielded by ``dataloader``.

    Each batch must unpack as ``(images, labels, image_names)``.

    Returns:
        ``(embeddings, labels, img_names)`` where ``embeddings`` is the
        concatenation of all batch outputs (left on the model's device) and
        the two lists follow the same order.
    """
    model.eval()
    batch_outputs, labels, img_names = [], [], []
    progress = tqdm(dataloader, desc="Extracting embeddings")
    with torch.no_grad():
        for images, batch_labels, batch_img_names in progress:
            # Embeddings stay on the device the model produced them on
            batch_outputs.append(model.forward_one(images.to(device)))
            labels.extend(batch_labels)
            img_names.extend(batch_img_names)
    return torch.cat(batch_outputs, dim=0), labels, img_names
# Function to calculate distances between query and gallery embeddings


def calculate_distances(query_emb, gallery_emb, distance_metric="cosine",
                        chunk_size=1024):
    """Return the ``(n_query, n_gallery)`` matrix of pairwise distances.

    Args:
        query_emb: ``(n_query, dim)`` tensor.
        gallery_emb: ``(n_gallery, dim)`` tensor.
        distance_metric: ``"cosine"``, ``"euclidean"`` or ``"manhattan"``.
            The cosine branch is ``1 - query @ gallery.T`` and therefore
            assumes the embeddings are already L2-normalised.
        chunk_size: number of query rows processed per ``torch.cdist`` call
            for the euclidean/manhattan metrics.

    Returns:
        For ``"cosine"`` a tensor on the inputs' device. For the other two
        metrics a float32 CPU tensor, filled chunk by chunk so the full
        matrix never has to live on the inputs' device.

    Raises:
        ValueError: on an unknown metric or a ``chunk_size`` below 1.
    """
    if distance_metric == "cosine":
        # Higher similarity = lower distance
        return 1.0 - torch.mm(query_emb, gallery_emb.T)

    norm_orders = {"euclidean": 2.0, "manhattan": 1.0}
    if distance_metric not in norm_orders:
        raise ValueError(f"Unsupported distance metric: {distance_metric}")
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")

    norm_order = norm_orders[distance_metric]
    n_query = query_emb.size(0)
    dist = torch.zeros(n_query, gallery_emb.size(0))
    for start in range(0, n_query, chunk_size):
        stop = start + chunk_size
        # Direct (non-matmul) evaluation keeps the accuracy of the explicit
        # sum-of-squares formula; the assignment copies the chunk to ``dist``.
        dist[start:stop] = torch.cdist(
            query_emb[start:stop], gallery_emb, p=norm_order,
            compute_mode="donot_use_mm_for_euclid_dist")
    return dist

# Function to calculate mAP@k


def calculate_map_at_k(distances, query_labels, gallery_labels, k=5):
    """
    Calculate mean Average Precision at k
    """
    n_query = distances.size(0)
    ap_list = []

    for i in range(n_query):
        # Get indices of top-k nearest neighbors
        _, indices = torch.topk(distances[i], k=min(
            k, len(gallery_labels)), largest=False)

        # Check if the retrieved items have the same label
        relevant = [gallery_labels[idx] == query_labels[i] for idx in indices]

        if not any(relevant):
            ap_list.append(0.0)
            continue

        # Calculate precision at each relevant position
        precision_at_i = 0.0
        num_relevant = 0.0

        for j, is_relevant in enumerate(relevant):
            if is_relevant:
                num_relevant += 1
                precision_at_i += num_relevant / (j + 1)

        ap = precision_at_i / num_relevant
        ap_list.append(ap)

    return np.mean(ap_list)

# Function to evaluate the model


def evaluate(model, test_loader, distance_metric="cosine", k=5):
    """Print and return mAP@k with the loader's items as both query and gallery.

    Every item is matched against all the others; its own entry is masked
    out so it cannot retrieve itself.
    """
    model.eval()

    # The same embeddings play the query and the gallery role
    embeddings, labels, _ = extract_embeddings(test_loader, model)
    pairwise = calculate_distances(embeddings, embeddings, distance_metric)

    # An infinite diagonal excludes self-matches from the top-k
    pairwise.fill_diagonal_(float('inf'))

    score = calculate_map_at_k(pairwise, labels, labels, k)
    print(f"mAP@{k}: {score:.4f}")
    return score

# Image retrieval function


def retrieve_images(model, query_img_path, gallery_loader, top_k=5, distance_metric="cosine"):
    """Return the gallery images closest to one query image.

    Args:
        model: network exposing ``forward_one``.
        query_img_path: path of the query image file.
        gallery_loader: loader whose items are embedded as the gallery.
        top_k: number of neighbours to return (capped at the gallery size).
        distance_metric: ``"cosine"``, ``"euclidean"`` or ``"manhattan"``.

    Returns:
        A list of ``(image_name, distance)`` tuples, nearest first.

    Raises:
        ValueError: if ``distance_metric`` is not supported.
    """
    model.eval()
    # Load and preprocess the query; the context manager closes the file
    with Image.open(query_img_path) as raw_img:
        query_img = data_transforms['test'](raw_img.convert('RGB'))
    query_img = query_img.unsqueeze(0).to(device)  # Add batch dimension

    # The gallery is re-embedded on every call
    gallery_embeddings, _, gallery_img_names = extract_embeddings(gallery_loader, model)
    with torch.no_grad():
        query_embedding = model.forward_one(query_img)

    # Same distance code as evaluation; row 0 is the single query, so the
    # result stays 1-D even when the gallery holds a single item
    distances = calculate_distances(
        query_embedding, gallery_embeddings, distance_metric)[0]

    # Get the top-k nearest neighbours
    k = min(top_k, len(gallery_img_names))
    nearest_dist, nearest_idx = torch.topk(distances, k=k, largest=False)
    return [
        (gallery_img_names[idx], dist)
        for idx, dist in zip(nearest_idx.tolist(), nearest_dist.tolist())
    ]
# Function to visualize retrieval results


def visualize_retrieval(query_img_path, results, img_dir,
                        output_path="retrieval_results.png"):
    """Save a one-row figure showing the query followed by its retrievals.

    Args:
        query_img_path: path of the query image file.
        results: iterable of ``(image_name, distance)`` tuples; each image
            is read from ``img_dir``.
        img_dir: directory holding the retrieved images.
        output_path: where the figure is written.
    """
    panels = [(query_img_path, "Query Image")]
    panels.extend(
        (os.path.join(img_dir, img_name), f"Dist: {distance:.4f}")
        for img_name, distance in results
    )

    fig = plt.figure(figsize=(15, 3))
    try:
        for position, (img_path, title) in enumerate(panels, start=1):
            axis = fig.add_subplot(1, len(panels), position)
            # Close each file as soon as its pixels have been handed over
            with Image.open(img_path) as raw_img:
                axis.imshow(raw_img.convert('RGB'))
            axis.set_title(title)
            axis.axis('off')

        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure even if loading or saving failed
        plt.close(fig)


# Main training loop
def train_model():
    """Train for ``Config.num_epochs`` epochs and checkpoint the best model.

    The model is evaluated every 5 epochs and after the last epoch; whenever
    mAP@k improves, the model/optimizer state is written to
    ``Config.checkpoint_path``.

    Returns:
        The best mAP@k observed, or ``-inf`` if no evaluation ran.
    """
    # Start below any reachable score so the first evaluation always writes
    # a checkpoint, even when its mAP is exactly 0.0
    best_map = float('-inf')

    for epoch in range(Config.num_epochs):
        epoch_loss = train_epoch(
            model, train_loader, optimizer, criterion, epoch)
        print(f"Epoch {epoch+1}/{Config.num_epochs}, Loss: {epoch_loss:.4f}")

        # Evaluate every 5 epochs or on the last epoch
        is_last_epoch = epoch == Config.num_epochs - 1
        if (epoch + 1) % 5 != 0 and not is_last_epoch:
            continue

        map_at_k = evaluate(model, test_loader,
                            Config.distance_metric, Config.top_k)

        # Save model if better
        if map_at_k > best_map:
            best_map = map_at_k
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': epoch_loss,
                'map': map_at_k
            }, Config.checkpoint_path)
            print(f"Model saved with mAP@{Config.top_k}: {map_at_k:.4f}")

    return best_map


# Script entry point: train, reload the best checkpoint, re-evaluate it and
# save one example retrieval figure.
if __name__ == "__main__":
    print(
        f"Starting training with {Config.distance_metric}, {Config.loss_type}, and {Config.backbone}...")
    train_model()
    # Load best model for evaluation
    # NOTE(review): weights_only=False unpickles arbitrary objects; that is
    # acceptable only because this file was just written by train_model().
    checkpoint = torch.load(Config.checkpoint_path, weights_only=False, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    # The optimizer state is restored as well, although nothing below
    # performs another optimisation step.
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Evaluate on test set
    evaluate(model, test_loader, Config.distance_metric, Config.top_k)

    # Example image retrieval: the first test image queries the test set.
    # The query is itself part of the gallery, and retrieve_images embeds
    # the whole gallery again.
    query_img_path = os.path.join(
        Config.test_dir, test_dataset.data.iloc[0]['name'])
    results = retrieve_images(
        model, query_img_path, test_loader, Config.top_k, Config.distance_metric)
    visualize_retrieval(query_img_path, results, Config.test_dir)

    print("Training complete!")

Using device: cuda
Using backbone: resnet101


Downloading: "https://download.pytorch.org/models/resnet101-cd907fc2.pth" to /root/.cache/torch/hub/checkpoints/resnet101-cd907fc2.pth
100%|██████████| 171M/171M [00:00<00:00, 202MB/s]


Done loading backbone
Model initialized successfully.
Starting training with euclidean, contrastive, and resnet101...


Epoch 1/10:   0%|          | 20/4436 [00:15<39:59,  1.84it/s, loss=0.262]

Debug checkpoint reached - continuing training...


Epoch 1/10: 100%|██████████| 4436/4436 [45:51<00:00,  1.61it/s, loss=0.197]


Epoch 1/10, Loss: 0.1971


Epoch 2/10: 100%|██████████| 4436/4436 [45:14<00:00,  1.63it/s, loss=0.167]


Epoch 2/10, Loss: 0.1666


Epoch 3/10: 100%|██████████| 4436/4436 [45:25<00:00,  1.63it/s, loss=0.157]


Epoch 3/10, Loss: 0.1574


Epoch 4/10: 100%|██████████| 4436/4436 [45:34<00:00,  1.62it/s, loss=0.152]


Epoch 4/10, Loss: 0.1516


Epoch 5/10: 100%|██████████| 4436/4436 [45:48<00:00,  1.61it/s, loss=0.147]


Epoch 5/10, Loss: 0.1466


Extracting embeddings: 100%|██████████| 1731/1731 [05:27<00:00,  5.28it/s]


mAP@5: 0.0971
Model saved with mAP@5: 0.0971


Epoch 6/10: 100%|██████████| 4436/4436 [46:30<00:00,  1.59it/s, loss=0.142]


Epoch 6/10, Loss: 0.1420


Epoch 7/10: 100%|██████████| 4436/4436 [46:04<00:00,  1.60it/s, loss=0.138]


Epoch 7/10, Loss: 0.1382


Epoch 8/10: 100%|██████████| 4436/4436 [46:30<00:00,  1.59it/s, loss=0.135]


Epoch 8/10, Loss: 0.1351


Epoch 9/10: 100%|██████████| 4436/4436 [46:16<00:00,  1.60it/s, loss=0.132]


Epoch 9/10, Loss: 0.1317


Epoch 10/10: 100%|██████████| 4436/4436 [45:58<00:00,  1.61it/s, loss=0.13]


Epoch 10/10, Loss: 0.1302


Extracting embeddings: 100%|██████████| 1731/1731 [05:18<00:00,  5.43it/s]


mAP@5: 0.1225
Model saved with mAP@5: 0.1225


Extracting embeddings: 100%|██████████| 1731/1731 [05:03<00:00,  5.69it/s]


mAP@5: 0.1225


Extracting embeddings: 100%|██████████| 1731/1731 [04:54<00:00,  5.89it/s]


Training complete!
