In [1]:
import os
import sys
from pathlib import Path

import numpy as np
import pandas as ps

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Add the sibling ../src directory to the module search path (relative to the
# notebook's working directory). Presumably this is where the project-local
# packages imported below live - TODO confirm.
sys.path.append("../src")

from metrics import gap
from models import EncoderWithHead
from models.efficientnets import EfficientNetEncoder
from models.heads import CosFace
from datasets import FolderDataset
from batteries.progress import tqdm
from batteries import t2d, load_checkpoint

In [2]:
# Validation split setup: read the combined train/valid index table, derive the
# landmark_id -> class-index mapping, and wrap the validation rows in a loader.
# NOTE(review): read_pickle executes arbitrary pickled code - only point it at
# trusted files.
train_valid = ps.read_pickle("../input/train_valid.pkl")
IMAGES_DIR = Path("../input/train")

# Class indices are assigned in ascending landmark_id order over the whole
# table (train and valid rows together), so both splits share one mapping.
_landmark_ids = sorted(set(train_valid["landmark_id"].values))
landmark_map = dict(zip(_landmark_ids, range(len(_landmark_ids))))

# Keep only the rows flagged as belonging to the validation split.
valid = train_valid[train_valid["is_valid"].eq(True)]
valid_set = FolderDataset(
    valid["id"].values, valid["landmark_id"].values, landmark_map, data_dir=IMAGES_DIR
)
valid_loader = DataLoader(dataset=valid_set, batch_size=256, num_workers=16)

_valid_summary = (
    f" * Num records in valid dataset - {len(valid_set)}, batches - {len(valid_loader)}"
)
print(_valid_summary)

 * Num records in valid dataset - 72322, batches - 283


In [3]:
# Model definition: an EfficientNet-b0 encoder feeding a CosFace head, with
# weights restored from a saved checkpoint.
EMBEDDING_SIZE = 512
NUM_CLASSESS = len(landmark_map)  # one class per distinct landmark_id

encoder = EfficientNetEncoder("efficientnet-b0", EMBEDDING_SIZE, bias=False)
# NOTE(review): the third CosFace argument is passed as None; its meaning is
# defined in models.heads and is not visible here - confirm.
head = CosFace(EMBEDDING_SIZE, NUM_CLASSESS, None)
model = EncoderWithHead(encoder, head)

load_checkpoint("../logs/full_set2/stage_0/best.pth", model)

Loaded pretrained weights for efficientnet-b0
<= Loaded model from '../logs/full_set2/stage_0/best.pth'


In [4]:
# Override the head's scale (s) and margin (m) on the already-loaded model.
# The scale sqrt(2) * log(C - 1), with C the number of classes, appears to
# follow the fixed-scale formula from the AdaCos paper - TODO confirm intent.
# NOTE(review): whether the checkpoint was trained with these same values is
# not visible from this notebook.
_head_scale = np.sqrt(2) * np.log(NUM_CLASSESS - 1)
model.head.s = _head_scale
model.head.m = 0.1

In [5]:
# Pick the compute device. The second GPU ("cuda:1") is preferred, as in the
# original setup, but fall back to the first GPU or the CPU so the notebook
# still runs on machines with fewer devices instead of raising.
if torch.cuda.is_available():
    _gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{_gpu_index}")
else:
    device = torch.device("cpu")

model = model.to(device)
# Default reduction is "mean", so each call returns the batch-average loss.
criterion = nn.CrossEntropyLoss()

In [6]:
# Evaluate the model on the validation split. Each list in `metrics` receives
# one entry per batch: mean cross-entropy loss, top-1 accuracy and the value
# returned by `gap` for that batch.
model.eval()

metrics = {
    "loss": [],
    "accuracy": [],
    "gap": [],
}

with torch.no_grad(), tqdm(total=len(valid_loader), desc="valid") as progress:
    for batch in valid_loader:
        inputs, targets = t2d(batch, device)

        # NOTE(review): targets are passed to the model too; presumably the
        # margin head uses them when producing logits - confirm in models.heads.
        outputs = model(inputs, targets)

        # Gradients are disabled here, so no .detach() is needed before .item().
        _loss = criterion(outputs, targets).item()
        metrics["loss"].append(_loss)

        # A single max over the class dimension yields both the top-1 class and
        # its raw score, so a separate argmax pass is unnecessary.
        confidences, predictions = torch.max(outputs, dim=1)

        _acc = (predictions == targets).float().mean().item()
        metrics["accuracy"].append(_acc)

        # NOTE(review): confidences are the raw model outputs, not softmax
        # probabilities - confirm this is what `gap` expects.
        _gap = gap(predictions, confidences, targets)
        metrics["gap"].append(_gap)

        progress.set_postfix_str(f"loss {_loss:.4f}, gap {_gap:.4f}, acc {_acc:.4f}")
        progress.update(1)

valid: 100%|████████████████████| 283/283 [01:52<00:00,  2.52it/s, loss 3.7170, gap 0.3590, acc 0.5385]


In [7]:
# Collapse each per-batch list into one number, in place.
# NOTE(review): this is a plain unweighted mean over batches. With 72322
# records in batches of 256 the final batch is smaller, so the result is not
# exactly the per-sample mean; the "gap" entry is likewise an average of
# per-batch values rather than one score over the whole split.
for _metric_name in ("loss", "accuracy", "gap"):
    metrics[_metric_name] = np.mean(metrics[_metric_name])

print(metrics)

{'loss': 3.763266608908825, 'accuracy': 0.5353611716112062, 'gap': 0.38452192799212787}


In [7]:
# Training split setup: select the rows not flagged as validation and wrap them
# in a dataset/loader that reuses the shared landmark_id -> class-index mapping.
train = train_valid[train_valid["is_valid"] == False]
train_set = FolderDataset(
    train["id"].values,
    train["landmark_id"].values,
    landmark_map,
    data_dir=IMAGES_DIR,
)
# No shuffling: the loader is only used for evaluation below.
train_loader = DataLoader(
    dataset=train_set, batch_size=256, num_workers=16
)

# The message previously said "valid dataset" although it reports the train split.
print(
    f" * Num records in train dataset - {len(train_set)}, batches - {len(train_loader)}"
)

 * Num records in valid dataset - 1508148, batches - 5892


In [8]:
# Evaluate the model on the training split. Each list in `metrics` receives one
# entry per batch: mean cross-entropy loss, top-1 accuracy and the value
# returned by `gap` for that batch.
# NOTE(review): the recorded run of this cell shows near-zero accuracy and GAP
# on the training split, while the validation run reached about 0.53 accuracy.
# That is unexpected for data the model was fit on - investigate the data
# pipeline and the checkpoint before trusting these numbers.
model.eval()

metrics = {
    "loss": [],
    "accuracy": [],
    "gap": [],
}

with torch.no_grad(), tqdm(total=len(train_loader), desc="train") as progress:
    for batch in train_loader:
        inputs, targets = t2d(batch, device)

        # NOTE(review): targets are passed to the model too; presumably the
        # margin head uses them when producing logits - confirm in models.heads.
        outputs = model(inputs, targets)

        # Gradients are disabled here, so no .detach() is needed before .item().
        _loss = criterion(outputs, targets).item()
        metrics["loss"].append(_loss)

        # A single max over the class dimension yields both the top-1 class and
        # its raw score, so a separate argmax pass is unnecessary.
        confidences, predictions = torch.max(outputs, dim=1)

        _acc = (predictions == targets).float().mean().item()
        metrics["accuracy"].append(_acc)

        # NOTE(review): confidences are the raw model outputs, not softmax
        # probabilities - confirm this is what `gap` expects.
        _gap = gap(predictions, confidences, targets)
        metrics["gap"].append(_gap)

        progress.set_postfix_str(f"loss {_loss:.4f}, gap {_gap:.4f}, acc {_acc:.4f}")
        progress.update(1)

train: 100%|████████████████████| 5892/5892 [37:52<00:00,  2.59it/s, loss 20.9686, gap 0.0000, acc 0.0000]


In [9]:
# Collapse each per-batch list into one number, in place.
# NOTE(review): this is a plain unweighted mean over batches. With 1508148
# records in batches of 256 the final batch is smaller, so the result is not
# exactly the per-sample mean; the "gap" entry is likewise an average of
# per-batch values rather than one score over the whole split.
for _metric_name in ("loss", "accuracy", "gap"):
    metrics[_metric_name] = np.mean(metrics[_metric_name])

print(metrics)

{'loss': 32.26586946822799, 'accuracy': 2.8507934487440598e-05, 'gap': 5.071256240721227e-07}
