In [None]:
import timm
import torch
import torch.nn as nn
from PIL import Image
import numpy as np
from timm.data import resolve_model_data_config, create_transform
import random
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from common.datasets import LandmarkDataset

##### Environment configuration (device and dataset paths)

In [None]:
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset root. Set the LANDMARK_DATASET_PATH environment variable to point the
# notebook at another location; when it is unset or empty the original
# machine-specific default is used.
DATASET_PATH = os.environ.get("LANDMARK_DATASET_PATH") or "D:/Datasets/landmark-recognition-2021"
TRAIN_DIR = f"{DATASET_PATH}/train"
TEST_DIR = f"{DATASET_PATH}/test"

##### Hyperparameters

In [None]:
# Reproducibility: used for RNG seeding and as the split's random_state.
SEED = 42

# Number of passes over the training loader.
EPOCHS = 1

# DataLoader settings: samples per batch and number of worker processes.
BATCH_SIZE = 64
N_WORKERS = 6

##### Training IDs table

In [None]:
# Training index table; later cells read its "id" and "landmark_id" columns.
train_csv_path = f"{DATASET_PATH}/train.csv"
train_df = pd.read_csv(train_csv_path)

# Preview the first rows as the cell output.
train_df.head()

##### Seeding

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook uses.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    every CUDA device), and configures cuDNN to prefer deterministic kernels.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Hash randomisation of the running interpreter is fixed at start-up, so
    # this only affects processes launched after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Seeds all CUDA devices; documented as a silent no-op when CUDA is absent,
    # so no availability guard is needed.
    torch.cuda.manual_seed_all(seed)

    # Disable the cuDNN autotuner (it may pick different algorithms between
    # runs) and request deterministic convolution algorithms.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Seed all RNGs before the split and the data loaders are created.
seed_everything(SEED)

##### Training/Test datasets

In [None]:
# Map every image id in the training table to its landmark id.
labels = dict(zip(train_df["id"], train_df["landmark_id"]))

# Hold out 20% of the ids. The split is stratified on landmark_id and made
# repeatable through SEED.
train_ids, test_ids = train_test_split(
    train_df["id"].tolist(), test_size=0.2, random_state=SEED, stratify=train_df["landmark_id"]
)

# Id lists keyed by split name, consumed when the datasets are built.
partition = dict(train=train_ids, test=test_ids)

print(f"Training IDs:{len(partition['train'])}\nTest IDs:{len(partition['test'])}")
print(f"Total IDs/labels: {len(labels)}")

In [None]:
# DataLoader settings for training (shuffled every epoch).
params = {"batch_size": BATCH_SIZE, "shuffle": True, "num_workers": N_WORKERS}
# Evaluation gains nothing from shuffling; a fixed order keeps held-out batches
# repeatable between runs.
eval_params = {**params, "shuffle": False}

# The model is only used here to derive its preprocessing configuration.
# NOTE(review): the classifier head keeps timm's default size and the model is
# never moved to DEVICE -- confirm both before adding a forward pass.
model = timm.create_model("efficientnetv2_m", pretrained=False)
model_config = timm.data.resolve_model_data_config(model)
transform = timm.data.create_transform(**model_config)

train_set = LandmarkDataset(partition["train"], labels, directory=TRAIN_DIR, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, **params)

# The held-out ids were split off train.csv, so their images belong to the
# training directory rather than TEST_DIR.
# NOTE(review): assumes LandmarkDataset resolves image files under `directory` -- confirm.
test_set = LandmarkDataset(partition["test"], labels, directory=TRAIN_DIR, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, **eval_params)

len(train_set), len(test_set)

##### Training loop

In [None]:
# Skeleton of the epoch loop: batches are only transferred to DEVICE for now.
# NOTE(review): no forward pass, loss or optimiser step is performed yet.
for epoch in range(EPOCHS):
    # Training pass over the shuffled training loader.
    # Assumes each batch is an (inputs, labels) pair of tensors -- TODO confirm.
    for local_batch, local_labels in train_loader:
        local_batch = local_batch.to(DEVICE)
        local_labels = local_labels.to(DEVICE)

    # Held-out pass with gradient tracking disabled.
    with torch.no_grad():
        for local_batch, local_labels in test_loader:
            local_batch = local_batch.to(DEVICE)
            local_labels = local_labels.to(DEVICE)