In [22]:
import timm
import torch
import torch.nn as nn
from PIL import Image
import numpy as np
from timm.data import resolve_model_data_config, create_transform
import random
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from common.datasets import LandmarkDataset
from torch.utils.data import Subset, DataLoader
import torch.optim as optim
from tqdm import tqdm

##### Environment variables

In [23]:
# Root folder of the dataset on local disk.
DATASET_PATH = "D:/Datasets/landmark-recognition-2021"

# NOTE(review): TRAIN_DIR and TEST_DIR resolve to the same "train" folder.
# That may be intentional (the test split in this notebook is carved out of
# train.csv) -- confirm before pointing TEST_DIR somewhere else.
TRAIN_DIR = DATASET_PATH + "/train"
TEST_DIR = DATASET_PATH + "/train"

# Use the GPU whenever PyTorch reports CUDA as available; otherwise the CPU.
DEVICE = "cpu" if not torch.cuda.is_available() else "cuda"

##### Hyperparameters

In [24]:
# Seed handed to seed_everything() so random draws are repeatable.
SEED = 42
# Number of training epochs -- presumably consumed by the training loop; confirm.
EPOCHS = 5
# Mini-batch size used when building the DataLoaders.
BATCH_SIZE = 16
# Worker-process count, presumably meant for DataLoader(num_workers=...) -- confirm usage.
N_WORKERS = 2

##### Training IDs table

In [25]:
# Read the training table; its columns include the image `id` and `landmark_id`.
# NOTE(review): the file also carries an unnamed leading column (rendered as
# "Unnamed: 0"), which looks like a previously saved index -- confirm whether
# downstream code relies on it before dropping it.
train_csv_path = f"{DATASET_PATH}/train.csv"
train_df = pd.read_csv(train_csv_path)
train_df.head()

Unnamed: 0,id,landmark_id
0,17660ef415d37059,1
1,92b6290d571448f6,1
2,cd41bf948edc0340,1
3,fb09f1e98c6d2f70,1
4,25c9dfc7ea69838d,7


##### Seeding

In [26]:
def seed_everything(seed: int = 42):
    """Seed every random number generator used here and pin cuDNN behaviour.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (plus the CUDA generators when CUDA is available), and writes the seed to
    the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value passed to each seeding call. Defaults to 42.
    """
    # Host-side generators, seeded in a fixed order.
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Device-side generators are only touched when CUDA is usable.
    cuda_is_usable = torch.cuda.is_available()
    if cuda_is_usable:
        for seed_cuda in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            # manual_seed_all additionally covers multi-GPU setups.
            seed_cuda(seed)

    # Ask cuDNN for deterministic kernels and switch off its autotuner.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False


# Apply the configured seed before any randomised step that follows.
seed_everything(seed=SEED)

##### Training/Test datasets

In [27]:
# Encode the raw landmark ids as contiguous class indices 0..N_CLASSES-1.
# The ids in train.csv have gaps (e.g. 1 is followed by 7), whereas the model
# is created with N_CLASSES outputs and nn.CrossEntropyLoss expects class-index
# targets in [0, N_CLASSES). Raw ids would therefore fall outside that range.
# `label_encoder` is kept so predictions can be mapped back to the original
# ids with label_encoder.inverse_transform(...).
# NOTE(review): if LandmarkDataset already encodes labels internally, this
# step is redundant -- confirm against common.datasets.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(train_df["landmark_id"])
N_CLASSES = len(label_encoder.classes_)

X = train_df.drop(columns="landmark_id")
# Keep y a Series with the original index and name so that code aligning
# X and y by index keeps working.
y = pd.Series(encoded_labels, index=train_df.index, name="landmark_id")

# Pin the split to SEED so re-running this cell always yields the same
# partition instead of depending on the current global NumPy RNG state.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)

In [36]:
# Build the EfficientNetV2-M classifier with one output per landmark class.
# No `pretrained` argument is passed, so timm's default applies.
model = timm.create_model("efficientnetv2_m", num_classes=N_CLASSES)

# Derive the preprocessing pipeline from the model's own data configuration.
data_config = resolve_model_data_config(model)
transform = create_transform(**data_config)

In [53]:
# Set to False to use the complete splits instead of a small random sample.
use_subset = True
# Fraction of each split that is kept when use_subset is True.
SUBSET_FRACTION = 0.01

# Both splits go through the same preprocessing. The test dataset was
# previously built without a transform, unlike the train dataset.
# NOTE(review): assumes LandmarkDataset applies `transform` per sample and has
# no separate built-in default for evaluation -- confirm in common.datasets.
train_dataset = LandmarkDataset(X_train, y_train, transform=transform)
test_dataset = LandmarkDataset(X_test, y_test, transform=transform)

if use_subset:
    # A dedicated generator seeded with SEED makes the sample independent of
    # the global NumPy RNG state, so re-running this cell picks the same rows.
    subset_rng = np.random.default_rng(SEED)

    train_indices = subset_rng.choice(
        len(train_dataset), int(len(train_dataset) * SUBSET_FRACTION), replace=False
    )
    test_indices = subset_rng.choice(
        len(test_dataset), int(len(test_dataset) * SUBSET_FRACTION), replace=False
    )

    # Plain Python ints keep the indices handed to the dataset library-agnostic.
    train_dataset = Subset(train_dataset, train_indices.tolist())
    test_dataset = Subset(test_dataset, test_indices.tolist())

# Shuffle only the training data; evaluation order does not need to vary.
# N_WORKERS from the hyperparameter cell is now passed to both loaders.
train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=N_WORKERS
)
test_loader = DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=N_WORKERS
)

len(train_dataset), len(test_dataset)

(11853, 3951)

##### Training loop

In [None]:
# Cross-entropy loss for the single-label classification head.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a fixed learning rate of 1e-3.
# NOTE(review): no cell visible here moves `model` to DEVICE -- confirm that
# happens before training starts.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)