# Notebool for DINOv2 models with learning rate 2.5e-4

In [1]:
import copy
import os

import torch
import torchvision
from torch import nn
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from tqdm import tqdm
import schedulefree

from kornia import augmentation as K
from kornia.augmentation import AugmentationSequential

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [3]:
# Load a pre-trained DINOv2 model from Facebook Research using torch hub
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14_lc')

Using cache found in C:\Users\User/.cache\torch\hub\facebookresearch_dinov2_main


In [4]:
# Freeze all model parameters to avoid updating them during training
for param in model.parameters():
    param.requires_grad = False

In [5]:
# Add a linear head to the model
num_classes = 525
model.linear_head = torch.nn.Linear(model.linear_head.in_features, num_classes)

# Enable training only for the linear head (parameters of the added layer)
for param in model.linear_head.parameters():
    param.requires_grad = True

## Set of models with the following parameters:
1. Fixed precision
2. Batch size of 256
4. No augmentations
5. 3 epochs

In [6]:
# Define a set of transformations to resize and convert images to tensors
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [7]:
# Load training and validation datasets from the specified directories
train_dataset = datasets.ImageFolder('../dataset/train', transform=transform)
val_dataset = datasets.ImageFolder('../dataset/val', transform=transform)

# Create data loaders for training and validation datasets(with batch size = 256)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=256,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)


In [8]:
# Set the device to GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the loss function (cross-entropy) for classification tasks
criterion = nn.CrossEntropyLoss().to(device)

In [9]:
# function for calculating and displaying model accuracy
def check_accuracy(model, dataloader=val_dataloader, device=device, desc='Validation'):
    model.eval() # Set the model to evaluation mode
    val_loss = 0.0
    correct_top1 = 0 # Correct predictions for top-1 accuracy
    correct_top5 = 0 # Correct predictions for top-5 accuracy
    total = 0
    
    # Use tqdm for displaying progress during training
    dataloader_tqdm = tqdm(dataloader, desc=desc, leave=True)
    
    with torch.no_grad():
        for inputs, labels in dataloader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()

            # For top-5 accuracy
            _, top5_pred = outputs.topk(5, 1, True, True)
            correct_top5 += top5_pred.eq(labels.view(-1, 1).expand_as(top5_pred)).sum().item()
            
            dataloader_tqdm.set_postfix(classes=correct_top1 / total, top1_acc=100 * correct_top1 / total, top5_acc=100 * correct_top5 / total)
    
    print(f'Validation Loss: {val_loss/len(dataloader):.4f}, Top-1 Accuracy: {100 * correct_top1 / total:.2f}%, Top-5 Accuracy: {100 * correct_top5 / total:.2f}%')

In [10]:
# train function without augmentations
def train_model_no_aug(model, num_epochs, optimizer):
    for epoch in range(num_epochs):
        model.train() # Set the model to training mode
        running_loss = 0.0
        # Use tqdm for displaying progress during training
        train_loader_tqdm = tqdm(train_dataloader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)

        for inputs, labels in train_loader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            gpu_mem = torch.cuda.memory_reserved() / 1e9  # Get GPU memory usage in GB
            
            train_loader_tqdm.set_postfix(GPU_mem=f'{gpu_mem:.3f}G', loss=loss.item(), Instances=len(inputs), Size=inputs.shape[-1])

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}')
        
    return model


In [11]:
# Training with Adam optimizer and no augmentations
mymodel_Adam_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_Adam = torch.optim.Adam(mymodel_Adam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_Adam_noAug, num_epochs, optimizer_Adam)
check_accuracy(mymodel_Adam_noAug, val_dataloader)


Epoch [1/3]: 100%|██████████████| 331/331 [17:01<00:00,  3.08s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=0.084]


Epoch [1/3], Loss: 0.5182


Epoch [2/3]: 100%|██████████████| 331/331 [17:32<00:00,  3.18s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=0.139]


Epoch [2/3], Loss: 0.0914


Epoch [3/3]: 100%|█████████████| 331/331 [18:00<00:00,  3.26s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=0.0231]


Epoch [3/3], Loss: 0.0569


Validation: 100%|██████████████████████████| 11/11 [00:41<00:00,  3.81s/it, classes=0.984, top1_acc=98.4, top5_acc=100]

Validation Loss: 0.0641, Top-1 Accuracy: 98.40%, Top-5 Accuracy: 99.96%





In [12]:
# Save the trained model
state = {'net': mymodel_Adam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_Adam_noAug_256.pth')

In [13]:
del mymodel_Adam_noAug

In [35]:
# Training with SGD optimizer and no augmentations (learning rate = 1e-4)
mymodel_SGD_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 1e-4
optimizer_SGD = torch.optim.SGD(mymodel_SGD_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_SGD_noAug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_noAug, val_dataloader)


Epoch [1/3]: 100%|████████████████| 331/331 [16:58<00:00,  3.08s/it, GPU_mem=8.611G, Instances=155, Size=224, loss=6.2]


Epoch [1/3], Loss: 6.4051


Epoch [2/3]: 100%|███████████████| 331/331 [17:04<00:00,  3.10s/it, GPU_mem=8.611G, Instances=155, Size=224, loss=6.06]


Epoch [2/3], Loss: 6.1944


Epoch [3/3]: 100%|███████████████| 331/331 [17:02<00:00,  3.09s/it, GPU_mem=8.611G, Instances=155, Size=224, loss=5.83]


Epoch [3/3], Loss: 5.9937


Validation: 100%|█████████████████████████| 11/11 [00:39<00:00,  3.60s/it, classes=0.0232, top1_acc=2.32, top5_acc=7.7]

Validation Loss: 5.8874, Top-1 Accuracy: 2.32%, Top-5 Accuracy: 7.70%





In [36]:
# Save the trained model
state = {'net': mymodel_SGD_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_noAug_256.pth')

In [16]:
del mymodel_SGD_noAug

In [18]:
# Training with SGD optimizer and no augmentations (learning rate = 5e-3)
mymodel_SGD_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGD = torch.optim.SGD(mymodel_SGD_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_SGD_noAug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_noAug, val_dataloader)


Epoch [1/3]: 100%|██████████████| 331/331 [17:12<00:00,  3.12s/it, GPU_mem=10.161G, Instances=155, Size=224, loss=1.24]


Epoch [1/3], Loss: 3.0608


Epoch [2/3]: 100%|█████████████| 331/331 [17:23<00:00,  3.15s/it, GPU_mem=10.161G, Instances=155, Size=224, loss=0.507]


Epoch [2/3], Loss: 0.7697


Epoch [3/3]: 100%|█████████████| 331/331 [17:14<00:00,  3.12s/it, GPU_mem=10.161G, Instances=155, Size=224, loss=0.393]


Epoch [3/3], Loss: 0.4446


Validation: 100%|█████████████████████████| 11/11 [00:39<00:00,  3.59s/it, classes=0.971, top1_acc=97.1, top5_acc=99.8]

Validation Loss: 0.3231, Top-1 Accuracy: 97.10%, Top-5 Accuracy: 99.85%





In [19]:
# Save the trained model
state = {'net': mymodel_SGD_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_noAug_256_lr5e-3.pth')

In [42]:
# Training with AdamW optimizer and no augmentations
mymodel_AdamW_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_AdamW = torch.optim.AdamW(mymodel_AdamW_noAug.linear_head.parameters(), lr=lr)
mymodel_AdamW_noAug = train_model_no_aug(mymodel_AdamW_noAug, num_epochs, optimizer_AdamW)
check_accuracy(mymodel_AdamW_noAug, val_dataloader)


Epoch [1/3]: 100%|████████████| 331/331 [17:07<00:00,  3.10s/it, GPU_mem=10.052G, Instances=155, Size=224, loss=0.0834]


Epoch [1/3], Loss: 0.5130


Epoch [2/3]: 100%|████████████| 331/331 [17:05<00:00,  3.10s/it, GPU_mem=10.052G, Instances=155, Size=224, loss=0.0427]


Epoch [2/3], Loss: 0.0893


Epoch [3/3]: 100%|████████████| 331/331 [17:12<00:00,  3.12s/it, GPU_mem=10.052G, Instances=155, Size=224, loss=0.0682]


Epoch [3/3], Loss: 0.0581


Validation: 100%|█████████████████████████| 11/11 [00:41<00:00,  3.78s/it, classes=0.986, top1_acc=98.6, top5_acc=99.9]

Validation Loss: 0.0684, Top-1 Accuracy: 98.55%, Top-5 Accuracy: 99.92%





In [43]:
# Save the trained model
state = {'net': mymodel_AdamW_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamW_noAug_256.pth')

In [44]:
del mymodel_AdamW_noAug

In [49]:
# Training with RMSProp optimizer and no augmentations
mymodel_RMSProp_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_RMSProp = torch.optim.RMSprop(mymodel_RMSProp_noAug.linear_head.parameters(), lr=lr)
mymodel_RMSProp_noAug = train_model_no_aug(mymodel_RMSProp_noAug, num_epochs, optimizer_RMSProp)
check_accuracy(mymodel_RMSProp_noAug, val_dataloader)


Epoch [1/3]: 100%|█████████████| 331/331 [17:12<00:00,  3.12s/it, GPU_mem=9.926G, Instances=155, Size=224, loss=0.0775]


Epoch [1/3], Loss: 0.3579


Epoch [2/3]: 100%|██████████████| 331/331 [17:20<00:00,  3.14s/it, GPU_mem=9.926G, Instances=155, Size=224, loss=0.143]


Epoch [2/3], Loss: 0.0890


Epoch [3/3]: 100%|██████████████| 331/331 [17:10<00:00,  3.11s/it, GPU_mem=9.926G, Instances=155, Size=224, loss=0.069]


Epoch [3/3], Loss: 0.0582


Validation: 100%|█████████████████████████| 11/11 [00:40<00:00,  3.65s/it, classes=0.978, top1_acc=97.8, top5_acc=99.9]

Validation Loss: 0.0845, Top-1 Accuracy: 97.83%, Top-5 Accuracy: 99.92%





In [50]:
# Save the trained model
state = {'net': mymodel_RMSProp_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_RMSProp_noAug_256.pth')

In [51]:
del mymodel_RMSProp_noAug

In [12]:
# Training with NAdam optimizer and no augmentations
mymodel_NAdam_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_NAdam = torch.optim.NAdam(mymodel_NAdam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_NAdam_noAug, num_epochs, optimizer_NAdam)
check_accuracy(mymodel_NAdam_noAug, val_dataloader)


Epoch [1/3]: 100%|██████████████| 331/331 [17:16<00:00,  3.13s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=0.131]


Epoch [1/3], Loss: 0.5554


Epoch [2/3]: 100%|██████████████| 331/331 [17:08<00:00,  3.11s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=0.084]


Epoch [2/3], Loss: 0.0894


Epoch [3/3]: 100%|█████████████| 331/331 [17:02<00:00,  3.09s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=0.0567]


Epoch [3/3], Loss: 0.0542


Validation: 100%|██████████████████████████| 11/11 [00:39<00:00,  3.58s/it, classes=0.983, top1_acc=98.3, top5_acc=100]

Validation Loss: 0.0583, Top-1 Accuracy: 98.32%, Top-5 Accuracy: 99.96%





In [13]:
# Save the trained model
state = {'net': mymodel_NAdam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_NAdam_noAug_256.pth')

In [14]:
del mymodel_NAdam_noAug

In [15]:
# Training with RAdam optimizer and no augmentations
mymodel_RAdam_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_RAdam = torch.optim.Adam(mymodel_RAdam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_RAdam_noAug, num_epochs, optimizer_RAdam)
check_accuracy(mymodel_RAdam_noAug, val_dataloader)


Epoch [1/3]: 100%|██████████████| 331/331 [17:03<00:00,  3.09s/it, GPU_mem=6.998G, Instances=155, Size=224, loss=0.119]


Epoch [1/3], Loss: 0.5126


Epoch [2/3]: 100%|██████████████| 331/331 [17:14<00:00,  3.13s/it, GPU_mem=6.998G, Instances=155, Size=224, loss=0.114]


Epoch [2/3], Loss: 0.0902


Epoch [3/3]: 100%|█████████████| 331/331 [17:16<00:00,  3.13s/it, GPU_mem=6.998G, Instances=155, Size=224, loss=0.0424]


Epoch [3/3], Loss: 0.0579


Validation: 100%|█████████████████████████| 11/11 [00:41<00:00,  3.79s/it, classes=0.982, top1_acc=98.2, top5_acc=99.9]

Validation Loss: 0.0641, Top-1 Accuracy: 98.25%, Top-5 Accuracy: 99.92%





In [16]:
# Save the trained model
state = {'net': mymodel_RAdam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_RAdam_noAug_256.pth')

In [17]:
del mymodel_RAdam_noAug

In [24]:
# Training with AdamWScheduleFree optimizer and no augmentations
mymodel_AdamWScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_AdamWScheduleFree = schedulefree.AdamWScheduleFree(mymodel_AdamWScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_AdamWScheduleFree.train()
mymodel_AdamWScheduleFree_noAug = train_model_no_aug(mymodel_AdamWScheduleFree_noAug, num_epochs, optimizer_AdamWScheduleFree)

Epoch [1/3]: 100%|██████████████| 331/331 [17:12<00:00,  3.12s/it, GPU_mem=8.716G, Instances=155, Size=224, loss=0.179]


Epoch [1/3], Loss: 0.6211


Epoch [2/3]: 100%|██████████████| 331/331 [17:19<00:00,  3.14s/it, GPU_mem=8.716G, Instances=155, Size=224, loss=0.118]


Epoch [2/3], Loss: 0.0839


Epoch [3/3]: 100%|█████████████| 331/331 [17:14<00:00,  3.12s/it, GPU_mem=8.716G, Instances=155, Size=224, loss=0.0295]

Epoch [3/3], Loss: 0.0502





In [25]:
# Evaluating accuracy with AdamWScheduleFree
optimizer_AdamWScheduleFree.eval()
check_accuracy(mymodel_AdamWScheduleFree_noAug, val_dataloader)

Validation: 100%|██████████████████████████| 11/11 [00:39<00:00,  3.56s/it, classes=0.991, top1_acc=99.1, top5_acc=100]

Validation Loss: 0.0532, Top-1 Accuracy: 99.12%, Top-5 Accuracy: 99.96%





In [26]:
# Save the trained model
state = {'net': mymodel_AdamWScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamWScheduleFree_noAug_256.pth')

In [27]:
del mymodel_AdamWScheduleFree_noAug

In [28]:
# Training with SGDScheduleFree optimizer and no augmentations(learning rate = 2.5e-4)
mymodel_SGDScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_noAug = train_model_no_aug(mymodel_SGDScheduleFree_noAug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_noAug, val_dataloader)

Epoch [1/3]: 100%|███████████████| 331/331 [17:08<00:00,  3.11s/it, GPU_mem=8.334G, Instances=155, Size=224, loss=6.15]


Epoch [1/3], Loss: 6.3317


Epoch [2/3]: 100%|███████████████| 331/331 [17:10<00:00,  3.11s/it, GPU_mem=8.334G, Instances=155, Size=224, loss=5.96]


Epoch [2/3], Loss: 6.0437


Epoch [3/3]: 100%|████████████████| 331/331 [17:12<00:00,  3.12s/it, GPU_mem=8.334G, Instances=155, Size=224, loss=5.6]


Epoch [3/3], Loss: 5.7724


Validation: 100%|████████████████████████| 11/11 [00:39<00:00,  3.58s/it, classes=0.0533, top1_acc=5.33, top5_acc=15.1]

Validation Loss: 5.6905, Top-1 Accuracy: 5.33%, Top-5 Accuracy: 15.09%





In [29]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_noAug_256.pth')

In [None]:
del mymodel_SGDScheduleFree_noAug

In [11]:
# Training with SGDScheduleFree optimizer and no augmentations(learning rate = 1e-3)
mymodel_SGDScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 1e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_noAug = train_model_no_aug(mymodel_SGDScheduleFree_noAug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_noAug, val_dataloader)

Epoch [1/3]: 100%|███████████████| 331/331 [17:13<00:00,  3.12s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=5.37]


Epoch [1/3], Loss: 5.9340


Epoch [2/3]: 100%|███████████████| 331/331 [17:09<00:00,  3.11s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=4.51]


Epoch [2/3], Loss: 4.9130


Epoch [3/3]: 100%|███████████████| 331/331 [17:13<00:00,  3.12s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=3.54]


Epoch [3/3], Loss: 3.9930


Validation: 100%|█████████████████████████| 11/11 [00:39<00:00,  3.57s/it, classes=0.528, top1_acc=52.8, top5_acc=88.9]

Validation Loss: 3.7607, Top-1 Accuracy: 52.76%, Top-5 Accuracy: 88.91%





In [12]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_noAug_256_lr_1e-3.pth')

In [13]:
del mymodel_SGDScheduleFree_noAug

In [22]:
# Training with SGDScheduleFree optimizer and no augmentations(learning rate = 5e-3)
mymodel_SGDScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_noAug = train_model_no_aug(mymodel_SGDScheduleFree_noAug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_noAug, val_dataloader)

Epoch [1/3]: 100%|████████████████| 331/331 [17:49<00:00,  3.23s/it, GPU_mem=6.998G, Instances=155, Size=224, loss=2.4]


Epoch [1/3], Loss: 4.1640


Epoch [2/3]: 100%|██████████████| 331/331 [17:47<00:00,  3.23s/it, GPU_mem=6.998G, Instances=155, Size=224, loss=0.861]


Epoch [2/3], Loss: 1.4026


Epoch [3/3]: 100%|██████████████| 331/331 [17:47<00:00,  3.23s/it, GPU_mem=6.998G, Instances=155, Size=224, loss=0.516]


Epoch [3/3], Loss: 0.6417


Validation: 100%|█████████████████████████| 11/11 [00:41<00:00,  3.79s/it, classes=0.955, top1_acc=95.5, top5_acc=99.7]

Validation Loss: 0.5065, Top-1 Accuracy: 95.47%, Top-5 Accuracy: 99.66%





In [23]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_noAug_256_lr_5e-3.pth')

In [13]:
del mymodel_SGDScheduleFree_noAug

## Set of models with the following parameters:
1. Automatic mixed precision
2. Batch size of 64
4. Kornia augmentations (see specifics below)
5. 3 epochs

In [25]:
# Create data loaders for training and validation datasets(with batch size = 128)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)


In [26]:
# Create an augmentation pipeline using Kornia's AugmentationSequential
aug = AugmentationSequential(
    # Apply a random rotation up to 45 degrees with a 30% probability
    K.RandomRotation(degrees=45.0, p=0.3),
    # Apply a random horizontal flip with a 30% probability
    K.RandomHorizontalFlip(p=0.3),
    # Apply a random vertical flip with a 30% probability
    K.RandomVerticalFlip(p=0.3),
    # Apply a random affine transformation with rotation up to 30 degrees with a 30% probability
    K.RandomAffine(degrees=30.0, p=0.3)
).to(device)

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [28]:
use_amp = True
scaler = torch.amp.GradScaler(device,enabled=use_amp)
# train function with augmentations
def train_model_aug(model, num_epochs, optimizer):
    for epoch in range(num_epochs):
        model.train()# Set the model to training mode
        running_loss = 0.0
        
        # Use tqdm to create a progress bar for the training loop
        train_loader_tqdm = tqdm(train_dataloader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)
        
        for inputs, labels in train_loader_tqdm:
            inputs, labels = aug(inputs.to(device)), labels.to(device)

            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            gpu_mem = torch.cuda.memory_reserved() / 1e9  # Get GPU memory usage in GB
            
            train_loader_tqdm.set_postfix(GPU_mem=f'{gpu_mem:.3f}G', loss=loss.item(), Instances=len(inputs), Size=inputs.shape[-1])

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}')
        
    return model


In [21]:
# Training with Adam optimizer and augmentations
mymodel_Adam_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_Adam = torch.optim.Adam(mymodel_Adam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_Adam_Aug, num_epochs, optimizer_Adam)
check_accuracy(mymodel_Adam_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|███████████████| 662/662 [06:02<00:00,  1.82it/s, GPU_mem=4.335G, Instances=27, Size=224, loss=0.271]


Epoch [1/3], Loss: 0.5038


Epoch [2/3]: 100%|██████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=4.335G, Instances=27, Size=224, loss=0.0466]


Epoch [2/3], Loss: 0.1194


Epoch [3/3]: 100%|██████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=4.335G, Instances=27, Size=224, loss=0.0496]


Epoch [3/3], Loss: 0.0890


Validation: 100%|██████████████████████████| 21/21 [00:41<00:00,  1.97s/it, classes=0.983, top1_acc=98.3, top5_acc=100]

Validation Loss: 0.0729, Top-1 Accuracy: 98.29%, Top-5 Accuracy: 100.00%





In [22]:
# Save the trained model
state = {'net': mymodel_Adam_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_Adam_Aug_128.pth')

In [23]:
del mymodel_Adam_Aug

In [26]:
# Training with SGD optimizer and augmentations(learning rate = 2.5e-4)
mymodel_SGD_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_SGD = torch.optim.SGD(mymodel_SGD_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_SGD_Aug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|███████████████| 331/331 [06:00<00:00,  1.09s/it, GPU_mem=6.426G, Instances=155, Size=224, loss=6.06]


Epoch [1/3], Loss: 6.2466


Epoch [2/3]: 100%|███████████████| 331/331 [06:06<00:00,  1.11s/it, GPU_mem=6.426G, Instances=155, Size=224, loss=5.56]


Epoch [2/3], Loss: 5.8120


Epoch [3/3]: 100%|████████████████| 331/331 [06:03<00:00,  1.10s/it, GPU_mem=6.426G, Instances=155, Size=224, loss=5.3]


Epoch [3/3], Loss: 5.4059


Validation: 100%|█████████████████████████| 11/11 [00:42<00:00,  3.85s/it, classes=0.209, top1_acc=20.9, top5_acc=44.8]

Validation Loss: 5.1485, Top-1 Accuracy: 20.91%, Top-5 Accuracy: 44.84%





In [28]:
# Save the trained model
state = {'net': mymodel_SGD_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_Aug_128.pth')

In [29]:
del mymodel_SGD_Aug

In [31]:
# Training with SGD optimizer and augmentations(learning rate = 5e-3)
mymodel_SGD_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGD = torch.optim.SGD(mymodel_SGD_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_SGD_Aug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|███████████████| 662/662 [06:02<00:00,  1.82it/s, GPU_mem=8.080G, Instances=27, Size=224, loss=0.639]


Epoch [1/3], Loss: 2.1978


Epoch [2/3]: 100%|███████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=8.080G, Instances=27, Size=224, loss=0.277]


Epoch [2/3], Loss: 0.4907


Epoch [3/3]: 100%|███████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=8.080G, Instances=27, Size=224, loss=0.135]


Epoch [3/3], Loss: 0.3217


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.96s/it, classes=0.978, top1_acc=97.8, top5_acc=99.8]

Validation Loss: 0.1787, Top-1 Accuracy: 97.79%, Top-5 Accuracy: 99.85%





In [32]:
# Save the trained model
state = {'net': mymodel_SGD_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_Aug_128_lr_5e-3.pth')

In [30]:
del mymodel_SGD_Aug

In [31]:
# Training with AdamW optimizer and augmentations
mymodel_AdamW_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_AdamW = torch.optim.AdamW(mymodel_AdamW_Aug.linear_head.parameters(), lr=lr)
mymodel_AdamW_Aug = train_model_aug(mymodel_AdamW_Aug, num_epochs, optimizer_AdamW)
check_accuracy(mymodel_AdamW_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|███████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=4.444G, Instances=27, Size=224, loss=0.144]


Epoch [1/3], Loss: 0.4990


Epoch [2/3]: 100%|███████████████| 662/662 [06:04<00:00,  1.82it/s, GPU_mem=4.444G, Instances=27, Size=224, loss=0.376]


Epoch [2/3], Loss: 0.1202


Epoch [3/3]: 100%|█████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=4.444G, Instances=27, Size=224, loss=0.00846]


Epoch [3/3], Loss: 0.0899


Validation: 100%|██████████████████████████| 21/21 [00:41<00:00,  1.97s/it, classes=0.981, top1_acc=98.1, top5_acc=100]

Validation Loss: 0.0763, Top-1 Accuracy: 98.10%, Top-5 Accuracy: 100.00%





In [32]:
# Save the trained model
state = {'net': mymodel_AdamW_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamW_Aug_128.pth')

In [33]:
del mymodel_AdamW_Aug

In [34]:
# Training with RMSProp optimizer and augmentations
mymodel_RMSProp_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_RMSProp = torch.optim.RMSprop(mymodel_RMSProp_Aug.linear_head.parameters(), lr=lr)
mymodel_RMSProp_Aug = train_model_aug(mymodel_RMSProp_Aug, num_epochs, optimizer_RMSProp)
check_accuracy(mymodel_RMSProp_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|██████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.0192]


Epoch [1/3], Loss: 0.4438


Epoch [2/3]: 100%|███████████████| 662/662 [06:04<00:00,  1.82it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.111]


Epoch [2/3], Loss: 0.1227


Epoch [3/3]: 100%|██████████████| 662/662 [06:08<00:00,  1.80it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.0161]


Epoch [3/3], Loss: 0.0904


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.96s/it, classes=0.977, top1_acc=97.7, top5_acc=99.9]

Validation Loss: 0.0813, Top-1 Accuracy: 97.68%, Top-5 Accuracy: 99.92%





In [35]:
# Save the trained model
state = {'net': mymodel_RMSProp_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_RMSProp_Aug_128.pth')

In [36]:
del mymodel_RMSProp_Aug

In [37]:
# Training with NAdam optimizer and augmentations
mymodel_NAdam_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_NAdam = torch.optim.NAdam(mymodel_NAdam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_NAdam_Aug, num_epochs, optimizer_NAdam)
check_accuracy(mymodel_NAdam_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|██████████████| 662/662 [06:13<00:00,  1.77it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.0525]


Epoch [1/3], Loss: 0.5091


Epoch [2/3]: 100%|███████████████| 662/662 [06:06<00:00,  1.80it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.238]


Epoch [2/3], Loss: 0.1145


Epoch [3/3]: 100%|██████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.0498]


Epoch [3/3], Loss: 0.0832


Validation: 100%|██████████████████████████| 21/21 [00:41<00:00,  1.96s/it, classes=0.983, top1_acc=98.3, top5_acc=100]

Validation Loss: 0.0685, Top-1 Accuracy: 98.29%, Top-5 Accuracy: 99.96%





In [39]:
# Save the trained model
state = {'net': mymodel_NAdam_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_NAdam_Aug_128.pth')

In [40]:
del mymodel_NAdam_Aug

In [42]:
# Training with RAdam optimizer and augmentations
mymodel_RAdam_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_RAdam = torch.optim.Adam(mymodel_RAdam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_RAdam_Aug, num_epochs, optimizer_RAdam)
check_accuracy(mymodel_RAdam_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|██████████████| 662/662 [06:07<00:00,  1.80it/s, GPU_mem=4.618G, Instances=27, Size=224, loss=0.0573]


Epoch [1/3], Loss: 0.4995


Epoch [2/3]: 100%|███████████████| 662/662 [06:04<00:00,  1.82it/s, GPU_mem=4.618G, Instances=27, Size=224, loss=0.105]


Epoch [2/3], Loss: 0.1193


Epoch [3/3]: 100%|██████████████| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=4.618G, Instances=27, Size=224, loss=0.0305]


Epoch [3/3], Loss: 0.0874


Validation: 100%|██████████████████████████| 21/21 [00:40<00:00,  1.95s/it, classes=0.977, top1_acc=97.7, top5_acc=100]

Validation Loss: 0.0734, Top-1 Accuracy: 97.68%, Top-5 Accuracy: 99.96%





In [43]:
# Save the trained model
state = {'net': mymodel_RAdam_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_RAdam_Aug_128.pth')

In [44]:
del mymodel_RAdam_Aug

In [45]:
# Training with AdamWScheduleFree optimizer and augmentations
mymodel_AdamWScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_AdamWScheduleFree = schedulefree.AdamWScheduleFree(mymodel_AdamWScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_AdamWScheduleFree.train()
mymodel_AdamWScheduleFree_Aug = train_model_aug(mymodel_AdamWScheduleFree_Aug, num_epochs, optimizer_AdamWScheduleFree)
optimizer_AdamWScheduleFree.eval()
check_accuracy(mymodel_AdamWScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|███████████████| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=6.101G, Instances=27, Size=224, loss=0.123]


Epoch [1/3], Loss: 0.5601


Epoch [2/3]: 100%|███████████████| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=6.101G, Instances=27, Size=224, loss=0.104]


Epoch [2/3], Loss: 0.0940


Epoch [3/3]: 100%|██████████████| 662/662 [06:00<00:00,  1.84it/s, GPU_mem=6.101G, Instances=27, Size=224, loss=0.0779]


Epoch [3/3], Loss: 0.0605


Validation: 100%|██████████████████████████| 21/21 [00:40<00:00,  1.95s/it, classes=0.991, top1_acc=99.1, top5_acc=100]

Validation Loss: 0.0458, Top-1 Accuracy: 99.09%, Top-5 Accuracy: 99.96%





In [46]:
# Save the trained model
state = {'net': mymodel_AdamWScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamWScheduleFree_Aug_128.pth')

In [47]:
del mymodel_AdamWScheduleFree_Aug

In [48]:
# Training with SGDScheduleFree optimizer and augmentations(learning rate = 2.5e-4)
mymodel_SGDScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_Aug = train_model_aug(mymodel_SGDScheduleFree_Aug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|████████████████| 662/662 [06:00<00:00,  1.83it/s, GPU_mem=6.103G, Instances=27, Size=224, loss=5.88]


Epoch [1/3], Loss: 6.2244


Epoch [2/3]: 100%|████████████████| 662/662 [06:00<00:00,  1.83it/s, GPU_mem=6.103G, Instances=27, Size=224, loss=5.55]


Epoch [2/3], Loss: 5.7432


Epoch [3/3]: 100%|████████████████| 662/662 [06:00<00:00,  1.83it/s, GPU_mem=6.103G, Instances=27, Size=224, loss=5.07]


Epoch [3/3], Loss: 5.2954


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.95s/it, classes=0.212, top1_acc=21.2, top5_acc=45.1]

Validation Loss: 5.1468, Top-1 Accuracy: 21.22%, Top-5 Accuracy: 45.10%





In [49]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_Aug_128.pth')

In [50]:
del mymodel_SGDScheduleFree_Aug

In [52]:
# Training with SGDScheduleFree optimizer and augmentations(learning rate = 1e-3)
mymodel_SGDScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 1e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_Aug = train_model_aug(mymodel_SGDScheduleFree_Aug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|████████████████| 662/662 [06:11<00:00,  1.78it/s, GPU_mem=5.962G, Instances=27, Size=224, loss=4.45]


Epoch [1/3], Loss: 5.5427


Epoch [2/3]: 100%|████████████████| 662/662 [06:07<00:00,  1.80it/s, GPU_mem=5.962G, Instances=27, Size=224, loss=3.11]


Epoch [2/3], Loss: 3.9039


Epoch [3/3]: 100%|████████████████| 662/662 [06:05<00:00,  1.81it/s, GPU_mem=5.962G, Instances=27, Size=224, loss=2.34]


Epoch [3/3], Loss: 2.6445


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.97s/it, classes=0.735, top1_acc=73.5, top5_acc=97.4]

Validation Loss: 2.2185, Top-1 Accuracy: 73.49%, Top-5 Accuracy: 97.37%





In [53]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_Aug_128_lr_1e-3.pth')

In [54]:
del mymodel_SGDScheduleFree_Aug

In [55]:
# Training with SGDScheduleFree optimizer and augmentations(learning rate = 5e-3)
mymodel_SGDScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_Aug = train_model_aug(mymodel_SGDScheduleFree_Aug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|████████████████| 662/662 [06:08<00:00,  1.80it/s, GPU_mem=7.445G, Instances=27, Size=224, loss=1.59]


Epoch [1/3], Loss: 3.0719


Epoch [2/3]: 100%|███████████████| 662/662 [06:04<00:00,  1.82it/s, GPU_mem=7.445G, Instances=27, Size=224, loss=0.299]


Epoch [2/3], Loss: 0.6983


Epoch [3/3]: 100%|███████████████| 662/662 [06:07<00:00,  1.80it/s, GPU_mem=7.445G, Instances=27, Size=224, loss=0.331]


Epoch [3/3], Loss: 0.3821


Validation: 100%|████████████████████████████| 21/21 [00:41<00:00,  1.97s/it, classes=0.97, top1_acc=97, top5_acc=99.8]

Validation Loss: 0.2389, Top-1 Accuracy: 96.99%, Top-5 Accuracy: 99.77%





In [56]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_Aug_128_lr_5e-3.pth')