# Notebook for DINOv2 models with learning rate of 5e-4

In [1]:
import copy
import os

import torch
import torchvision
from torch import nn
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader


from tqdm import tqdm

import schedulefree

from kornia import augmentation as K
from kornia.augmentation import AugmentationSequential

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [3]:
# Load a pre-trained DINOv2 model from Facebook Research using torch hub
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14_lc')

Using cache found in C:\Users\User/.cache\torch\hub\facebookresearch_dinov2_main


In [4]:
# Freeze all model parameters to avoid updating them during training
for param in model.parameters():
    param.requires_grad = False

In [5]:
# Add a linear head to the model
num_classes = 525
model.linear_head = torch.nn.Linear(model.linear_head.in_features, num_classes)

# Enable training only for the linear head (parameters of the added layers)
for param in model.linear_head.parameters():
    param.requires_grad = True

In [6]:
# Define a set of transformations to resize and convert images to tensors
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [7]:
# Load training and validation datasets from the specified directories
train_dataset = datasets.ImageFolder('../dataset/train', transform=transform)
val_dataset = datasets.ImageFolder('../dataset/val', transform=transform)

# Create data loaders for training and validation datasets(with batch size = 128)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)


In [8]:
# Set the device to GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the loss function (cross-entropy) for classification tasks
criterion = nn.CrossEntropyLoss().to(device)

In [9]:
# function for calculating and displaying model accuracy
def check_accuracy(model, dataloader=val_dataloader, device=device, desc='Validation'):
    model.eval() # Set the model to evaluation mode
    val_loss = 0.0
    correct_top1 = 0 # Correct predictions for top-1 accuracy
    correct_top5 = 0 # Correct predictions for top-5 accuracy
    total = 0
    
    # Use tqdm for displaying progress during validation
    dataloader_tqdm = tqdm(dataloader, desc=desc, leave=True)
    
    with torch.no_grad():
        for inputs, labels in dataloader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()

            # For top-5 accuracy
            _, top5_pred = outputs.topk(5, 1, True, True)
            correct_top5 += top5_pred.eq(labels.view(-1, 1).expand_as(top5_pred)).sum().item()
            
            dataloader_tqdm.set_postfix(classes=correct_top1 / total, top1_acc=100 * correct_top1 / total, top5_acc=100 * correct_top5 / total)
    
    print(f'Validation Loss: {val_loss/len(dataloader):.4f}, Top-1 Accuracy: {100 * correct_top1 / total:.2f}%, Top-5 Accuracy: {100 * correct_top5 / total:.2f}%')

In [10]:
use_amp = True # Flag to use automatic mixed precision for training
scaler = torch.amp.GradScaler(device,enabled=use_amp) # GradScaler for mixed precision training


# train function without augmentations
def train_model_no_aug(model, num_epochs, optimizer):
    for epoch in range(num_epochs):
        model.train() # Set the model to training mode
        running_loss = 0.0
        correct_top1 = 0 # Correct predictions for top-1 accuracy
        total = 0
        # Use tqdm for displaying progress during training
        train_loader_tqdm = tqdm(train_dataloader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)
        
        for inputs, labels in train_loader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            gpu_mem = torch.cuda.memory_reserved() / 1e9  # Get GPU memory usage in GB
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()
            
            train_loader_tqdm.set_postfix(GPU_mem=f'{gpu_mem:.3f}G', loss=loss.item(), Instances=len(inputs), Size=inputs.shape[-1], top1_acc=100 * correct_top1 / total)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}')
        
    return model


## Set of models with the following parameters:
1. Automatic mixed precision
2. Batch size of 128
4. No augmentations
5. 3 epochs

In [16]:
# Training with Adam optimizer and no augmentations
mymodel_Adam_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_Adam = torch.optim.Adam(mymodel_Adam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_Adam_noAug, num_epochs, optimizer_Adam)
check_accuracy(mymodel_Adam_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:05<00:00,  1.81it/s, GPU_mem=5.524G, Instances=27, Size=224, loss=0.0956, top1_acc=91.


Epoch [1/3], Loss: 0.4092


Epoch [2/3]: 100%|█| 662/662 [06:16<00:00,  1.76it/s, GPU_mem=5.524G, Instances=27, Size=224, loss=0.0456, top1_acc=96.


Epoch [2/3], Loss: 0.1332


Epoch [3/3]: 100%|█| 662/662 [05:54<00:00,  1.87it/s, GPU_mem=5.524G, Instances=27, Size=224, loss=0.209, top1_acc=97.7


Epoch [3/3], Loss: 0.1018


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.95s/it, classes=0.974, top1_acc=97.4, top5_acc=99.8]

Validation Loss: 0.1358, Top-1 Accuracy: 97.37%, Top-5 Accuracy: 99.81%





In [17]:
# Save the trained model
state = {'net': mymodel_Adam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_Adam_noAug_128_lr_5e-4.pth')

In [18]:
del mymodel_Adam_noAug

In [21]:
# Training with SGD optimizer and no augmentations(learning rate = 5e-3)
mymodel_SGD_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGD = torch.optim.SGD(mymodel_SGD_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_SGD_noAug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:04<00:00,  1.81it/s, GPU_mem=6.835G, Instances=27, Size=224, loss=0.669, top1_acc=74.6


Epoch [1/3], Loss: 1.9486


Epoch [2/3]: 100%|█| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=6.835G, Instances=27, Size=224, loss=0.299, top1_acc=95.7


Epoch [2/3], Loss: 0.3943


Epoch [3/3]: 100%|█| 662/662 [06:00<00:00,  1.83it/s, GPU_mem=6.835G, Instances=27, Size=224, loss=0.118, top1_acc=96.8


Epoch [3/3], Loss: 0.2606


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.97s/it, classes=0.981, top1_acc=98.1, top5_acc=99.9]

Validation Loss: 0.1807, Top-1 Accuracy: 98.10%, Top-5 Accuracy: 99.89%





In [22]:
# Save the trained model
state = {'net': mymodel_SGD_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_noAug_128_lr5e-3.pth')

In [23]:
del mymodel_SGD_noAug

In [24]:
# Training with AdamW optimizer and no augmentations
mymodel_AdamW_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_AdamW = torch.optim.AdamW(mymodel_AdamW_noAug.linear_head.parameters(), lr=lr)
mymodel_AdamW_noAug = train_model_no_aug(mymodel_AdamW_noAug, num_epochs, optimizer_AdamW)
check_accuracy(mymodel_AdamW_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=8.317G, Instances=27, Size=224, loss=0.236, top1_acc=91.8


Epoch [1/3], Loss: 0.4057


Epoch [2/3]: 100%|█| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=8.317G, Instances=27, Size=224, loss=0.873, top1_acc=96.9


Epoch [2/3], Loss: 0.1361


Epoch [3/3]: 100%|█| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=8.317G, Instances=27, Size=224, loss=0.00565, top1_acc=97


Epoch [3/3], Loss: 0.1017


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.97s/it, classes=0.974, top1_acc=97.4, top5_acc=99.7]

Validation Loss: 0.1532, Top-1 Accuracy: 97.37%, Top-5 Accuracy: 99.73%





In [25]:
# Save the trained model
state = {'net': mymodel_AdamW_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamW_noAug_128_lr_5e-4.pth')

In [26]:
del mymodel_AdamW_noAug

In [28]:
# Training with RMSProp optimizer and no augmentations
mymodel_RMSProp_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_RMSProp = torch.optim.RMSprop(mymodel_RMSProp_noAug.linear_head.parameters(), lr=lr)
mymodel_RMSProp_noAug = train_model_no_aug(mymodel_RMSProp_noAug, num_epochs, optimizer_RMSProp)
check_accuracy(mymodel_RMSProp_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [05:53<00:00,  1.87it/s, GPU_mem=8.133G, Instances=27, Size=224, loss=0.0235, top1_acc=91.


Epoch [1/3], Loss: 0.5999


Epoch [2/3]: 100%|█| 662/662 [05:53<00:00,  1.87it/s, GPU_mem=8.133G, Instances=27, Size=224, loss=0.00112, top1_acc=96


Epoch [2/3], Loss: 0.1621


Epoch [3/3]: 100%|█| 662/662 [05:53<00:00,  1.87it/s, GPU_mem=8.133G, Instances=27, Size=224, loss=0.0254, top1_acc=97.


Epoch [3/3], Loss: 0.1118


Validation: 100%|█████████████████████████| 21/21 [00:40<00:00,  1.95s/it, classes=0.979, top1_acc=97.9, top5_acc=99.8]

Validation Loss: 0.1332, Top-1 Accuracy: 97.90%, Top-5 Accuracy: 99.81%





In [29]:
# Save the trained model
state = {'net': mymodel_RMSProp_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_RMSProp_noAug_128_lr_5e-4.pth')

In [30]:
del mymodel_RMSProp_noAug

In [32]:
# Training with NAdam optimizer and no augmentations
mymodel_NAdam_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_NAdam = torch.optim.NAdam(mymodel_NAdam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_NAdam_noAug, num_epochs, optimizer_NAdam)
check_accuracy(mymodel_NAdam_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [05:59<00:00,  1.84it/s, GPU_mem=6.847G, Instances=27, Size=224, loss=0.0391, top1_acc=92.


Epoch [1/3], Loss: 0.3720


Epoch [2/3]: 100%|█| 662/662 [06:16<00:00,  1.76it/s, GPU_mem=6.847G, Instances=27, Size=224, loss=0.0101, top1_acc=97.


Epoch [2/3], Loss: 0.1140


Epoch [3/3]: 100%|█| 662/662 [06:09<00:00,  1.79it/s, GPU_mem=6.847G, Instances=27, Size=224, loss=0.0026, top1_acc=98.


Epoch [3/3], Loss: 0.0799


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.96s/it, classes=0.968, top1_acc=96.8, top5_acc=99.8]

Validation Loss: 0.1647, Top-1 Accuracy: 96.76%, Top-5 Accuracy: 99.77%





In [33]:
# Save the trained model
state = {'net': mymodel_NAdam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_NAdam_noAug_128_lr_5e-4.pth')

In [34]:
del mymodel_NAdam_noAug

In [35]:
# Training with RAdam optimizer and no augmentations
mymodel_RAdam_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_RAdam = torch.optim.Adam(mymodel_RAdam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_RAdam_noAug, num_epochs, optimizer_RAdam)
check_accuracy(mymodel_RAdam_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=8.330G, Instances=27, Size=224, loss=0.243, top1_acc=91.7


Epoch [1/3], Loss: 0.4042


Epoch [2/3]: 100%|█| 662/662 [06:04<00:00,  1.82it/s, GPU_mem=8.330G, Instances=27, Size=224, loss=0.0178, top1_acc=96.


Epoch [2/3], Loss: 0.1357


Epoch [3/3]: 100%|█| 662/662 [06:02<00:00,  1.83it/s, GPU_mem=8.330G, Instances=27, Size=224, loss=0.0983, top1_acc=97.


Epoch [3/3], Loss: 0.0972


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.98s/it, classes=0.975, top1_acc=97.5, top5_acc=99.8]

Validation Loss: 0.1348, Top-1 Accuracy: 97.52%, Top-5 Accuracy: 99.85%





In [36]:
# Save the trained model
state = {'net': mymodel_RAdam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_RAdam_noAug_128_lr_5e-4.pth')

In [37]:
del mymodel_RAdam_noAug

In [38]:
# Training with AdamWScheduleFree optimizer and no augmentations
mymodel_AdamWScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_AdamWScheduleFree = schedulefree.AdamWScheduleFree(mymodel_AdamWScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_AdamWScheduleFree.train()
mymodel_AdamWScheduleFree_noAug = train_model_no_aug(mymodel_AdamWScheduleFree_noAug, num_epochs, optimizer_AdamWScheduleFree)
optimizer_AdamWScheduleFree.eval()
check_accuracy(mymodel_AdamWScheduleFree_noAug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:02<00:00,  1.83it/s, GPU_mem=8.330G, Instances=27, Size=224, loss=0.0325, top1_acc=93.


Epoch [1/3], Loss: 0.3405


Epoch [2/3]: 100%|█| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=8.330G, Instances=27, Size=224, loss=0.0851, top1_acc=98.


Epoch [2/3], Loss: 0.0471


Epoch [3/3]: 100%|█| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=8.330G, Instances=27, Size=224, loss=0.00942, top1_acc=99


Epoch [3/3], Loss: 0.0201


Validation: 100%|██████████████████████████| 21/21 [00:41<00:00,  1.97s/it, classes=0.988, top1_acc=98.8, top5_acc=100]

Validation Loss: 0.0440, Top-1 Accuracy: 98.78%, Top-5 Accuracy: 99.96%





In [40]:
# Save the trained model
state = {'net': mymodel_AdamWScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamWScheduleFree_noAug_128_lr_5e-4.pth')

In [41]:
del mymodel_AdamWScheduleFree_noAug

In [43]:
# Training with SGDScheduleFree optimizer and no augmentations(learning rate = 5e-3)
mymodel_SGDScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_noAug = train_model_no_aug(mymodel_SGDScheduleFree_noAug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_noAug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:04<00:00,  1.81it/s, GPU_mem=7.002G, Instances=27, Size=224, loss=0.933, top1_acc=62.5


Epoch [1/3], Loss: 2.7848


Epoch [2/3]: 100%|█| 662/662 [06:01<00:00,  1.83it/s, GPU_mem=7.002G, Instances=27, Size=224, loss=0.445, top1_acc=94.5


Epoch [2/3], Loss: 0.5344


Epoch [3/3]: 100%|█| 662/662 [05:54<00:00,  1.87it/s, GPU_mem=7.002G, Instances=27, Size=224, loss=0.215, top1_acc=96.1


Epoch [3/3], Loss: 0.3002


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.96s/it, classes=0.976, top1_acc=97.6, top5_acc=99.8]

Validation Loss: 0.2232, Top-1 Accuracy: 97.64%, Top-5 Accuracy: 99.77%





In [44]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_noAug_128_lr_5e-4.pth')

In [45]:
del mymodel_SGDScheduleFree_noAug

In [11]:
# Training with SGDScheduleFree optimizer and no augmentations(learning rate = 1e-3)
mymodel_SGDScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 1e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_noAug = train_model_no_aug(mymodel_SGDScheduleFree_noAug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_noAug, val_dataloader)

Epoch [1/3]: 100%|███████████████| 331/331 [17:13<00:00,  3.12s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=5.37]


Epoch [1/3], Loss: 5.9340


Epoch [2/3]: 100%|███████████████| 331/331 [17:09<00:00,  3.11s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=4.51]


Epoch [2/3], Loss: 4.9130


Epoch [3/3]: 100%|███████████████| 331/331 [17:13<00:00,  3.12s/it, GPU_mem=5.918G, Instances=155, Size=224, loss=3.54]


Epoch [3/3], Loss: 3.9930


Validation: 100%|█████████████████████████| 11/11 [00:39<00:00,  3.57s/it, classes=0.528, top1_acc=52.8, top5_acc=88.9]

Validation Loss: 3.7607, Top-1 Accuracy: 52.76%, Top-5 Accuracy: 88.91%





In [12]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_noAug_128_v2.pth')

In [13]:
del mymodel_SGDScheduleFree_noAug

## Set of models with the following parameters:
1. Automatic mixed precision
2. Batch size of 128
4. Kornia augmentations (see specifics below)
5. 3 epochs

In [10]:
# Create an augmentation pipeline using Kornia's AugmentationSequential
aug = AugmentationSequential(
    # Apply a random rotation up to 45 degrees with a 30% probability
    K.RandomRotation(degrees=45.0, p=0.3),
    # Apply a random horizontal flip with a 30% probability
    K.RandomHorizontalFlip(p=0.3),
    # Apply a random vertical flip with a 30% probability
    K.RandomVerticalFlip(p=0.3),
    # Apply a random affine transformation with rotation up to 30 degrees with a 30% probability
    K.RandomAffine(degrees=30.0, p=0.3)
).to(device)

In [11]:
use_amp = True # Flag to use automatic mixed precision for training
scaler = torch.amp.GradScaler(device,enabled=use_amp) # GradScaler for mixed precision training

# train function with augmentations
def train_model_aug(model, num_epochs, optimizer):
    for epoch in range(num_epochs):
        model.train() # Set the model to training mode
        running_loss = 0.0
        correct_top1 = 0 # Correct predictions for top-1 accuracy
        total = 0
        # Use tqdm for displaying progress during training
        train_loader_tqdm = tqdm(train_dataloader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)
        
        for inputs, labels in train_loader_tqdm:
            inputs, labels = aug(inputs.to(device)), labels.to(device)

            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            gpu_mem = torch.cuda.memory_reserved() / 1e9  # Get GPU memory usage in GB
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()
            
            train_loader_tqdm.set_postfix(GPU_mem=f'{gpu_mem:.3f}G', loss=loss.item(), Instances=len(inputs), Size=inputs.shape[-1], top1_acc=100 * correct_top1 / total)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}')
        
    return model


In [12]:
# Training with Adam optimizer and augmentations
mymodel_Adam_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_Adam = torch.optim.Adam(mymodel_Adam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_Adam_Aug, num_epochs, optimizer_Adam)
check_accuracy(mymodel_Adam_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:15<00:00,  1.76it/s, GPU_mem=2.951G, Instances=27, Size=224, loss=0.425, top1_acc=90.4


Epoch [1/3], Loss: 0.4582


Epoch [2/3]: 100%|█| 662/662 [06:11<00:00,  1.78it/s, GPU_mem=2.951G, Instances=27, Size=224, loss=0.00232, top1_acc=96


Epoch [2/3], Loss: 0.1710


Epoch [3/3]: 100%|██| 662/662 [06:07<00:00,  1.80it/s, GPU_mem=2.951G, Instances=27, Size=224, loss=0.5, top1_acc=96.7]


Epoch [3/3], Loss: 0.1437


Validation: 100%|████████████████████████████| 21/21 [00:41<00:00,  2.00s/it, classes=0.97, top1_acc=97, top5_acc=99.9]

Validation Loss: 0.1545, Top-1 Accuracy: 97.03%, Top-5 Accuracy: 99.92%





In [14]:
# Save the trained model
state = {'net': mymodel_Adam_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_Adam_Aug_128_lr_5e-4.pth')

In [15]:
del mymodel_Adam_Aug

In [16]:
# Training with SGD optimizer and augmentations(learning rate = 5e-4)
mymodel_SGD_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_SGD = torch.optim.SGD(mymodel_SGD_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_SGD_Aug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|██| 662/662 [06:15<00:00,  1.76it/s, GPU_mem=4.228G, Instances=27, Size=224, loss=4.9, top1_acc=11.6]


Epoch [1/3], Loss: 5.6102


Epoch [2/3]: 100%|█| 662/662 [06:17<00:00,  1.75it/s, GPU_mem=4.228G, Instances=27, Size=224, loss=3.44, top1_acc=46.8]


Epoch [2/3], Loss: 4.1266


Epoch [3/3]: 100%|█| 662/662 [06:18<00:00,  1.75it/s, GPU_mem=4.228G, Instances=27, Size=224, loss=2.48, top1_acc=66.7]


Epoch [3/3], Loss: 2.9670


Validation: 100%|████████████████████████████| 21/21 [00:43<00:00,  2.05s/it, classes=0.75, top1_acc=75, top5_acc=97.1]

Validation Loss: 2.3063, Top-1 Accuracy: 75.01%, Top-5 Accuracy: 97.14%





In [17]:
# Save the trained model
state = {'net': mymodel_SGD_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_Aug_128_lr_5e-4.pth')

In [18]:
del mymodel_SGD_Aug

In [15]:
# Training with SGD optimizer and augmentations(learning rate = 5e-3)
mymodel_SGD_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGD = torch.optim.SGD(mymodel_SGD_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_SGD_Aug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|███████████████| 662/662 [06:04<00:00,  1.81it/s, GPU_mem=2.957G, Instances=27, Size=224, loss=0.619]


Epoch [1/3], Loss: 2.1752


Epoch [2/3]: 100%|███████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=2.957G, Instances=27, Size=224, loss=0.448]


Epoch [2/3], Loss: 0.4919


Epoch [3/3]: 100%|███████████████| 662/662 [06:04<00:00,  1.82it/s, GPU_mem=2.957G, Instances=27, Size=224, loss=0.221]


Epoch [3/3], Loss: 0.3225


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.99s/it, classes=0.979, top1_acc=97.9, top5_acc=99.8]

Validation Loss: 0.1815, Top-1 Accuracy: 97.87%, Top-5 Accuracy: 99.85%





In [16]:
# Save the trained model
state = {'net': mymodel_SGD_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_Aug_128_lr_5e-3.pth')

In [17]:
del mymodel_SGD_Aug

In [19]:
# Training with AdamW optimizer and augmentations
mymodel_AdamW_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_AdamW = torch.optim.AdamW(mymodel_AdamW_Aug.linear_head.parameters(), lr=lr)
mymodel_AdamW_Aug = train_model_aug(mymodel_AdamW_Aug, num_epochs, optimizer_AdamW)
check_accuracy(mymodel_AdamW_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:17<00:00,  1.75it/s, GPU_mem=5.711G, Instances=27, Size=224, loss=0.0421, top1_acc=90.


Epoch [1/3], Loss: 0.4564


Epoch [2/3]: 100%|█| 662/662 [06:14<00:00,  1.77it/s, GPU_mem=5.711G, Instances=27, Size=224, loss=0.217, top1_acc=95.9


Epoch [2/3], Loss: 0.1767


Epoch [3/3]: 100%|█| 662/662 [06:20<00:00,  1.74it/s, GPU_mem=5.711G, Instances=27, Size=224, loss=0.0163, top1_acc=96.


Epoch [3/3], Loss: 0.1455


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.99s/it, classes=0.971, top1_acc=97.1, top5_acc=99.9]

Validation Loss: 0.1144, Top-1 Accuracy: 97.10%, Top-5 Accuracy: 99.92%





In [20]:
# Save the trained model
state = {'net': mymodel_AdamW_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamW_Aug_128_lr_5e-4.pth')

In [21]:
del mymodel_AdamW_Aug

In [22]:
# Training with RMSProp optimizer and augmentations
mymodel_RMSProp_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_RMSProp = torch.optim.RMSprop(mymodel_RMSProp_Aug.linear_head.parameters(), lr=lr)
mymodel_RMSProp_Aug = train_model_aug(mymodel_RMSProp_Aug, num_epochs, optimizer_RMSProp)
check_accuracy(mymodel_RMSProp_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:10<00:00,  1.79it/s, GPU_mem=5.713G, Instances=27, Size=224, loss=0.0817, top1_acc=89.


Epoch [1/3], Loss: 0.6279


Epoch [2/3]: 100%|█| 662/662 [06:02<00:00,  1.83it/s, GPU_mem=5.713G, Instances=27, Size=224, loss=0.185, top1_acc=95.9


Epoch [2/3], Loss: 0.1963


Epoch [3/3]: 100%|█| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=5.713G, Instances=27, Size=224, loss=0.0905, top1_acc=96.


Epoch [3/3], Loss: 0.1561


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.96s/it, classes=0.974, top1_acc=97.4, top5_acc=99.9]

Validation Loss: 0.1551, Top-1 Accuracy: 97.37%, Top-5 Accuracy: 99.89%





In [23]:
# Save the trained model
state = {'net': mymodel_RMSProp_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_RMSProp_Aug_128_lr_5e-4.pth')

In [24]:
del mymodel_RMSProp_Aug

In [37]:
# Training with NAdam optimizer and augmentations
mymodel_NAdam_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_NAdam = torch.optim.NAdam(mymodel_NAdam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_NAdam_Aug, num_epochs, optimizer_NAdam)
check_accuracy(mymodel_NAdam_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|██████████████| 662/662 [06:13<00:00,  1.77it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.0525]


Epoch [1/3], Loss: 0.5091


Epoch [2/3]: 100%|███████████████| 662/662 [06:06<00:00,  1.80it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.238]


Epoch [2/3], Loss: 0.1145


Epoch [3/3]: 100%|██████████████| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=5.929G, Instances=27, Size=224, loss=0.0498]


Epoch [3/3], Loss: 0.0832


Validation: 100%|██████████████████████████| 21/21 [00:41<00:00,  1.96s/it, classes=0.983, top1_acc=98.3, top5_acc=100]

Validation Loss: 0.0685, Top-1 Accuracy: 98.29%, Top-5 Accuracy: 99.96%





In [39]:
# Save the trained model
state = {'net': mymodel_NAdam_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_NAdam_Aug_128_lr_5e-4.pth')

In [40]:
del mymodel_NAdam_Aug

In [25]:
# Training with RAdam optimizer and augmentations
mymodel_RAdam_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_RAdam = torch.optim.Adam(mymodel_RAdam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_RAdam_Aug, num_epochs, optimizer_RAdam)
check_accuracy(mymodel_RAdam_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:03<00:00,  1.82it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=0.0253, top1_acc=90.


Epoch [1/3], Loss: 0.4553


Epoch [2/3]: 100%|██| 662/662 [06:05<00:00,  1.81it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=0.538, top1_acc=96]


Epoch [2/3], Loss: 0.1732


Epoch [3/3]: 100%|█| 662/662 [06:09<00:00,  1.79it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=0.00202, top1_acc=96


Epoch [3/3], Loss: 0.1450


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.99s/it, classes=0.971, top1_acc=97.1, top5_acc=99.9]

Validation Loss: 0.1453, Top-1 Accuracy: 97.07%, Top-5 Accuracy: 99.89%





In [26]:
# Save the trained model
state = {'net': mymodel_RAdam_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_RAdam_Aug_128_lr_5e-4.pth')

In [27]:
del mymodel_RAdam_Aug

In [28]:
# Training with AdamWScheduleFree optimizer and augmentations
mymodel_AdamWScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_AdamWScheduleFree = schedulefree.AdamWScheduleFree(mymodel_AdamWScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_AdamWScheduleFree.train()
mymodel_AdamWScheduleFree_Aug = train_model_aug(mymodel_AdamWScheduleFree_Aug, num_epochs, optimizer_AdamWScheduleFree)
optimizer_AdamWScheduleFree.eval()
check_accuracy(mymodel_AdamWScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:06<00:00,  1.80it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=0.0886, top1_acc=92.


Epoch [1/3], Loss: 0.3971


Epoch [2/3]: 100%|█| 662/662 [06:07<00:00,  1.80it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=0.244, top1_acc=98.1


Epoch [2/3], Loss: 0.0750


Epoch [3/3]: 100%|█| 662/662 [06:11<00:00,  1.78it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=0.0966, top1_acc=98.


Epoch [3/3], Loss: 0.0440


Validation: 100%|██████████████████████████| 21/21 [00:41<00:00,  1.97s/it, classes=0.991, top1_acc=99.1, top5_acc=100]

Validation Loss: 0.0423, Top-1 Accuracy: 99.12%, Top-5 Accuracy: 100.00%





In [29]:
# Save the trained model
state = {'net': mymodel_AdamWScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamWScheduleFree_Aug_128_lr_5e-4.pth')

In [30]:
del mymodel_AdamWScheduleFree_Aug

In [31]:
# Training with SGD optimizer and augmentations(learning rate = 5e-4)
mymodel_SGDScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-4
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_Aug = train_model_aug(mymodel_SGDScheduleFree_Aug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:10<00:00,  1.79it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=5.62, top1_acc=3.67]


Epoch [1/3], Loss: 5.9631


Epoch [2/3]: 100%|█| 662/662 [06:05<00:00,  1.81it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=4.54, top1_acc=24.1]


Epoch [2/3], Loss: 5.0704


Epoch [3/3]: 100%|█| 662/662 [06:23<00:00,  1.72it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=3.94, top1_acc=44.1]


Epoch [3/3], Loss: 4.2576


Validation: 100%|█████████████████████████| 21/21 [00:42<00:00,  2.02s/it, classes=0.486, top1_acc=48.6, top5_acc=83.5]

Validation Loss: 3.9643, Top-1 Accuracy: 48.65%, Top-5 Accuracy: 83.50%





In [32]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_Aug_128_lr_5e-4.pth')

In [33]:
del mymodel_SGDScheduleFree_Aug

In [34]:
# Training with SGD optimizer and augmentations(learning rate = 5e-4)
mymodel_SGDScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 1e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_Aug = train_model_aug(mymodel_SGDScheduleFree_Aug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 662/662 [06:19<00:00,  1.75it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=4.59, top1_acc=13.6]


Epoch [1/3], Loss: 5.5243


Epoch [2/3]: 100%|█| 662/662 [06:13<00:00,  1.77it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=3.09, top1_acc=50.1]


Epoch [2/3], Loss: 3.8937


Epoch [3/3]: 100%|█| 662/662 [06:13<00:00,  1.77it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=2.16, top1_acc=68.6]


Epoch [3/3], Loss: 2.6438


Validation: 100%|█████████████████████████| 21/21 [00:41<00:00,  1.98s/it, classes=0.735, top1_acc=73.5, top5_acc=96.9]

Validation Loss: 2.2124, Top-1 Accuracy: 73.52%, Top-5 Accuracy: 96.88%





In [35]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_Aug_128_lr_1e-3.pth')

In [36]:
del mymodel_SGDScheduleFree_Aug

In [37]:
# Training with SGD optimizer and augmentations(learning rate = 5e-3)
mymodel_SGDScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_Aug = train_model_aug(mymodel_SGDScheduleFree_Aug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|███| 662/662 [06:15<00:00,  1.76it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=1.14, top1_acc=58]


Epoch [1/3], Loss: 3.0721


Epoch [2/3]: 100%|█| 662/662 [06:15<00:00,  1.76it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=0.263, top1_acc=92.8


Epoch [2/3], Loss: 0.6978


Epoch [3/3]: 100%|██| 662/662 [06:15<00:00,  1.76it/s, GPU_mem=6.252G, Instances=27, Size=224, loss=0.234, top1_acc=95]


Epoch [3/3], Loss: 0.3842


Validation: 100%|████████████████████████████| 21/21 [00:41<00:00,  1.99s/it, classes=0.97, top1_acc=97, top5_acc=99.8]

Validation Loss: 0.2355, Top-1 Accuracy: 96.99%, Top-5 Accuracy: 99.81%





In [38]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_Aug_128_lr_5e-3.pth')

In [39]:
del mymodel_SGDScheduleFree_Aug