# Notebook for DINOv2 models with dropout

In [6]:
import copy
import os

import torch
import torchvision
from torch import nn
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader


from tqdm import tqdm

import schedulefree

from kornia import augmentation as K
from kornia.augmentation import AugmentationSequential

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [7]:
# Load a pre-trained DINOv2 model from Facebook Research using torch hub
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14_lc')

Using cache found in C:\Users\User/.cache\torch\hub\facebookresearch_dinov2_main


In [8]:
# Freeze all model parameters to avoid updating them during training
for param in model.parameters():
    param.requires_grad = False

In [9]:
# Add a linear head to the model with a dropout layer for regularization
dropout_rate = 0.3
num_classes = 525
model.linear_head = nn.Sequential(
    nn.Dropout(dropout_rate),
    nn.Linear(model.linear_head.in_features, num_classes)
)
# Enable training only for the linear head (parameters of the added layers)
for param in model.linear_head.parameters():
    param.requires_grad = True

In [10]:
# Define a set of transformations to resize and convert images to tensors
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [13]:
# Load training and validation datasets from the specified directories
train_dataset = datasets.ImageFolder('../dataset/train', transform=transform)
val_dataset = datasets.ImageFolder('../dataset/val', transform=transform)

# Create data loaders for training and validation datasets(with batch size = 256)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=256,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)


In [14]:
# Set the device to GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the loss function (cross-entropy) for classification tasks
criterion = nn.CrossEntropyLoss().to(device)

In [15]:
# function for calculating and displaying model accuracy
def check_accuracy(model, dataloader=val_dataloader, device=device, desc='Validation'):
    model.eval() # Set the model to evaluation mode
    val_loss = 0.0
    correct_top1 = 0 # Correct predictions for top-1 accuracy
    correct_top5 = 0 # Correct predictions for top-5 accuracy
    total = 0
    
    # Use tqdm for displaying progress during validation
    dataloader_tqdm = tqdm(dataloader, desc=desc, leave=True)
    
    with torch.no_grad():
        for inputs, labels in dataloader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()

            # For top-5 accuracy
            _, top5_pred = outputs.topk(5, 1, True, True)
            correct_top5 += top5_pred.eq(labels.view(-1, 1).expand_as(top5_pred)).sum().item()
            
            dataloader_tqdm.set_postfix(classes=correct_top1 / total, top1_acc=100 * correct_top1 / total, top5_acc=100 * correct_top5 / total)
    
    print(f'Validation Loss: {val_loss/len(dataloader):.4f}, Top-1 Accuracy: {100 * correct_top1 / total:.2f}%, Top-5 Accuracy: {100 * correct_top5 / total:.2f}%')

In [18]:
use_amp = True # Flag to use automatic mixed precision for training
scaler = torch.amp.GradScaler(device,enabled=use_amp) # GradScaler for mixed precision training


# train function without augmentations
def train_model_no_aug(model, num_epochs, optimizer):
    for epoch in range(num_epochs):
        model.train() # Set the model to training mode
        running_loss = 0.0
        correct_top1 = 0 # Correct predictions for top-1 accuracy
        total = 0
        # Use tqdm for displaying progress during training
        train_loader_tqdm = tqdm(train_dataloader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)
        
        for inputs, labels in train_loader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            gpu_mem = torch.cuda.memory_reserved() / 1e9  # Get GPU memory usage in GB
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()
            
            train_loader_tqdm.set_postfix(GPU_mem=f'{gpu_mem:.3f}G', loss=loss.item(), Instances=len(inputs), Size=inputs.shape[-1], top1_acc=100 * correct_top1 / total)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}')
        
    return model


## Set of models with the following parameters:
1. Automatic mixed precision
2. Batch size of 256
3. Dropout rate of 0.3
4. No augmentations
5. 3 epochs

In [19]:
# Training with Adam optimizer and no augmentations
mymodel_Adam_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_Adam = torch.optim.Adam(mymodel_Adam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_Adam_noAug, num_epochs, optimizer_Adam)
check_accuracy(mymodel_Adam_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [06:00<00:00,  1.09s/it, GPU_mem=4.513G, Instances=155, Size=224, loss=0.246, top1_acc=89.


Epoch [1/3], Loss: 0.5942


Epoch [2/3]: 100%|█| 331/331 [05:56<00:00,  1.08s/it, GPU_mem=4.513G, Instances=155, Size=224, loss=0.0489, top1_acc=97


Epoch [2/3], Loss: 0.1017


Epoch [3/3]: 100%|█| 331/331 [05:54<00:00,  1.07s/it, GPU_mem=4.513G, Instances=155, Size=224, loss=0.0697, top1_acc=98


Epoch [3/3], Loss: 0.0675


Validation: 100%|██████████████████████████| 11/11 [00:40<00:00,  3.64s/it, classes=0.982, top1_acc=98.2, top5_acc=100]

Validation Loss: 0.0702, Top-1 Accuracy: 98.25%, Top-5 Accuracy: 100.00%





In [20]:
# Save the trained model
state = {'net': mymodel_Adam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_Adam_noAug_256_dropout_0.3.pth')

In [21]:
del mymodel_Adam_noAug

In [23]:
# Training with SGD optimizer and no augmentations (learning rate = 5e-3)
mymodel_SGD_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGD = torch.optim.SGD(mymodel_SGD_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_SGD_noAug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [05:50<00:00,  1.06s/it, GPU_mem=5.799G, Instances=155, Size=224, loss=1.27, top1_acc=53.7


Epoch [1/3], Loss: 3.1425


Epoch [2/3]: 100%|█| 331/331 [05:54<00:00,  1.07s/it, GPU_mem=5.799G, Instances=155, Size=224, loss=0.518, top1_acc=90.


Epoch [2/3], Loss: 0.8207


Epoch [3/3]: 100%|█| 331/331 [05:55<00:00,  1.07s/it, GPU_mem=5.799G, Instances=155, Size=224, loss=0.362, top1_acc=93.


Epoch [3/3], Loss: 0.4766


Validation: 100%|█████████████████████████| 11/11 [00:39<00:00,  3.58s/it, classes=0.971, top1_acc=97.1, top5_acc=99.9]

Validation Loss: 0.3117, Top-1 Accuracy: 97.07%, Top-5 Accuracy: 99.89%





In [24]:
# Save the trained model
state = {'net': mymodel_SGD_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_noAug_256_dropout_0.3_lr5e-3.pth')

In [25]:
del mymodel_SGD_noAug

In [26]:
# Training with AdamW optimizer and no augmentations
mymodel_AdamW_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_AdamW = torch.optim.AdamW(mymodel_AdamW_noAug.linear_head.parameters(), lr=lr)
mymodel_AdamW_noAug = train_model_no_aug(mymodel_AdamW_noAug, num_epochs, optimizer_AdamW)
check_accuracy(mymodel_AdamW_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [05:53<00:00,  1.07s/it, GPU_mem=8.764G, Instances=155, Size=224, loss=0.159, top1_acc=89.


Epoch [1/3], Loss: 0.5929


Epoch [2/3]: 100%|█| 331/331 [06:00<00:00,  1.09s/it, GPU_mem=8.764G, Instances=155, Size=224, loss=0.113, top1_acc=97.


Epoch [2/3], Loss: 0.1013


Epoch [3/3]: 100%|█| 331/331 [05:56<00:00,  1.08s/it, GPU_mem=8.764G, Instances=155, Size=224, loss=0.0639, top1_acc=98


Epoch [3/3], Loss: 0.0676


Validation: 100%|██████████████████████████| 11/11 [00:40<00:00,  3.66s/it, classes=0.984, top1_acc=98.4, top5_acc=100]

Validation Loss: 0.0665, Top-1 Accuracy: 98.40%, Top-5 Accuracy: 99.96%





In [27]:
# Save the trained model
state = {'net': mymodel_AdamW_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamW_noAug_256_dropout_0.3.pth')

In [28]:
del mymodel_AdamW_noAug

In [29]:
# Training with RMSprop optimizer and no augmentations
mymodel_RMSProp_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_RMSProp = torch.optim.RMSprop(mymodel_RMSProp_noAug.linear_head.parameters(), lr=lr)
mymodel_RMSProp_noAug = train_model_no_aug(mymodel_RMSProp_noAug, num_epochs, optimizer_RMSProp)
check_accuracy(mymodel_RMSProp_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [05:52<00:00,  1.06s/it, GPU_mem=8.766G, Instances=155, Size=224, loss=0.194, top1_acc=91.


Epoch [1/3], Loss: 0.4498


Epoch [2/3]: 100%|█| 331/331 [05:53<00:00,  1.07s/it, GPU_mem=8.766G, Instances=155, Size=224, loss=0.0701, top1_acc=97


Epoch [2/3], Loss: 0.0958


Epoch [3/3]: 100%|█| 331/331 [06:00<00:00,  1.09s/it, GPU_mem=8.766G, Instances=155, Size=224, loss=0.0149, top1_acc=98


Epoch [3/3], Loss: 0.0633


Validation: 100%|██████████████████████████| 11/11 [00:40<00:00,  3.65s/it, classes=0.988, top1_acc=98.8, top5_acc=100]

Validation Loss: 0.0525, Top-1 Accuracy: 98.78%, Top-5 Accuracy: 99.96%





In [30]:
# Save the trained model
state = {'net': mymodel_RMSProp_noAug.state_dict()} 
torch.save(state, '../models/DINO/Dino_RMSProp_noAug_256_dropout_0.3.pth')

In [31]:
del mymodel_RMSProp_noAug

In [33]:
# Training with NAdam optimizer and no augmentations
mymodel_NAdam_noAug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_NAdam = torch.optim.NAdam(mymodel_NAdam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_NAdam_noAug, num_epochs, optimizer_NAdam)
check_accuracy(mymodel_NAdam_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [05:55<00:00,  1.07s/it, GPU_mem=7.132G, Instances=155, Size=224, loss=0.177, top1_acc=89.


Epoch [1/3], Loss: 0.6373


Epoch [2/3]: 100%|█| 331/331 [05:53<00:00,  1.07s/it, GPU_mem=7.132G, Instances=155, Size=224, loss=0.079, top1_acc=97.


Epoch [2/3], Loss: 0.1005


Epoch [3/3]: 100%|█| 331/331 [05:59<00:00,  1.09s/it, GPU_mem=7.132G, Instances=155, Size=224, loss=0.0491, top1_acc=98


Epoch [3/3], Loss: 0.0646


Validation: 100%|█████████████████████████| 11/11 [00:39<00:00,  3.59s/it, classes=0.986, top1_acc=98.6, top5_acc=99.9]

Validation Loss: 0.0609, Top-1 Accuracy: 98.59%, Top-5 Accuracy: 99.92%





In [34]:
# Save the trained model
state = {'net': mymodel_NAdam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_NAdam_noAug_256_dropout_0.3.pth')

In [35]:
del mymodel_NAdam_noAug

In [36]:
# Training with RAdam optimizer and no augmentations
mymodel_RAdam_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_RAdam = torch.optim.Adam(mymodel_RAdam_noAug.linear_head.parameters(), lr=lr)
mymodel_Adam_noAug = train_model_no_aug(mymodel_RAdam_noAug, num_epochs, optimizer_RAdam)
check_accuracy(mymodel_RAdam_noAug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [05:59<00:00,  1.09s/it, GPU_mem=10.098G, Instances=155, Size=224, loss=0.249, top1_acc=89


Epoch [1/3], Loss: 0.5934


Epoch [2/3]: 100%|█| 331/331 [06:01<00:00,  1.09s/it, GPU_mem=10.098G, Instances=155, Size=224, loss=0.127, top1_acc=97


Epoch [2/3], Loss: 0.1027


Epoch [3/3]: 100%|█| 331/331 [05:59<00:00,  1.08s/it, GPU_mem=10.098G, Instances=155, Size=224, loss=0.0869, top1_acc=9


Epoch [3/3], Loss: 0.0672


Validation: 100%|█████████████████████████| 11/11 [00:39<00:00,  3.59s/it, classes=0.985, top1_acc=98.5, top5_acc=99.9]

Validation Loss: 0.0596, Top-1 Accuracy: 98.51%, Top-5 Accuracy: 99.92%





In [37]:
# Save the trained model
state = {'net': mymodel_RAdam_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_RAdam_noAug_256_dropout_0.3.pth')

In [38]:
del mymodel_RAdam_noAug

In [41]:
# Training with AdamWScheduleFree optimizer and no augmentations
mymodel_AdamWScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_AdamWScheduleFree = schedulefree.AdamWScheduleFree(mymodel_AdamWScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_AdamWScheduleFree.train()
mymodel_AdamWScheduleFree_noAug = train_model_no_aug(mymodel_AdamWScheduleFree_noAug, num_epochs, optimizer_AdamWScheduleFree)
optimizer_AdamWScheduleFree.eval()
check_accuracy(mymodel_AdamWScheduleFree_noAug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [05:50<00:00,  1.06s/it, GPU_mem=6.000G, Instances=155, Size=224, loss=0.111, top1_acc=88]


Epoch [1/3], Loss: 0.7289


Epoch [2/3]: 100%|█| 331/331 [05:57<00:00,  1.08s/it, GPU_mem=6.000G, Instances=155, Size=224, loss=0.0646, top1_acc=98


Epoch [2/3], Loss: 0.0994


Epoch [3/3]: 100%|█| 331/331 [06:01<00:00,  1.09s/it, GPU_mem=6.000G, Instances=155, Size=224, loss=0.0672, top1_acc=98


Epoch [3/3], Loss: 0.0640


Validation: 100%|█████████████████████████| 11/11 [00:40<00:00,  3.66s/it, classes=0.989, top1_acc=98.9, top5_acc=99.9]

Validation Loss: 0.0519, Top-1 Accuracy: 98.93%, Top-5 Accuracy: 99.92%





In [42]:
# Save the trained model
state = {'net': mymodel_AdamWScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamWScheduleFree_noAug_256_dropout_0.3.pth')

In [43]:
del mymodel_AdamWScheduleFree_noAug

In [49]:
# Training with SGDScheduleFree optimizer and no augmentations(lr = 5e-3)
mymodel_SGDScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_noAug = train_model_no_aug(mymodel_SGDScheduleFree_noAug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_noAug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [05:52<00:00,  1.07s/it, GPU_mem=7.378G, Instances=155, Size=224, loss=2.37, top1_acc=34.7


Epoch [1/3], Loss: 4.2115


Epoch [2/3]: 100%|█| 331/331 [05:52<00:00,  1.06s/it, GPU_mem=7.378G, Instances=155, Size=224, loss=0.858, top1_acc=81]


Epoch [2/3], Loss: 1.4619


Epoch [3/3]: 100%|█| 331/331 [05:56<00:00,  1.08s/it, GPU_mem=7.378G, Instances=155, Size=224, loss=0.449, top1_acc=90.


Epoch [3/3], Loss: 0.6891


Validation: 100%|█████████████████████████| 11/11 [00:40<00:00,  3.69s/it, classes=0.956, top1_acc=95.6, top5_acc=99.8]

Validation Loss: 0.4978, Top-1 Accuracy: 95.58%, Top-5 Accuracy: 99.77%





In [50]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_noAug_256_dropout_0.3_lr_5e-3.pth')

In [51]:
del mymodel_SGDScheduleFree_noAug

In [44]:
# Training with SGDScheduleFree optimizer and no augmentations(lr = 1e-3)
mymodel_SGDScheduleFree_noAug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 1e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_noAug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_noAug = train_model_no_aug(mymodel_SGDScheduleFree_noAug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_noAug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 331/331 [05:59<00:00,  1.08s/it, GPU_mem=8.967G, Instances=155, Size=224, loss=5.43, top1_acc=2.25


Epoch [1/3], Loss: 6.0033


Epoch [2/3]: 100%|█| 331/331 [05:57<00:00,  1.08s/it, GPU_mem=8.967G, Instances=155, Size=224, loss=4.64, top1_acc=18.7


Epoch [2/3], Loss: 4.9820


Epoch [3/3]: 100%|█| 331/331 [05:54<00:00,  1.07s/it, GPU_mem=8.967G, Instances=155, Size=224, loss=3.69, top1_acc=39.5


Epoch [3/3], Loss: 4.0758


Validation: 100%|███████████████████████████| 11/11 [00:39<00:00,  3.63s/it, classes=0.514, top1_acc=51.4, top5_acc=89]

Validation Loss: 3.7592, Top-1 Accuracy: 51.35%, Top-5 Accuracy: 88.99%





In [45]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_noAug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_noAug_256_dropout_0.3_lr_1e-3.pth')

In [46]:
del mymodel_SGDScheduleFree_noAug

## Set of models with the following parameters:
1. Automatic mixed precision
2. Batch size of 64
3. Dropout rate of 0.3
4. Kornia augmentations (see specifics below)
5. 3 epochs

In [17]:
# Create data loaders for training and validation datasets(with batch size = 64)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)


In [18]:
# Create an augmentation pipeline using Kornia's AugmentationSequential
aug = AugmentationSequential(
    # Apply a random rotation up to 45 degrees with a 30% probability
    K.RandomRotation(degrees=45.0, p=0.3),
    # Apply a random horizontal flip with a 30% probability
    K.RandomHorizontalFlip(p=0.3),
    # Apply a random vertical flip with a 30% probability
    K.RandomVerticalFlip(p=0.3),
    # Apply a random affine transformation with rotation up to 30 degrees with a 30% probability
    K.RandomAffine(degrees=30.0, p=0.3)
).to(device)

In [21]:
use_amp = True # Flag to use automatic mixed precision for training
scaler = torch.amp.GradScaler(device,enabled=use_amp) # GradScaler for mixed precision training

# train function with augmentations
def train_model_aug(model, num_epochs, optimizer):
    for epoch in range(num_epochs):
        model.train() # Set the model to training mode
        running_loss = 0.0
        correct_top1 = 0 # Correct predictions for top-1 accuracy
        total = 0
        # Use tqdm for displaying progress during training
        train_loader_tqdm = tqdm(train_dataloader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)
        
        for inputs, labels in train_loader_tqdm:
            inputs, labels = aug(inputs.to(device)), labels.to(device)

            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            gpu_mem = torch.cuda.memory_reserved() / 1e9  # Get GPU memory usage in GB
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()
            
            train_loader_tqdm.set_postfix(GPU_mem=f'{gpu_mem:.3f}G', loss=loss.item(), Instances=len(inputs), Size=inputs.shape[-1], top1_acc=100 * correct_top1 / total)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}')
        
    return model


In [22]:
# Training with Adam optimizer and augmentations
mymodel_Adam_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_Adam = torch.optim.Adam(mymodel_Adam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_Adam_Aug, num_epochs, optimizer_Adam)
check_accuracy(mymodel_Adam_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:26<00:00,  3.42it/s, GPU_mem=3.504G, Instances=27, Size=224, loss=0.11, top1_acc=89.


Epoch [1/3], Loss: 0.5103


Epoch [2/3]: 100%|█| 1323/1323 [06:26<00:00,  3.42it/s, GPU_mem=3.504G, Instances=27, Size=224, loss=0.445, top1_acc=96


Epoch [2/3], Loss: 0.1420


Epoch [3/3]: 100%|█| 1323/1323 [06:19<00:00,  3.49it/s, GPU_mem=3.504G, Instances=27, Size=224, loss=0.0118, top1_acc=9


Epoch [3/3], Loss: 0.1148


Validation: 100%|█████████████████████████| 42/42 [00:39<00:00,  1.06it/s, classes=0.978, top1_acc=97.8, top5_acc=99.9]

Validation Loss: 0.0863, Top-1 Accuracy: 97.83%, Top-5 Accuracy: 99.92%





In [23]:
# Save the trained model
state = {'net': mymodel_Adam_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_Adam_Aug_64_dropout_0.3.pth')

In [24]:
del mymodel_Adam_Aug

In [25]:
# Training with SGD optimizer and augmentations(lr = 5e-3)
mymodel_SGD_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGD = torch.optim.SGD(mymodel_SGD_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_SGD_Aug, num_epochs, optimizer_SGD)
check_accuracy(mymodel_SGD_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:22<00:00,  3.46it/s, GPU_mem=4.660G, Instances=27, Size=224, loss=0.24, top1_acc=77.


Epoch [1/3], Loss: 1.4328


Epoch [2/3]: 100%|█| 1323/1323 [06:25<00:00,  3.43it/s, GPU_mem=4.660G, Instances=27, Size=224, loss=0.157, top1_acc=95


Epoch [2/3], Loss: 0.3162


Epoch [3/3]: 100%|█| 1323/1323 [06:25<00:00,  3.43it/s, GPU_mem=4.660G, Instances=27, Size=224, loss=0.114, top1_acc=96


Epoch [3/3], Loss: 0.2262


Validation: 100%|██████████████████████████| 42/42 [00:39<00:00,  1.05it/s, classes=0.982, top1_acc=98.2, top5_acc=100]

Validation Loss: 0.1200, Top-1 Accuracy: 98.25%, Top-5 Accuracy: 99.96%





In [26]:
# Save the trained model
state = {'net': mymodel_SGD_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGD_Aug_64_dropout_0.3.pth')

In [27]:
del mymodel_SGD_Aug

In [28]:
# Training with AdamW optimizer and augmentations
mymodel_AdamW_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_AdamW = torch.optim.AdamW(mymodel_AdamW_Aug.linear_head.parameters(), lr=lr)
mymodel_AdamW_Aug = train_model_aug(mymodel_AdamW_Aug, num_epochs, optimizer_AdamW)
check_accuracy(mymodel_AdamW_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:25<00:00,  3.43it/s, GPU_mem=4.930G, Instances=27, Size=224, loss=0.1, top1_acc=89.8


Epoch [1/3], Loss: 0.5098


Epoch [2/3]: 100%|█| 1323/1323 [06:17<00:00,  3.51it/s, GPU_mem=4.930G, Instances=27, Size=224, loss=0.0983, top1_acc=9


Epoch [2/3], Loss: 0.1462


Epoch [3/3]: 100%|█| 1323/1323 [06:18<00:00,  3.49it/s, GPU_mem=4.930G, Instances=27, Size=224, loss=0.163, top1_acc=97


Epoch [3/3], Loss: 0.1101


Validation: 100%|█████████████████████████| 42/42 [00:41<00:00,  1.02it/s, classes=0.974, top1_acc=97.4, top5_acc=99.9]

Validation Loss: 0.0915, Top-1 Accuracy: 97.41%, Top-5 Accuracy: 99.89%





In [29]:
# Save the trained model
state = {'net': mymodel_AdamW_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamW_Aug_64_dropout_0.3.pth')

In [30]:
del mymodel_AdamW_Aug

In [31]:
# Training with RMSProp optimizer and augmentations
mymodel_RMSProp_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_RMSProp = torch.optim.RMSprop(mymodel_RMSProp_Aug.linear_head.parameters(), lr=lr)
mymodel_RMSProp_Aug = train_model_aug(mymodel_RMSProp_Aug, num_epochs, optimizer_RMSProp)
check_accuracy(mymodel_RMSProp_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:24<00:00,  3.44it/s, GPU_mem=6.094G, Instances=27, Size=224, loss=0.468, top1_acc=91


Epoch [1/3], Loss: 0.4164


Epoch [2/3]: 100%|█| 1323/1323 [06:27<00:00,  3.42it/s, GPU_mem=6.094G, Instances=27, Size=224, loss=0.044, top1_acc=96


Epoch [2/3], Loss: 0.1356


Epoch [3/3]: 100%|█| 1323/1323 [06:24<00:00,  3.44it/s, GPU_mem=6.094G, Instances=27, Size=224, loss=0.00475, top1_acc=


Epoch [3/3], Loss: 0.1018


Validation: 100%|█████████████████████████| 42/42 [00:39<00:00,  1.06it/s, classes=0.984, top1_acc=98.4, top5_acc=99.9]

Validation Loss: 0.0729, Top-1 Accuracy: 98.40%, Top-5 Accuracy: 99.92%





In [32]:
# Save the trained model
state = {'net': mymodel_RMSProp_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_RMSProp_Aug_64_dropout_0.3.pth')

In [33]:
del mymodel_RMSProp_Aug

In [34]:
# Training with NAdam optimizer and augmentations
mymodel_NAdam_Aug =  copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_NAdam = torch.optim.NAdam(mymodel_NAdam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_NAdam_Aug, num_epochs, optimizer_NAdam)
check_accuracy(mymodel_NAdam_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:19<00:00,  3.49it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.176, top1_acc=90


Epoch [1/3], Loss: 0.4975


Epoch [2/3]: 100%|█| 1323/1323 [06:19<00:00,  3.49it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.0611, top1_acc=9


Epoch [2/3], Loss: 0.1350


Epoch [3/3]: 100%|█| 1323/1323 [06:19<00:00,  3.49it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.0813, top1_acc=9


Epoch [3/3], Loss: 0.1075


Validation: 100%|█████████████████████████| 42/42 [00:39<00:00,  1.06it/s, classes=0.981, top1_acc=98.1, top5_acc=99.9]

Validation Loss: 0.0731, Top-1 Accuracy: 98.06%, Top-5 Accuracy: 99.92%





In [35]:
# Save the trained model
state = {'net': mymodel_NAdam_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_NAdam_Aug_64_dropout_0.3.pth')

In [36]:
del mymodel_NAdam_Aug

In [37]:
# Training with RAdam optimizer and augmentations
mymodel_RAdam_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_RAdam = torch.optim.Adam(mymodel_RAdam_Aug.linear_head.parameters(), lr=lr)
mymodel_Adam_Aug = train_model_aug(mymodel_RAdam_Aug, num_epochs, optimizer_RAdam)
check_accuracy(mymodel_RAdam_Aug, val_dataloader)


  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:25<00:00,  3.43it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.285, top1_acc=89


Epoch [1/3], Loss: 0.5105


Epoch [2/3]: 100%|█| 1323/1323 [06:38<00:00,  3.32it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.0401, top1_acc=9


Epoch [2/3], Loss: 0.1458


Epoch [3/3]: 100%|█| 1323/1323 [06:23<00:00,  3.45it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.363, top1_acc=97


Epoch [3/3], Loss: 0.1141


Validation: 100%|██████████████████████████| 42/42 [00:39<00:00,  1.06it/s, classes=0.973, top1_acc=97.3, top5_acc=100]

Validation Loss: 0.0966, Top-1 Accuracy: 97.26%, Top-5 Accuracy: 100.00%





In [38]:
# Save the trained model
state = {'net': mymodel_RAdam_Aug.state_dict()}    
torch.save(state, '../models/DINO/Dino_RAdam_Aug_64_dropout_0.3.pth')

In [39]:
del mymodel_RAdam_Aug

In [40]:
# Training with AdamWScheduleFree optimizer and augmentations
mymodel_AdamWScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_AdamWScheduleFree = schedulefree.AdamWScheduleFree(mymodel_AdamWScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_AdamWScheduleFree.train()
mymodel_AdamWScheduleFree_Aug = train_model_aug(mymodel_AdamWScheduleFree_Aug, num_epochs, optimizer_AdamWScheduleFree)
optimizer_AdamWScheduleFree.eval()
check_accuracy(mymodel_AdamWScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:18<00:00,  3.49it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.0504, top1_acc=9


Epoch [1/3], Loss: 0.5343


Epoch [2/3]: 100%|█| 1323/1323 [06:19<00:00,  3.49it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.0685, top1_acc=9


Epoch [2/3], Loss: 0.0953


Epoch [3/3]: 100%|█| 1323/1323 [06:19<00:00,  3.49it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.0321, top1_acc=9


Epoch [3/3], Loss: 0.0613


Validation: 100%|█████████████████████████████| 42/42 [00:39<00:00,  1.06it/s, classes=0.99, top1_acc=99, top5_acc=100]

Validation Loss: 0.0475, Top-1 Accuracy: 99.01%, Top-5 Accuracy: 99.96%





In [41]:
# Save the trained model
state = {'net': mymodel_AdamWScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_AdamWScheduleFree_Aug_64_dropout_0.3.pth')

In [42]:
del mymodel_AdamWScheduleFree_Aug

In [43]:
# Training with SGDScheduleFree optimizer and augmentations(learning rate = 2.5e-4)
mymodel_SGDScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 2.5e-4
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_Aug = train_model_aug(mymodel_SGDScheduleFree_Aug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:17<00:00,  3.51it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=5.8, top1_acc=1.73


Epoch [1/3], Loss: 6.0973


Epoch [2/3]: 100%|█| 1323/1323 [06:31<00:00,  3.38it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=4.87, top1_acc=14.


Epoch [2/3], Loss: 5.2015


Epoch [3/3]: 100%|█| 1323/1323 [06:34<00:00,  3.35it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=3.79, top1_acc=33.


Epoch [3/3], Loss: 4.3846


Validation: 100%|███████████████████████████| 42/42 [00:42<00:00,  1.01s/it, classes=0.487, top1_acc=48.7, top5_acc=83]

Validation Loss: 4.0366, Top-1 Accuracy: 48.69%, Top-5 Accuracy: 83.01%





In [44]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_Aug.state_dict()}
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_Aug_64_dropout_0.3.pth')

In [50]:
del mymodel_SGDScheduleFree_Aug

In [45]:
# Training with SGDScheduleFree optimizer and augmentations(learning rate = 5e-3)
mymodel_SGDScheduleFree_Aug = copy.deepcopy(model).to(device)
num_epochs = 3  # Set the number of epochs
lr = 5e-3
optimizer_SGDScheduleFree = schedulefree.SGDScheduleFree(mymodel_SGDScheduleFree_Aug.linear_head.parameters(), lr=lr)
optimizer_SGDScheduleFree.train()
mymodel_SGDScheduleFree_Aug = train_model_aug(mymodel_SGDScheduleFree_Aug, num_epochs, optimizer_SGDScheduleFree)
optimizer_SGDScheduleFree.eval()
check_accuracy(mymodel_SGDScheduleFree_Aug, val_dataloader)

  with torch.cuda.amp.autocast(enabled=use_amp):
Epoch [1/3]: 100%|█| 1323/1323 [06:35<00:00,  3.35it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.557, top1_acc=69


Epoch [1/3], Loss: 2.0041


Epoch [2/3]: 100%|█| 1323/1323 [06:26<00:00,  3.43it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.206, top1_acc=93


Epoch [2/3], Loss: 0.3659


Epoch [3/3]: 100%|█| 1323/1323 [06:21<00:00,  3.47it/s, GPU_mem=6.839G, Instances=27, Size=224, loss=0.219, top1_acc=95


Epoch [3/3], Loss: 0.2435


Validation: 100%|█████████████████████████| 42/42 [00:39<00:00,  1.06it/s, classes=0.982, top1_acc=98.2, top5_acc=99.9]

Validation Loss: 0.1343, Top-1 Accuracy: 98.21%, Top-5 Accuracy: 99.89%





In [46]:
# Save the trained model
state = {'net': mymodel_SGDScheduleFree_Aug.state_dict()}     
torch.save(state, '../models/DINO/Dino_SGDScheduleFree_Aug_64_dropout_0.3_lr_5e-3.pth')

In [47]:
del mymodel_SGDScheduleFree_Aug