# Training and Testing A Visual Transformer Model
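Here we test a Vision Transformer (ViT) from [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929).

**Note:** the code cells below import `DenseNet` and use the `DenseNet201_Weights` preprocessing transforms, so this description may be out of date — confirm which architecture this notebook is meant to evaluate.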

In [1]:
import torch
import torchvision as tv
import torchvision.transforms.v2 as v2
from our_datasets import Country_images
from DenseNet import DenseNet
#from Country_dict import comp_country_dict
import os

# Set to False to force CPU execution even when a GPU is present.
USE_GPU = True

# CUDA is selected only when it is both requested and actually available;
# every other combination falls back to the CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")

Using cuda device


## Load our Dataset and Create our dataloaders

In [2]:
batch_size = 16
# Fixed seed for the train/val/test split so that the three subsets contain the
# same samples after every kernel restart. Without it, a checkpoint loaded in a
# later session could be evaluated on images it was trained on.
SPLIT_SEED = 42

# Reuse the preprocessing pipeline that ships with the pretrained
# DenseNet-201 weights.
weights = tv.models.DenseNet201_Weights.DEFAULT
transform = v2.Compose([weights.transforms(), ])

dataset_path = os.path.join("data", "compressed_dataset")
# NOTE(review): presumably "country.csv" is the label file and `dataset_path`
# the image root -- confirm against `our_datasets.Country_images`.
dataset = Country_images("country.csv", dataset_path, transform=transform)
num_classes = dataset.get_num_classes()

# 70% train / 10% validation / 20% test.
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    lengths=[0.7, 0.1, 0.2],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Options shared by all three loaders.
data_loader_params = {
    'batch_size': batch_size,  # Batch size for data loading
}
# `device` is a torch.device, which does not compare equal to the plain string
# 'cuda'; the device kind has to be read from `.type`, otherwise the GPU-only
# options below are never applied.
if device.type == 'cuda':
    data_loader_params.update({
        'num_workers': 8,  # Number of subprocesses to use for data loading
        'persistent_workers': True,  # Keep worker processes (and their dataset instances) alive between epochs
        'pin_memory': True,  # Copy tensors into pinned memory before returning them (faster host-to-GPU transfer)
        'pin_memory_device': 'cuda',  # Device the memory is pinned for
    })

train_dataloader = torch.utils.data.DataLoader(train_dataset, **data_loader_params, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, **data_loader_params, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, **data_loader_params, shuffle=False)

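## Load our model

> **TODO:** this section currently contains no code. The training and testing cells below use `model`, `EPOCHS`, `loss_fn`, `optimizer`, `lr_scheduler` and `scaler`, which do not appear to be defined in any cell above; the model-setup cell needs to be restored here for the notebook to run from a fresh kernel.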

## Train our Model

In [None]:
# TrainModel is defined in "Training_Functions.py".
from Training_Functions import TrainModel

# Run-mode switches:
#   model_train=True,  fine_tune=True  -> load an existing checkpoint, then keep training
#   model_train=True,  fine_tune=False -> train with the weights the model currently has
#   model_train=False                  -> only load the best saved checkpoint
model_train = True
fine_tune = False
use_amp = True  # forwarded unchanged to TrainModel

# NOTE(review): `model`, `EPOCHS`, `loss_fn`, `optimizer`, `lr_scheduler` and
# `scaler` are expected to already exist in the kernel; they are not defined in
# this cell -- confirm they are created under "Load our model".
if model_train and fine_tune:
    print("Starting Fine Tuning")
    checkpoint_name = "DenseNet-201-2" + "-Best.pth"
elif model_train:
    print("Starting Training")
    checkpoint_name = None  # nothing to load
else:
    print("Loading Model")
    checkpoint_name = model.name + "-Best.pth"

if checkpoint_name is not None:
    # `device` is already a torch.device, so it can be passed directly as
    # map_location. weights_only=True restricts unpickling to tensors and
    # plain containers, which is all a state_dict needs and avoids executing
    # arbitrary pickled code from the checkpoint file.
    checkpoint = torch.load(
        os.path.join(model.path, checkpoint_name),
        map_location=device,
        weights_only=True,
    )
    model.load_state_dict(checkpoint)

model = model.to(device)

if model_train:
    TrainModel(model, EPOCHS, loss_fn, train_dataloader, val_dataloader, optimizer, lr_scheduler, use_amp, scaler)

Starting Training
EPOCH 1:


## Test our Model

In [None]:
# TestModel is defined in "Training_Functions.py".
from Training_Functions import TestModel

# Evaluate on the held-out test split. The returned value is echoed as the
# cell output (it looks like the fraction of correct predictions -- confirm
# against TestModel).
test_accuracy = TestModel(model, test_dataloader, loss_fn)
test_accuracy

Got 8499 / 9598 correct (88.55)


0.8854969785371952