### Requirements

In [1]:
from dataset import*
from utility import*
from training import *
from vit_pytorch.vit import ViT

In [2]:
root_project = '/home/anto/University/Driving-Visual-Attention/'

In [3]:
print(f"We have {'' if torch.cuda.is_available() else 'not'} access to a GPU")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print(torch.cuda.current_device())
    print(torch.cuda.device(0))
    print(torch.cuda.device_count())
    print(torch.cuda.get_device_name(0))
print(device)

We have  access to a GPU
0
<torch.cuda.device object at 0x7f41e842cf10>
1
NVIDIA GeForce RTX 3060 Laptop GPU
cuda


In [4]:
seed_everything(42)

### Data Loader and Visualization

##### Files where to write the paths and labels

In [5]:
percentage = 1
save_train_file = root_project + 'save/save_train' + str(percentage)
save_val_file = root_project + 'save/save_val' + str(percentage)
save_test_file = root_project + 'save/save_test' + str(percentage)

##### Train Loader

In [6]:
train_dataset_classloader = DataLoaderVisualizer(root_project,save_train_file,percentage,'train')

Building path structure

The dataset has already been prepared, ready to use


##### Validtion Loader

In [7]:
val_dataset_classloader = DataLoaderVisualizer(root_project,save_val_file,percentage,'val')

Building path structure

The dataset has already been prepared, ready to use


##### Test Loader

In [8]:
test_dataset_classloader = DataLoaderVisualizer(root_project,save_test_file,percentage,'test')

Building path structure

The dataset has already been prepared, ready to use


##### Visualization

In [9]:
#train_dataset_classloader.visualize_dataset()

In [10]:
#val_dataset_classloader.visualize_dataset()

In [11]:
#test_dataset_classloader.visualize_dataset()

### Pytorch Dataset 

In [12]:
# Crop and convert to tensor
crop_params = (125, 75, 768, 768)
# mean and std of images, calculated in advance
mean = (0.4573337137699127, 0.4427291750907898, 0.3902426064014435)
std = (0.23664842545986176, 0.22875066101551056, 0.2255575954914093)

my_transforms = transforms.Compose([
    transforms.ToTensor(),
    CropTransform(crop_params),
    transforms.Normalize(mean=mean, std=mean, inplace=True)
])

In [13]:
train_dataset = DGAZEDataset('train','save/save_train1',my_transforms)
print(f'Train dataset len is {len(train_dataset)}')

Train dataset len is 1054


In [14]:
'''tensor_img = train_dataset[100][0]*255
numpy_img = tensor_img.permute(1, 2, 0).numpy().astype(np.uint8)
from PIL import Image
from IPython.display import display
# Convert to PIL Image
image_pil = Image.fromarray(numpy_img)

# Display the image in the notebook
display(image_pil)'''

'tensor_img = train_dataset[100][0]*255\nnumpy_img = tensor_img.permute(1, 2, 0).numpy().astype(np.uint8)\nfrom PIL import Image\nfrom IPython.display import display\n# Convert to PIL Image\nimage_pil = Image.fromarray(numpy_img)\n\n# Display the image in the notebook\ndisplay(image_pil)'

In [15]:
val_dataset = DGAZEDataset('val','save/save_val1',my_transforms)
print(f'Val dataset len is {len(val_dataset)}')

Val dataset len is 127


In [16]:
test_dataset = DGAZEDataset('test','save/save_test1',my_transforms)
print(f'Test dataset len is {len(test_dataset)}')

Test dataset len is 126


### Vision Transformer Model

##### Hyerparameters

In [17]:
EPOCHS = 10
BATCH_SIZE = 16
IMAGE_SIZE = 768
HEADS = 8
DROPOUT = 0.1
PATCH_SIZE = 64
DIM = 1024
MLP_DIM= 1024
DEPTH = 4
pre_trained = False

In [18]:
model = ViT(
    image_size = IMAGE_SIZE,
    patch_size = PATCH_SIZE,
    num_classes = 2,
    channels= 3,
    pool='mean',
    dim = DIM,
    depth = DEPTH,
    heads = HEADS,
    mlp_dim = MLP_DIM,
    dropout = DROPOUT,
    emb_dropout = 0.1
)
model.to(device)

ViT(
  (to_patch_embedding): Sequential(
    (0): Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=64, p2=64)
    (1): LayerNorm((12288,), eps=1e-05, elementwise_affine=True)
    (2): Linear(in_features=12288, out_features=1024, bias=True)
    (3): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (transformer): Transformer(
    (norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    (layers): ModuleList(
      (0-3): 4 x ModuleList(
        (0): Attention(
          (norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (attend): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (to_qkv): Linear(in_features=1024, out_features=1536, bias=False)
          (to_out): Sequential(
            (0): Linear(in_features=512, out_features=1024, bias=True)
            (1): Dropout(p=0.1, inplace=False)
          )
        )
        (1): FeedForward(
          (net): Sequential(
    

##### Criterion and Optimizer

In [19]:
#criterion = nn.MSELoss()
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

##### Dataloader

In [20]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Training 

In [21]:
if pre_trained:
    ckpt_path = '/home/anto/University/Driving-Visual-Attention/save/model_weights_epochs10.pth'
    checkpoint = torch.load(ckpt_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [22]:
wandb.login()
wandb.init(project="GazeViT", name="10 epoche lr=0.01")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mscardino-2020613[0m ([33mvesuvio-erutta[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [23]:
if pre_trained:
    start_epoch = checkpoint['epoch']
    EPOCHS = start_epoch + EPOCHS
else:
    start_epoch = 0
    EPOCHS = EPOCHS

for epoch in range(start_epoch, EPOCHS):
    # Training
    train_loss = train_epoch(model, train_loader, criterion, optimizer, device, epoch)
    wandb.log({"epoch": epoch + 1,"train_loss": train_loss})

    # Validation
    val_loss, val_accuracy = validate(model, val_loader, criterion, device, epoch)
    wandb.log({"epoch": epoch + 1,"val_loss": val_loss})
    wandb.log({"epoch": epoch + 1,"val_accuracy": val_accuracy})

    log_image(val_loader, model, device)

    # Finish the WandB run
    wandb.finish()

Training Epoch 0:  34%|███▍      | 22/65 [00:10<00:21,  2.04batch/s, batch loss=549.52]

In [None]:
save_dict = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(save_dict, root_project + 'save/GazeVit_weights_epochs' + str(EPOCHS)+ '.pth')

### Test