In [1]:
import math, sys, os, torch, torchvision
import numpy as np

# Query CUDA availability once and reuse the answer for both the device
# choice and the status message.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if has_cuda else torch.device("cpu")
print('Using gpu: %s ' % has_cuda)

# Report the CUDA version string exposed by this PyTorch build.
print("CUDA version:", torch.version.cuda)



Using gpu: True 
CUDA version: 11.8


In [8]:
# Load the pretrained `deit_tiny_patch16_224` entry point from the
# facebookresearch/deit hub repository (served from the local hub cache
# when one is present).
model = torch.hub.load(
    'facebookresearch/deit:main',
    'deit_tiny_patch16_224',
    pretrained=True,
)

# Printing the bound method shows the repr of the module it is bound to,
# which is a quick way to inspect the architecture.
print(model.forward_features)

Using cache found in C:\Users\rabas/.cache\torch\hub\facebookresearch_deit_main


<bound method VisionTransformer.forward_features of VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=192, out_features=576, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=192, out_features=192, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=192, out_features=768, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (fc2): Linear(in_features=768, out_features=192, bia

In [12]:
# Training and testing loop
#
# Trains only the classification head (`model_deit.head`) on batches of
# token features, using the first token of each sequence as the input.
# After every epoch the head is evaluated and loss / accuracy / F1 are
# recorded; after training, predictions on `test_loader` are collected.
import torch.optim as optim
import torch.nn as nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def _evaluate_head(head, loader, device, criterion=None):
    """Run `head` over every batch of `loader` without tracking gradients.

    Args:
        head: module applied to the first-token features of each batch.
        loader: iterable yielding `(features, labels)` batches; `features`
            is indexed as `features[:, 0, :]`, so it must be 3-D.
        device: device the batches are moved to before the forward pass.
        criterion: optional loss function; when given, the sample-weighted
            mean loss is returned, otherwise the returned loss is 0.0.

    Returns:
        `(mean_loss, preds, labels)` where `preds` and `labels` are 1-D CPU
        tensors concatenated over all batches.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    head.eval()
    loss_sum, n_seen = 0.0, 0
    pred_batches, label_batches = [], []

    with torch.no_grad():
        for features, labels in loader:
            features, labels = features.to(device), labels.to(device)
            # Select the CLS token (first token) for classification
            outputs = head(features[:, 0, :])
            if criterion is not None:
                # Weight by batch size so a smaller last batch does not skew the mean.
                loss_sum += criterion(outputs, labels).item() * labels.size(0)
            pred_batches.append(torch.argmax(outputs, dim=1).cpu())
            label_batches.append(labels.cpu())
            n_seen += labels.size(0)

    if n_seen == 0:
        raise ValueError("evaluation loader produced no samples")

    return loss_sum / n_seen, torch.cat(pred_batches), torch.cat(label_batches)


def _binary_f1(preds, labels, positive_label=1):
    """Return the F1-score of `positive_label`, or 0.0 when it is undefined.

    F1 = 2*TP / (2*TP + FP + FN). The score is undefined when the positive
    class appears in neither the predictions nor the labels; 0.0 is returned
    in that case.
    """
    predicted_pos = preds == positive_label
    actual_pos = labels == positive_label
    tp = (predicted_pos & actual_pos).sum().item()
    fp = (predicted_pos & ~actual_pos).sum().item()
    fn = (~predicted_pos & actual_pos).sum().item()
    denom = 2 * tp + fp + fn
    return 2 * tp / denom if denom else 0.0


# Training parameters
num_epochs = 20             # Number of epochs (adjust as needed)
learning_rate = 0.001       # Learning rate
criterion = nn.CrossEntropyLoss()
# Batches are moved to `device` below, so the head must live there too.
# This is a no-op when the head is already on `device`.
model_deit.head.to(device)
optimizer = optim.Adam(model_deit.head.parameters(), lr=learning_rate)

# Lists to track metrics
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
val_f1_scores = []

# Training loop
for epoch in range(num_epochs):
    # === Training Phase ===
    model_deit.head.train()
    running_loss, correct_preds, total = 0.0, 0, 0

    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)
        # Select the CLS token (first token) for classification
        cls_features = features[:, 0, :]

        optimizer.zero_grad()
        outputs = model_deit.head(cls_features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * labels.size(0)
        preds = torch.argmax(outputs, dim=1)
        correct_preds += torch.sum(preds == labels).item()
        total += labels.size(0)

    if total == 0:
        raise ValueError("train_loader produced no samples")

    train_loss = running_loss / total
    train_acc = correct_preds / total
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)

    # === Validation Phase ===
    # NOTE(review): this phase iterates `test_loader`, so the "Val" metrics
    # are measured on the same data as the final test evaluation below.
    # Switch to a dedicated validation loader if one exists.
    val_loss, val_preds, val_labels = _evaluate_head(
        model_deit.head, test_loader, device, criterion
    )
    val_total = val_labels.size(0)
    val_correct = torch.sum(val_preds == val_labels).item()
    val_acc = val_correct / val_total
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    # Compute the F1-score on the validation set (binary average for 2 classes).
    # Assumes the positive class is label 1 -- TODO confirm against the dataset.
    val_f1_scores.append(_binary_f1(val_preds, val_labels))

    # Keep the last epoch's predictions / labels available as NumPy arrays.
    all_val_preds = val_preds.numpy()
    all_val_labels = val_labels.numpy()

    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2%} | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2%}")

print("Training completed!")



# === Final Evaluation on the Test Set ===
# No criterion is passed: only predictions and labels are needed here.
_, test_preds, test_labels = _evaluate_head(model_deit.head, test_loader, device)

all_test_preds = test_preds.numpy()
all_test_labels = test_labels.numpy()


Epoch 1/20 | Train Loss: 0.8426 | Train Acc: 47.92% | Val Loss: 0.7135 | Val Acc: 50.00%
Epoch 2/20 | Train Loss: 0.7599 | Train Acc: 45.83% | Val Loss: 0.6788 | Val Acc: 58.33%
Epoch 3/20 | Train Loss: 0.6999 | Train Acc: 45.83% | Val Loss: 0.7269 | Val Acc: 41.67%
Epoch 4/20 | Train Loss: 0.6638 | Train Acc: 56.25% | Val Loss: 0.6893 | Val Acc: 58.33%
Epoch 5/20 | Train Loss: 0.6502 | Train Acc: 64.58% | Val Loss: 0.6894 | Val Acc: 66.67%
Epoch 6/20 | Train Loss: 0.6405 | Train Acc: 64.58% | Val Loss: 0.6666 | Val Acc: 58.33%
Epoch 7/20 | Train Loss: 0.6146 | Train Acc: 64.58% | Val Loss: 0.7152 | Val Acc: 50.00%
Epoch 8/20 | Train Loss: 0.6212 | Train Acc: 64.58% | Val Loss: 0.6754 | Val Acc: 58.33%
Epoch 9/20 | Train Loss: 0.6008 | Train Acc: 68.75% | Val Loss: 0.6828 | Val Acc: 75.00%
Epoch 10/20 | Train Loss: 0.5715 | Train Acc: 70.83% | Val Loss: 0.6707 | Val Acc: 50.00%
Epoch 11/20 | Train Loss: 0.5792 | Train Acc: 68.75% | Val Loss: 0.6683 | Val Acc: 58.33%
Epoch 12/20 | Train