In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from tqdm import tqdm

# Install a blanket "ignore" filter so no warning of any category is shown
# in cell outputs for the rest of the session.
warnings.simplefilter("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Device configuration: use the GPU when one is visible, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed used for the train/validation split (and the global torch RNG) so that
# the same images land in the validation set on every run.
SEED = 42
torch.manual_seed(SEED)

# Dataset location. Set the CALTECH101_DIR environment variable to point at a
# different copy of the data; the original local path remains the fallback.
dataset_path = os.environ.get(
    "CALTECH101_DIR",
    "C:/Users/offic/OneDrive/Masaüstü/datasets/caltech-101",
)

# Define data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224 for DaViT
    transforms.ToTensor(),
    # Per-channel (x - 0.5) / 0.5.
    # NOTE(review): pretrained timm weights usually ship their own mean/std
    # (see model.pretrained_cfg) -- confirm 0.5/0.5 is intentional.
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the dataset; ImageFolder takes one class per sub-directory of the root.
dataset = ImageFolder(root=dataset_path, transform=transform)

# 80/20 train/validation split, drawn from a dedicated seeded generator so the
# split does not depend on how many random numbers were consumed beforehand.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [3]:
# Build the pretrained DaViT-Tiny network through timm.
model = timm.create_model('davit_tiny', pretrained=True)

# Replace the stock classifier with a fresh head sized for this dataset:
# pool to 1x1 -> flatten -> linear layer with one logit per class.
num_features = model.head.in_features
num_classes = len(dataset.classes)
replacement_head = [
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(num_features, num_classes),
]
model.head = nn.Sequential(*replacement_head)
model.to(device)

# Training objective, optimizer, and learning-rate schedule.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# StepLR scales the learning rate by `gamma` every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

def calculate_accuracy(outputs, labels):
    """Return the fraction of samples whose top-scoring class equals the label.

    Args:
        outputs: Tensor of per-class scores with one row per sample; the
            predicted class is the index of the largest value along dim 1.
        labels: Tensor of class indices, one entry per row of ``outputs``.

    Returns:
        float: number of correct predictions divided by ``len(labels)``.
        An empty batch yields 0.0 instead of raising ``ZeroDivisionError``.
    """
    total = len(labels)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    preds = outputs.argmax(dim=1)
    return (preds == labels).sum().item() / total

In [4]:
# Fine-tune for a fixed number of epochs, validating after every epoch.
num_epochs = 10

for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    running_corrects = 0
    total_train = 0

    # Per-epoch progress bar; leave=False removes it once the epoch finishes.
    bar_title = f"Epoch [{epoch+1}/{num_epochs}]"
    epoch_bar = tqdm(train_loader, desc=bar_title, leave=False)

    for images, labels in epoch_bar:
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # One optimization step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate per-sample sums (batch value * batch size) so a smaller
        # final batch is weighted by its actual number of samples.
        total_train += batch_size
        running_loss += loss.item() * batch_size
        running_corrects += calculate_accuracy(outputs, labels) * batch_size

        # Show the running averages on the progress bar.
        running_metrics = {
            'loss': running_loss/total_train,
            'accuracy': running_corrects/total_train
        }
        epoch_bar.set_postfix(running_metrics)

    epoch_loss = running_loss / total_train
    epoch_acc = running_corrects / total_train

    # ---- Validation phase (no gradient tracking, no weight updates) ----
    model.eval()
    val_running_loss = 0.0
    val_running_corrects = 0
    total_val = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            batch_size = labels.size(0)

            outputs = model(images)
            loss = criterion(outputs, labels)

            total_val += batch_size
            val_running_loss += loss.item() * batch_size
            val_running_corrects += calculate_accuracy(outputs, labels) * batch_size

    val_loss = val_running_loss / total_val
    val_acc = val_running_corrects / total_val

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}')
    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

                                                                                           

Epoch [1/10], Loss: 0.8531, Accuracy: 0.7936, Val Loss: 0.6183, Val Accuracy: 0.8414


                                                                                           

Epoch [2/10], Loss: 0.2835, Accuracy: 0.9232, Val Loss: 0.5421, Val Accuracy: 0.8715


                                                                                           

Epoch [3/10], Loss: 0.1885, Accuracy: 0.9500, Val Loss: 0.5220, Val Accuracy: 0.8759


                                                                                           

Epoch [4/10], Loss: 0.1874, Accuracy: 0.9500, Val Loss: 0.4614, Val Accuracy: 0.8824


                                                                                            

Epoch [5/10], Loss: 0.1266, Accuracy: 0.9650, Val Loss: 0.3313, Val Accuracy: 0.9202


                                                                                           

Epoch [6/10], Loss: 0.1386, Accuracy: 0.9619, Val Loss: 0.4811, Val Accuracy: 0.8901


                                                                                            

Epoch [7/10], Loss: 0.1024, Accuracy: 0.9739, Val Loss: 0.4315, Val Accuracy: 0.8972


                                                                                            

Epoch [8/10], Loss: 0.0194, Accuracy: 0.9948, Val Loss: 0.2620, Val Accuracy: 0.9404


                                                                                             

Epoch [9/10], Loss: 0.0019, Accuracy: 0.9995, Val Loss: 0.2620, Val Accuracy: 0.9420


                                                                                              

Epoch [10/10], Loss: 0.0015, Accuracy: 0.9995, Val Loss: 0.2626, Val Accuracy: 0.9420


In [10]:
# Run the validation pass inside this cell so it works on a fresh kernel:
# `all_labels` / `all_preds` were previously only created by a cell that
# appears further down in the notebook.
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

y_true = all_labels
y_pred = all_preds

# Calculating the metrics
accuracy = accuracy_score(y_true, y_pred)
# 'weighted' averages the per-class scores weighted by each class's support.
precision = precision_score(y_true, y_pred, average='weighted')
# 'macro' recall is the unweighted mean of per-class recall (UAR).
uar = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Overall Precision: {precision:.4f}")
print(f"Unweighted Average Recall (UAR): {uar:.4f}")
print(f"Overall F1-Score: {f1:.4f}")


Accuracy: 0.9420
Overall Precision: 0.9457
Unweighted Average Recall (UAR): 0.9243
Overall F1-Score: 0.9415


In [12]:
# Collect predicted and true class indices over the whole validation set.
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest score along dim 1 is the predicted class.
        predicted = outputs.argmax(dim=1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Classification report
# Pass the full list of class indices explicitly: without it the report raises
# when a class is absent from the validation split, because the number of
# observed labels then no longer matches the length of target_names.
class_indices = list(range(len(dataset.classes)))
print("Classification Report:")
print(classification_report(all_labels, all_preds, labels=class_indices, target_names=dataset.classes))

Classification Report:
                   precision    recall  f1-score   support

BACKGROUND_Google       0.82      0.81      0.82       102
            Faces       0.99      1.00      0.99        95
       Faces_easy       1.00      0.99      0.99        86
         Leopards       1.00      1.00      1.00        30
       Motorbikes       1.00      1.00      1.00       162
        accordion       1.00      1.00      1.00         9
        airplanes       0.98      1.00      0.99       161
           anchor       1.00      0.67      0.80        12
              ant       0.90      0.90      0.90        10
           barrel       0.89      1.00      0.94         8
             bass       1.00      0.92      0.96        13
           beaver       0.75      0.82      0.78        11
        binocular       1.00      1.00      1.00         3
           bonsai       0.98      1.00      0.99        40
            brain       0.90      0.95      0.92        19
     brontosaurus       0.67    

In [13]:
# Write only the model's state_dict (not the full module object) to disk,
# relative to the notebook's working directory.
checkpoint_path = 'davit_caltech101.pth'
torch.save(model.state_dict(), checkpoint_path)