In [1]:
import copy
import os
import time

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import timm

In [2]:
# training dataset path (passed as `root` to torchvision's ImageFolder below)
training_dataset_path = './train/'

# Fixed seed: batch shuffling and the random horizontal flip both draw from
# torch's global RNG, so seeding here makes "Restart Kernel -> Run All"
# repeat the same sequence (as far as the backend is deterministic).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Data Preprocessing 

In [3]:
# Un-normalised view of the training folder: images are only converted to
# tensors so the following cell can look at the raw classes and shapes.
to_tensor_only = transforms.Compose([transforms.ToTensor()])
dataset = torchvision.datasets.ImageFolder(
    root=training_dataset_path,
    transform=to_tensor_only,
)

# Shuffled batches of 32 images drawn from that dataset.
data_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [4]:
# Pull one batch from the DataLoader; the labels are not needed here.
images, _ = next(iter(data_loader))
n_batch, n_channels, img_height, img_width = images.shape

class_names = dataset.classes
# Total number of images in the dataset
dataset_size = len(dataset)

# Class names first, then each dimension of the batch tensor, then the total.
print(f'Classes: {class_names}')
print(f"Batch Size: {n_batch}")
print(f"Channels: {n_channels}")
print(f"Height of Image: {img_height}")
print(f"Width of Image: {img_width}")
print(f"Total Images: {dataset_size}")

Classes: ['cordana', 'healthy', 'pestalotiopsis', 'sigatoka']
Batch Size: 32
Channels: 3
Height of Image: 224
Width of Image: 224
Total Images: 843


# Data Transformation

In [5]:
# ImageNet per-channel statistics, used because the pretrained weights
# expect inputs normalised with these values.
mean, std = (
    np.array(channel_stats)
    for channel_stats in (
        [0.485, 0.456, 0.406],  # mean
        [0.229, 0.224, 0.225],  # std
    )
)

In [6]:
# The images are already 224 x 224 px, so the pipeline has no resize step.
train_steps = [
    transforms.ToTensor(),              # image -> pytorch tensor
    transforms.RandomHorizontalFlip(),  # random flip as augmentation
    transforms.Normalize(mean, std),    # normalise with the ImageNet mean and std
]
train_transform = transforms.Compose(train_steps)

train_dataset = torchvision.datasets.ImageFolder(
    root=training_dataset_path,
    transform=train_transform,
)

In [7]:
# Shuffled mini-batches of 32 samples from the augmented training dataset.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Use CUDA when PyTorch reports it as available, otherwise the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using {device} device")

Using cpu device


# Setup Pretrained Model

In [8]:
num_classes = 4  # cordana, healthy, pestalotiopsis, sigatoka

# Swin-Tiny with pretrained weights; `num_classes` is passed through so the
# declared constant above is the single place that sets the output size.
swin_model = timm.create_model(
    'swin_tiny_patch4_window7_224',
    pretrained=True,
    num_classes=num_classes,
    global_pool='avg',
)
swin_model = swin_model.to(device)

loss_function = nn.CrossEntropyLoss()

# SGD = Stochastic Gradient Descent
# lr = learning rate (test values from [0.001, 0.01] or experiment others)
optimizer = optim.SGD(
    swin_model.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Decay LR by a factor of 0.1 every 10 epochs
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

num_epochs = 30

# TensorBoard event files for this run are written under this directory.
writer = SummaryWriter('VIT_transfer_learning/runs/swin_tiny')

# Train Model

In [9]:
def train_model(model, train_loader, scheduler, device, criterion, optimizer, num_epochs,
                tb_writer=None):
    """Train `model` for `num_epochs` epochs and return it with its best weights loaded.

    Args:
        model: network to optimise; it is switched to train mode every epoch.
        train_loader: iterable yielding `(images, labels)` batches.
        scheduler: learning-rate scheduler, stepped once after every epoch.
        device: device each batch is moved to before the forward pass.
        criterion: loss callable invoked as `criterion(outputs, labels)`.
        optimizer: optimizer whose gradients are zeroed and stepped per batch.
        num_epochs: number of passes over `train_loader`.
        tb_writer: optional object exposing `add_scalar` / `add_histogram`.
            When omitted, the notebook-level `writer` is used, which is what
            this function always did before the parameter existed.

    Returns:
        The same `model` object, loaded with the weights from the epoch that
        reached the highest *training* accuracy (no validation data is used).

    Raises:
        ValueError: if `train_loader` yields no samples during an epoch.
    """
    # Fall back to the global writer so existing call sites keep logging.
    log = tb_writer if tb_writer is not None else writer

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    step = 0  # global batch counter used as the x-axis of the per-batch curves

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        running_corrects = 0
        # Count samples as they are consumed instead of trusting a global
        # `dataset_size` computed elsewhere from a different dataset object.
        samples_seen = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            batch_size = labels.size(0)

            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Convert to Python numbers once; each .item() call copies the
            # value back to the host.
            batch_loss = loss.item()
            batch_corrects = (predicted == labels).sum().item()

            log.add_scalar("Batch Loss", batch_loss, global_step=step)
            log.add_scalar("Batch Accuracy", batch_corrects / batch_size, global_step=step)
            step += 1

            # Weight the batch loss by the batch size so a smaller final
            # batch does not skew the epoch average.
            running_loss += batch_loss * batch_size
            running_corrects += batch_corrects
            samples_seen += batch_size

        if samples_seen == 0:
            raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

        scheduler.step()

        epoch_loss = running_loss / samples_seen
        epoch_accuracy = 100 * running_corrects / samples_seen
        log.add_scalar("Epoch Training Loss", epoch_loss, epoch)
        log.add_scalar("Epoch Training Accuracy", epoch_accuracy, epoch)

        # Per-parameter histograms; the gradients are those left over from
        # the last batch of the epoch.
        for name, param in model.named_parameters():
            log.add_histogram(f"Weights/{name}", param, epoch)
            if param.grad is not None:
                log.add_histogram(f"Gradients/{name}", param.grad, epoch)

        print(f'Epoch [{epoch+1}/{num_epochs}], '
            f'Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%')

        # Strict '>' keeps the earliest epoch among ties.
        if epoch_accuracy > best_acc:
            best_acc = epoch_accuracy
            best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    model.load_state_dict(best_model_wts)
    return model

# Save Model

In [10]:
# Change path to new model save file
swin_trained_model = "VIT_transfer_learning/models/swin_tiny_1.pth"

# Create the target folder before the long training run: torch.save does not
# create missing parent directories, so a missing folder would otherwise only
# surface as an error after all epochs have finished.
os.makedirs(os.path.dirname(swin_trained_model), exist_ok=True)

best_model = train_model(
    swin_model,
    train_loader,
    step_lr_scheduler,
    device,
    loss_function,
    optimizer,
    num_epochs
)

# Persist only the weights (state_dict) of the returned model.
torch.save(best_model.state_dict(), swin_trained_model)



Epoch [1/30], Train Loss: 0.9224, Train Accuracy: 60.26%
Epoch [2/30], Train Loss: 0.2376, Train Accuracy: 95.02%
Epoch [3/30], Train Loss: 0.0688, Train Accuracy: 98.58%
Epoch [4/30], Train Loss: 0.0289, Train Accuracy: 99.53%
Epoch [5/30], Train Loss: 0.0153, Train Accuracy: 100.00%
Epoch [6/30], Train Loss: 0.0164, Train Accuracy: 99.88%
Epoch [7/30], Train Loss: 0.0118, Train Accuracy: 99.88%
Epoch [8/30], Train Loss: 0.0082, Train Accuracy: 100.00%
Epoch [9/30], Train Loss: 0.0066, Train Accuracy: 99.88%
Epoch [10/30], Train Loss: 0.0055, Train Accuracy: 99.88%
Epoch [11/30], Train Loss: 0.0052, Train Accuracy: 100.00%
Epoch [12/30], Train Loss: 0.0029, Train Accuracy: 100.00%
Epoch [13/30], Train Loss: 0.0053, Train Accuracy: 99.88%
Epoch [14/30], Train Loss: 0.0041, Train Accuracy: 100.00%
Epoch [15/30], Train Loss: 0.0029, Train Accuracy: 100.00%
Epoch [16/30], Train Loss: 0.0053, Train Accuracy: 100.00%
Epoch [17/30], Train Loss: 0.0032, Train Accuracy: 100.00%
Epoch [18/30], 

# Evaluate Model

In [11]:
def evaluate_model(model, test_loader, device, criterion):
    """Evaluate `model` on every batch of `test_loader` and report loss and accuracy.

    Args:
        model: network to evaluate; it is moved to `device` and put in eval mode.
        test_loader: iterable yielding `(images, labels)` batches.
        device: device the model and each batch are moved to.
        criterion: loss callable invoked as `criterion(outputs, labels)`.

    Returns:
        Tuple `(loss, accuracy)`: the sample-weighted mean loss and the
        accuracy in percent over all samples that were iterated.

    Raises:
        ValueError: if `test_loader` yields no samples.
    """
    since = time.time()
    # Batches are moved to `device` below, so the model has to live there too;
    # otherwise the forward pass fails whenever `device` is not the CPU.
    model = model.to(device)
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    # Counted locally so the function does not depend on a notebook global
    # that is only defined in a later cell.
    samples_seen = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            batch_size = labels.size(0)

            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Weight by batch size so a smaller last batch is averaged correctly.
            running_loss += loss.item() * batch_size
            running_corrects += (labels == predicted).sum().item()
            samples_seen += batch_size

    if samples_seen == 0:
        raise ValueError("test_loader yielded no samples; cannot compute metrics")

    epoch_loss = running_loss / samples_seen
    epoch_accuracy = 100 * running_corrects / samples_seen

    print(f"Eval Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")
    print(f'Got {running_corrects} out of {samples_seen} images correctly')

    time_elapsed = time.time() - since
    print(f'Evaluation complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    return epoch_loss, epoch_accuracy

# Practice Test Dataset from Kaggle

In [15]:
test_dataset_path = './prac_test/'

# Same ImageNet statistics that were used to normalise the training images.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Evaluation pipeline: tensor conversion and normalisation only, no augmentation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

test_dataset = torchvision.datasets.ImageFolder(
    root=test_dataset_path,
    transform=test_transform
)

# Shuffling is unnecessary when only aggregate metrics are computed.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False
)

# Pick the device first so the checkpoint can be mapped straight onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

num_classes = 4  # cordana, healthy, pestalotiopsis, sigatoka
# pretrained=False: every weight is overwritten by the checkpoint loaded below.
swin_model = timm.create_model(
    'swin_tiny_patch4_window7_224',
    pretrained=False,
    num_classes=num_classes,
    global_pool='avg',
)
swin_model_trained = "VIT_transfer_learning/models/swin_tiny_1.pth"
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only one.
state_dict = torch.load(swin_model_trained, map_location=device)
swin_model.load_state_dict(state_dict)
# The batches are moved to `device` during evaluation, so the model must be too.
swin_model = swin_model.to(device)

test_dataset_size = len(test_dataset)
loss_function = nn.CrossEntropyLoss()
loss, accuracy = evaluate_model(swin_model, test_loader, device, loss_function)


Using cpu device




Eval Loss: 0.0300, Accuracy: 99.33%
Got 2520 out of 2537 images correctly
Evaluation complete in 3m 51s


# Run TensorBoard

Download models and runs here https://drive.google.com/drive/folders/1vO8MprVQdztSgorqvVyTRAQZL_XACgEh?usp=drive_link

Run in terminal
1. `tensorboard-env\Scripts\activate`
2. `tensorboard --logdir=VIT_transfer_learning\runs` (the directory this notebook's `SummaryWriter` writes to). If it doesn't work, first install tensorboard inside the environment.
3. Open at http://localhost:6006/