<a href="https://colab.research.google.com/github/changyuhsin1999/WBC-Differential-Learning-Tool/blob/main/SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

# Remove Colab default sample_data
!rm -r ./sample_data

# Clone GitHub files to colab workspace
repo_name = "WBC-Differential-Learning-Tool" # Enter repo name
git_path = 'https://github.com/changyuhsin1999/WBC-Differential-Learning-Tool.git'
!git clone "{git_path}"
data_file = "/content/WBC-Differential-Learning-Tool/data"

rm: cannot remove './sample_data': No such file or directory
fatal: destination path 'WBC-Differential-Learning-Tool' already exists and is not an empty directory.


In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
from PIL import Image
import torchvision
from torchvision import datasets, transforms

In [29]:
def define_transforms():
    """
    Define transformations for training, validation, and test data.
    For training data we will do resize to 224 * 224, randomized horizontal flipping, rotation, lighting effects, and normalization. 
    For test and val set we will do only center cropping to get to 224 * 224 and normalization
    """

    data_transforms = transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    return data_transforms

In [39]:
train_percentage = 0.6
val_percentage = 0.15
test_percentage = 0.25

batch_size = 8
num_workers = 2

input_size = 1209600
learning_rate = 0.0001
momentum = 0.1
num_epochs=50

In [31]:
def create_datasets(data_dir, train_percentage, val_percentage):
    """
    Create datasets for training, validation, and test

    Args:
        data_dir (str): path to data directory
        train_percentage (float): percentage of data to use for training
        val_percentage (float): percentage of data to use for validation

    Returns:
        train_dataset (torchvision.datasets.ImageFolder): training dataset
        val_dataset (torchvision.datasets.ImageFolder): validation dataset
        test_dataset (torchvision.datasets.ImageFolder): test dataset
        class_names (list): list of class names
        num_classes (int): number of classes
    """
    ## Define transformations for training, validation, and test data
    data_transforms = define_transforms()

    ## Create Datasets for training, testing and validation sets
    image_dataset = torchvision.datasets.ImageFolder(root=data_file, transform=data_transforms)
    train_size = int(train_percentage * len(image_dataset))
    val_size = int(val_percentage * len(image_dataset))
    test_size = len(image_dataset) - train_size - val_size

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(image_dataset, [train_size, val_size, test_size])

    ## get class names associated with labels
    class_names = image_dataset.classes
    num_classes = len(class_names)

    return train_dataset, val_dataset, test_dataset, class_names, num_classes

In [32]:
def create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size, num_workers=2):
    """
    Create dataloaders for training and validation and testing sets

    Args:
        train_dataset (torchvision.datasets.ImageFolder): training dataset
        val_dataset (torchvision.datasets.ImageFolder): validation dataset
        test_dataset (torchvision.datasets.ImageFolder): test dataset
        batch_size (int): batch size
        num_workers (int): number of workers to use for dataloader

    Returns:
        dataloaders (dict): dictionary of dataloaders for training and validation sets
        dataset_sizes (dict): dictionary of sizes of training and validation sets
    """
     
    ## Create DataLoaders for training, testing and validation sets
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, 
                                            shuffle=True, num_workers=num_workers)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, 
                                            shuffle=False, num_workers=num_workers)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, 
                                            shuffle=True, num_workers=num_workers)

    ## Set up dict for dataloaders
    dataloaders = {'train':train_loader, 'val':val_loader, 'test': test_loader}

    ## Store size of training and validation sets
    dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset), 'test': len(test_dataset)}

    return dataloaders, dataset_sizes

In [33]:
train_dataset, val_dataset, test_dataset, class_names, num_classes = create_datasets(data_file, train_percentage, val_percentage)
dataloaders, dataset_sizes = create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size, num_workers)

In [34]:
def train_model(model, input_size, criterion, optimizer, dataloaders, batch_size, device="cpu", num_epochs=1):
    """
    Train the model using transfer learning
    Args:
        model (torchvision.models): model to train
        input_size (int): input size of the model
        criterion (torch.nn.modules.loss): loss function
        optimizer (torch.optim): optimizer
        dataloaders (dict): dictionary of dataloaders for training and validation sets
        device (torch.device): device to train on
        num_epochs (int): number of epochs to train for
    Returns:
        model (torchvision.models): trained model
    """
    ## Load the model to GPU if available
    model = model.to(device)

    ## Train the model
    for epoch in range(num_epochs):
        avg_loss_epoch = 0
        batch_loss = 0
        total_batches = 0

        for images, labels in dataloaders["train"]:
            images = images.to(device)
            labels = labels.to(device)
            images = images.reshape(-1, input_size)
            optimizer.zero_grad()
            
            ## Forward pass        
            outputs = model(images)           
            loss_svm = criterion(outputs, labels, batch_size)    
            
            ## Backward and optimize
            loss_svm.backward()
            optimizer.step()    
            total_batches += 1     
            batch_loss += loss_svm.item()

        ## Print loss every few iterations
        avg_loss_epoch = batch_loss/total_batches
        print ('Epoch [{}/{}], Averge Loss:for epoch {}: {:.4f}]'.format(epoch+1, num_epochs, epoch+1, avg_loss_epoch))
    return model


In [35]:
def test_model(model, test_dataloader, device, input_size):
    """
    Test the trained model performance on test dataset
    Args:
        model (torchvision.models): model to train
        test_dataloader (torch.utils.data.DataLoader): test dataloader
    Returns:
        model (torchvision.models): trained model
    """
    ## Load the model to GPU if available
    model = model.to(device)

    ## Set model to evaluate mode
    model.eval()

    correct = 0.
    total = 0.

    ## Iterate through test dataset
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)
        ## Reshape images
        images = images.reshape(-1, input_size)
        
        ## Forward pass
        outputs = model(images) 
        
        ## Get predictions
        predicted = torch.argmax(outputs, axis=1)

        ## Calculate accuracy
        total += labels.size(0) 
        correct += (predicted == labels).sum()    

    print('Accuracy of the SVM model on the val images: %f %%' % (100 * (correct.float() / total)))

In [37]:
## Torch parameters being used
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

np.random.seed(0)
random.seed(0)
torch.manual_seed(0)

class SVM_Loss(torch.nn.modules.Module):
    """
    SVM Loss function
    """    
    def __init__(self):
        """
        Initialize the SVM Loss function
        """
        super(SVM_Loss,self).__init__()

    def forward(self, outputs, labels, batch_size):
        """
        Forward pass of the SVM Loss function
        """
        return torch.sum(torch.clamp(1 - outputs.t()*labels, min=0))/batch_size

torch:  2.0 ; cuda:  cu118


In [40]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
svm_model = nn.Linear(input_size,num_classes)

  ## Loss and optimizer
svm_loss_criteria = SVM_Loss()
svm_optimizer = torch.optim.SGD(svm_model.parameters(), lr=learning_rate, momentum=momentum)
total_step = len(dataloaders["train"])

    ## Train model
model = train_model(svm_model, input_size, svm_loss_criteria, svm_optimizer, dataloaders, batch_size, device, num_epochs)

Epoch [1/50], Averge Loss:for epoch 1: 1.0993]
Epoch [2/50], Averge Loss:for epoch 2: 0.8413]
Epoch [3/50], Averge Loss:for epoch 3: 0.8413]
Epoch [4/50], Averge Loss:for epoch 4: 0.8413]
Epoch [5/50], Averge Loss:for epoch 5: 0.8413]
Epoch [6/50], Averge Loss:for epoch 6: 0.8413]
Epoch [7/50], Averge Loss:for epoch 7: 0.8413]
Epoch [8/50], Averge Loss:for epoch 8: 0.8413]
Epoch [9/50], Averge Loss:for epoch 9: 0.8413]
Epoch [10/50], Averge Loss:for epoch 10: 0.8413]
Epoch [11/50], Averge Loss:for epoch 11: 0.8413]
Epoch [12/50], Averge Loss:for epoch 12: 0.8413]
Epoch [13/50], Averge Loss:for epoch 13: 0.8413]
Epoch [14/50], Averge Loss:for epoch 14: 0.8413]
Epoch [15/50], Averge Loss:for epoch 15: 0.8413]
Epoch [16/50], Averge Loss:for epoch 16: 0.8413]
Epoch [17/50], Averge Loss:for epoch 17: 0.8413]
Epoch [18/50], Averge Loss:for epoch 18: 0.8413]
Epoch [19/50], Averge Loss:for epoch 19: 0.8413]
Epoch [20/50], Averge Loss:for epoch 20: 0.8413]
Epoch [21/50], Averge Loss:for epoch 2

In [41]:
model_dir = '/content/WBC-Differential-Learning-Tool/models'
filename = 'SVM_SGD.pt'

# Save the entire model
torch.save(model, os.path.join(model_dir,filename))

In [44]:
model = torch.load("/content/WBC-Differential-Learning-Tool/models/SVM_SGD.pt")
test_model(model, dataloaders["test"], device, input_size)