In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
!pip install timm

In [69]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io

import torch
import torch.nn as nn
import torchvision.transforms as transforms

import timm

import gc
import os
import time
import random
from datetime import datetime
import shutil

from PIL import Image
from tqdm.notebook import tqdm
from sklearn import model_selection, metrics
from shutil import copyfile

In [3]:
def seed_everything(seed):
    """
    Seed every random number generator used here, for reproducibility of results.

    Arguments:
        seed {int} -- Seed applied to Python's `random`, the PYTHONHASHSEED
                      environment variable, NumPy and PyTorch (CPU and CUDA)
    """
    random.seed(seed)
    # Exported for any interpreter started from this process; the hash seed of
    # the already-running interpreter is fixed at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # `torch.cuda.manual_seed` only seeds the current GPU; seed all of them so
    # multi-GPU sessions are reproducible too. Silently ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning speed for deterministic kernel selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(111)

In [4]:
# general global variables
DATA_PATH = "/kaggle/input/pollen-dataset/Pollen_data"
IMAGES_PATH = os.path.join(DATA_PATH, "images")

In [5]:
df = pd.read_csv(os.path.join(DATA_PATH, "data.csv"))
df.head()

In [6]:
df.info()

In [7]:
df.label.value_counts().plot(kind="bar")

In [8]:
# Split the data into train, validation and test sets using train_test_split from sklearn.

# First, split the data into training data (80%) and remaining data (20%).
# Second, split the remaining data into validation (10%) and test (10%) sets.
train_df, remaining_df = model_selection.train_test_split(df, test_size=0.2, random_state=42, stratify=df.label.values)

In [9]:
# Training data distribution
train_df.label.value_counts().plot(kind="bar")
plt.title("Training data distribution")

In [10]:
train_df.label.value_counts()

In [11]:
remaining_df.label.value_counts().plot(kind="bar")
plt.title("Except training data distribution")

In [12]:
# Dividing remaining data into validation and test set
valid_df, test_df = model_selection.train_test_split(remaining_df, test_size=0.5, random_state=42, stratify=remaining_df.label.values)

In [13]:
valid_df.label.value_counts()

In [14]:
valid_df.label.value_counts().plot(kind="bar")
plt.title("Validation data distribution")

In [15]:
test_df.label.value_counts().plot(kind="bar")
plt.title("Test data distribution")

In [16]:
from torch.utils.data import DataLoader
class PollenDataset(torch.utils.data.Dataset):
    """
    PyTorch dataset that serves (image, label) pairs for the pollen images.

    Each row of the supplied frame is unpacked as (image file name, label) and
    the image is read from `<data_path>/images/<image file name>`.
    """

    def __init__(self, df, data_path=DATA_PATH, transforms=None):
        """
        Arguments:
            df {pd.DataFrame} -- Frame whose rows unpack to (image name, label)
            data_path {str} -- Directory that contains the "images" folder
            transforms {callable} -- Optional transform applied to the PIL image
        """
        super().__init__()
        self.df_data = df.values
        self.data_path = data_path
        self.transforms = transforms
        self.data_dir = "images"

    def __len__(self):
        """Number of samples, i.e. number of rows in the source frame."""
        return len(self.df_data)

    def __getitem__(self, index):
        """
        Load one sample.

        Returns:
            tuple -- (image, label) where `label` is a 0-d int64 tensor and
                     `image` is the transformed image, or the RGB PIL image
                     itself when no transform was supplied
        """
        img_name, label = self.df_data[index]
        img_path = os.path.join(self.data_path, self.data_dir, img_name)
        # `convert` returns an independent in-memory copy, so the file handle
        # can be released as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        label = np.asarray(label, dtype='int64')

        # `image` is bound before the branch: previously it was only assigned
        # inside it, so a dataset built without transforms raised
        # UnboundLocalError on the first access.
        if self.transforms is not None:
            image = self.transforms(image)

        return image, torch.from_numpy(label)

In [17]:
IMG_SIZE = 224
BATCH_SIZE = 16

In [131]:
# Image pipelines: the training pipeline adds random augmentation, the
# validation pipeline is deterministic. Both end with the same tensor
# conversion and per-channel normalisation.

transforms_train = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        # Each flip is applied independently with probability 0.3
        transforms.RandomVerticalFlip(p=0.3),
        transforms.RandomHorizontalFlip(p=0.3),
        # NOTE(review): this crop already outputs IMG_SIZE x IMG_SIZE, so the
        # Resize above only changes what the crop is sampled from -- confirm intended
        transforms.RandomResizedCrop(IMG_SIZE),
        transforms.ToTensor(),
        # Presumably the ImageNet channel mean / std expected by the pretrained
        # weights -- TODO confirm against the timm model config
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

# No random augmentation here, so evaluation sees the same input every time
transforms_valid = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

In [132]:
# Train and validation dataset with transformations
train_dataset = PollenDataset(train_df, transforms=transforms_train)
valid_dataset = PollenDataset(valid_df, transforms=transforms_valid)

In [133]:
# Batch loaders for the training and validation datasets.
# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(
     dataset=train_dataset,
     batch_size=BATCH_SIZE,
     shuffle = True
     )
# Validation order is kept fixed (no shuffling).
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False
    )

In [134]:
# Pull one batch from the training loader to sanity-check its shape and the
# value range produced by the normalisation step.
train_sample = iter(train_loader)
# Use the built-in next(): the `.next()` method is not part of the Python 3
# iterator protocol and is missing from recent PyTorch loader iterators.
data, label = next(train_sample)
print(data.shape)
print(data.max())
print(data.min())

In [135]:
data.dtype

In [136]:
print("Available Vision Transformer Models: ")
timm.list_models("vit*")

### Will be using _patch16_224_ and _patch32_224_ vit models for training and testing

In [137]:
# VIT model with patch16
class ViTBase16(nn.Module):
    """
    Wrapper around the timm "vit_base_patch16_224" model whose classifier is
    replaced by a linear layer with `n_classes` outputs.
    """

    def __init__(self, n_classes, pretrained=False):
        """
        Arguments:
            n_classes {int} -- Number of outputs of the new classification head
            pretrained {bool} -- Forwarded to `timm.create_model`
        """
        super().__init__()

        backbone = timm.create_model("vit_base_patch16_224", pretrained=pretrained)
        # New classification head, sized from the input width of the stock one
        head_width = backbone.head.in_features
        backbone.head = nn.Linear(head_width, n_classes)
        self.model = backbone

    def forward(self, x):
        """Run `x` through the wrapped model and return its output unchanged."""
        return self.model(x)

# VIT model with patch32
class ViTBase32(nn.Module):
    """
    Wrapper around the timm "vit_base_patch32_224" model whose classifier is
    replaced by a linear layer with `n_classes` outputs.
    """

    def __init__(self, n_classes, pretrained=False):
        """
        Arguments:
            n_classes {int} -- Number of outputs of the new classification head
            pretrained {bool} -- Forwarded to `timm.create_model`
        """
        super().__init__()

        backbone = timm.create_model("vit_base_patch32_224", pretrained=pretrained)
        # New classification head, sized from the input width of the stock one
        head_width = backbone.head.in_features
        backbone.head = nn.Linear(head_width, n_classes)
        self.model = backbone

    def forward(self, x):
        """Run `x` through the wrapped model and return its output unchanged."""
        return self.model(x)

In [138]:
model = ViTBase16(n_classes=4, pretrained=True)

In [139]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [140]:
!pip install torchsummary

In [141]:
from torchsummary import summary
model = model.to(device)
summary(model, (3, 224, 224))

In [142]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("total trainable parameters = {} Million".format(str(round(total_params/1000000))))

In [143]:
# Measure the model on the training set before any fine-tuning, so that later
# results can be compared against this baseline to confirm the model is learning.
# Note: train_loader applies the random training augmentations and shuffling.
baseline_train_loss = 0.0
baseline_train_accuracy = 0.0
criterion = nn.CrossEntropyLoss()
# No gradients are needed for a pure evaluation pass
with torch.no_grad():
    model.eval()
    for data, target in train_loader:
        data, target = data.to(device, dtype=torch.float32), target.to(device, dtype=torch.int64)
        output = model(data)
        loss = criterion(output, target)
        # Fraction of samples in this batch whose arg-max class equals the target
        accuracy = (output.argmax(dim=1) == target).float().mean()
        # Running sums of per-batch values; divided by the batch count when reported
        baseline_train_loss += loss
        baseline_train_accuracy += accuracy

In [144]:
print("Total Training batches = {}".format(len(train_loader)))
print("Baseline Training Data Loss = {}".format(baseline_train_loss/len(train_loader)))
print("Baseline Training Data Accuracy = {} %".format(100*baseline_train_accuracy/len(train_loader)))

In [145]:
# Same untrained-baseline measurement as for the training set, on the validation set.
baseline_valid_loss = 0.0
baseline_valid_accuracy = 0.0
# No gradients are needed for a pure evaluation pass
with torch.no_grad():
    model.eval()
    for data, target in valid_loader:
        data, target = data.to(device, dtype=torch.float32), target.to(device, dtype=torch.int64)
        output = model(data)
        loss = criterion(output, target)
        # Fraction of samples in this batch whose arg-max class equals the target
        accuracy = (output.argmax(dim=1) == target).float().mean()
        # Running sums of per-batch values; divided by the batch count when reported
        baseline_valid_loss += loss
        baseline_valid_accuracy += accuracy

In [146]:
print("Total Validation batches = {}".format(len(valid_loader)))
print("Baseline Validation Data Loss = {}".format(baseline_valid_loss/len(valid_loader)))
print("Baseline Validation Data Accuracy = {} %".format(100*baseline_valid_accuracy/len(valid_loader)))

## Freezing all the layers

In [147]:
for param in model.parameters():
    param.requires_grad = False

In [148]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("total trainable parameters = {} Million".format(str(round(total_params/1000000))))

In [149]:
model.model

In [150]:
def unfreeze_blocks(model, blocks=(11,)):
    """
    Make the parameters of selected blocks trainable again.

    Arguments:
        model {nn.Module} -- Wrapper exposing the block sequence as `model.model.blocks`
        blocks {iterable of int or int} -- Index (or indices) into
            `model.model.blocks` to unfreeze; defaults to block 11 only

    Returns:
        nn.Module -- The same `model` object, modified in place
    """
    # Immutable default avoids the shared mutable-default pitfall; a bare int
    # is accepted as shorthand for a single block.
    if isinstance(blocks, int):
        blocks = (blocks,)

    for block_idx in blocks:
        for param in model.model.blocks[block_idx].parameters():
            param.requires_grad = True
    return model

In [151]:
model = unfreeze_blocks(model, [10, 11])

In [152]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("total trainable parameters = {} Million".format(str(round(total_params/1000000))))

In [153]:
def unfreeze_head(model):
    """
    Make the final layers of the classifier trainable again.

    Looks up the sub-modules `norm`, `pre_logits`, `fc_norm` and `head` on
    `model.model` and enables gradients for every parameter of those that
    exist. Missing sub-modules are skipped instead of raising AttributeError.

    Arguments:
        model {nn.Module} -- Wrapper exposing the backbone as `model.model`

    Returns:
        nn.Module -- The same `model` object, modified in place
    """
    # NOTE(review): `pre_logits` is presumably absent in newer timm releases
    # and `fc_norm` present instead -- confirm against the installed version.
    # Since timm is installed unpinned, tolerate either layout.
    for submodule_name in ("norm", "pre_logits", "fc_norm", "head"):
        submodule = getattr(model.model, submodule_name, None)
        if submodule is None:
            continue
        for param in submodule.parameters():
            param.requires_grad = True

    return model

In [154]:
model = unfreeze_head(model)
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("total trainable parameters = {} Million".format(str(round(total_params/1000000))))

#### Unfreezing the head added 4612 extra parameters

In [155]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar', best_filename='model_best.pth.tar'):
    """
    Persist a training checkpoint and optionally promote it to "best model".

    Arguments:
        state {object} -- Anything `torch.save` can serialise (here a dict of
                          epoch, model / optimizer state and best accuracy)
        is_best {bool} -- When true, the checkpoint is also copied to `best_filename`
        filename {str} -- Path the checkpoint is always written to
        best_filename {str} -- Path of the best-model copy; configurable so that
                               several models can keep separate best checkpoints
    """
    torch.save(state, filename)
    if is_best:
        print("Saving the best model !")
        shutil.copyfile(filename, best_filename)

In [156]:
# Training hyper-parameters and bookkeeping containers
LR = 0.001
epochs = 5
# Validation (and checkpointing) runs every `check_every` training batches
check_every = 100
criterion = nn.CrossEntropyLoss()
# Only parameters that are still trainable (requires_grad=True) are handed to Adam
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR)
# One entry is appended to each list per validation checkpoint
train_loss_array = []
train_acc_array = []
val_loss_array = []
val_acc_array = []
# Best validation accuracy seen so far; decides which checkpoint is kept as "best"
best_acc1 = 0

In [157]:
# Begin training
# Every `check_every` training batches the model is evaluated on the full
# validation set, the running metrics are logged and a checkpoint is written.
for epoch in range(epochs):
    model.train()
    # Running sums are kept as plain Python floats. Accumulating the loss
    # tensor itself (`epoch_loss += loss`) would keep the autograd history of
    # every batch alive and steadily grow memory usage.
    epoch_loss = 0.0
    epoch_accuracy = 0.0
    i = 0
    for counter, (data, target) in enumerate(train_loader):
        i += 1
        data, target = data.to(device, dtype=torch.float32), target.to(device, dtype=torch.int64) # load data to device

        # clear the gradients of all optimizable variables
        optimizer.zero_grad()
        # compute outputs by passing input to the model
        output = model(data)
        # the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # Calculating accuracy
        accuracy = (output.argmax(dim=1) == target).float().mean()

        # update training loss and accuracy (detached Python scalars)
        epoch_loss += loss.item()
        epoch_accuracy += accuracy.item()
        optimizer.step()

        if i % check_every == 0:
            # keep track of validation loss
            valid_loss = 0.0
            valid_accuracy = 0.0
            with torch.no_grad():
                model.eval()
                for val_data, val_target in valid_loader:
                    val_data = val_data.to(device, dtype=torch.float32)
                    val_target = val_target.to(device, dtype=torch.int64)
                    val_output = model(val_data)
                    # sums of per-batch means, averaged over the batch count below
                    valid_loss += criterion(val_output, val_target).item()
                    valid_accuracy += (val_output.argmax(dim=1) == val_target).float().mean().item()

            # Averages over the batches seen so far (train) / all batches (validation)
            mean_valid_loss = valid_loss / len(valid_loader)
            mean_valid_accuracy = valid_accuracy / len(valid_loader)
            mean_train_loss = epoch_loss / (counter + 1)
            mean_train_accuracy = epoch_accuracy / (counter + 1)

            val_loss_array.append(mean_valid_loss)
            val_acc_array.append(mean_valid_accuracy)
            train_loss_array.append(mean_train_loss)
            train_acc_array.append(mean_train_accuracy)
            print("[{} epoch {} batch] Train Loss : {:.3f} \t Train Accuracy : {:.3f} \t Valid loss : {:.3f} \t Valid Accuracy : {:.3f}".format(
                epoch + 1,
                i,
                mean_train_loss,
                mean_train_accuracy,
                mean_valid_loss,
                mean_valid_accuracy))

            # `best_acc1` is now a float, so the checkpoint no longer embeds a
            # device tensor for it.
            acc1 = mean_valid_accuracy
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer' : optimizer.state_dict(),
            }, is_best)
        # validation switches to eval mode; make sure the next batch trains
        model.train()

print("Finish Training!")

In [158]:
# The metric lists hold one value per validation checkpoint, not per epoch.
# Spread those points evenly between 1 and `epochs` to get an approximate
# epoch position for the x-axis.
epoch_array = np.linspace(1, epochs, len(train_loss_array))
plt.plot(epoch_array, train_loss_array, label = "Train Loss")
plt.plot(epoch_array, val_loss_array, label = "Validation Loss")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Curve')
plt.legend()
plt.show()

In [159]:
plt.plot(epoch_array, train_acc_array, label = "Train Accuracy")
plt.plot(epoch_array, val_acc_array, label = "Validation Accuracy")
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Train and Validation Accuracy Curve')
plt.legend()
plt.show()

### Performance measure of best model

In [160]:
# Best validation accuracy Vit16_224 Model
PATH = 'model_best.pth.tar'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using {}".format(device))
# Remap stored tensors onto the device available now; without map_location a
# checkpoint written on a GPU cannot be loaded in a CPU-only session.
state = torch.load(PATH, map_location=device)

# Rebuild the architecture without downloading pretrained weights; the
# fine-tuned weights come from the checkpoint.
model = ViTBase16(n_classes=4, pretrained=False)
model.load_state_dict(state['state_dict'])
model = model.to(device=device)

In [161]:
print("######### Generating confusion Matrix on Validation Set ########")
# Collect the predicted class and the true label for every validation sample.
prediction = []
ground_truth = []
with torch.no_grad():
    model.eval()
    for data, target in valid_loader:
        data, target = data.to(device, dtype=torch.float32), target.to(device, dtype=torch.int64)
        output = model(data)
        # extend() iterates over the batch dimension, so one 0-d tensor is
        # appended per sample (still on `device`)
        prediction.extend(output.argmax(dim=1))
        ground_truth.extend(target)

In [162]:
# Unwrap every 0-d tensor into a plain Python int for the scikit-learn metrics
prediction_array = [int(pred.item()) for pred in prediction]
ground_truth_array = [int(truth.item()) for truth in ground_truth]

In [163]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(ground_truth_array, prediction_array))
print(confusion_matrix(ground_truth_array, prediction_array))

In [174]:
import seaborn as sns
# Row-normalised confusion matrix: each row (actual class) sums to 1, so the
# cell values read as the fraction of that class predicted as each label.
cm = confusion_matrix(ground_truth_array, prediction_array)
row_totals = cm.sum(axis=1, keepdims=True)
cmn = cm.astype('float') / row_totals
fig, ax = plt.subplots(figsize=(10,10))
ax.set_title("Confusion Matrix of Patch16 model on validation set")
sns.heatmap(cmn, annot=True, fmt='.2f', ax=ax)
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show(block=False)