# Introduction
> This notebook fine-tunes several pretrained models for document classification.

The following models were fine-tuned under different hyperparameters, and the experiments were tracked with TensorBoard:
* ResNet
* Inception v3
* Densenet
* VGG11_bn
* Squeezenet
* Alexnet


# Setting up working directory

In [None]:
# Work from the project folder on the mounted Drive; presumably this is what makes the
# local `document_classification` package importable further down — TODO confirm.
%cd /content/drive/MyDrive/DocumentUpskilling

/content/drive/MyDrive/DocumentUpskilling


# Installing necessary dependencies

In [None]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


# Importing Libraries

In [None]:
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms
from torchinfo import summary
from document_classification import data_setup, engine

In [None]:
# Device-agnostic setup: use the GPU when one is visible to torch, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
from pathlib import Path

In [None]:
# Dataset root on the mounted Drive, with one sub-folder per split
DATA_DIR = "/content/drive/MyDrive/DocumentUpskilling/dataset/documents"
data_path = Path(DATA_DIR)
train_dir, test_dir = (data_path / split for split in ("train", "test"))

train_dir, test_dir

(PosixPath('/content/drive/MyDrive/DocumentUpskilling/dataset/documents/train'),
 PosixPath('/content/drive/MyDrive/DocumentUpskilling/dataset/documents/test'))

In [None]:
# Get a set of pretrained model weights (the default EfficientNet-B0 checkpoint).
# NOTE(review): `weights` is not referenced again in the visible part of this notebook, and
# EfficientNet is not among the models trained below — confirm whether this cell is still needed.
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
weights

EfficientNet_B0_Weights.IMAGENET1K_V1

# Data Augmentation

In [None]:
# Augmentation + preprocessing pipeline; the steps run in the order listed
manual_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomRotation(degrees=75),
        transforms.RandomHorizontalFlip(p=0.6),
        transforms.RandomVerticalFlip(p=0.6),
        transforms.RandomPerspective(distortion_scale=0.4),
        transforms.ToTensor(),
        # Per-channel normalisation with the ImageNet mean / std
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Build the train / test DataLoaders (plus the class names) from the two split folders,
# passing the augmentation pipeline defined above as the single transform
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transforms,
    batch_size=4,
)
train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x7b61c4d66bc0>,
 <torch.utils.data.dataloader.DataLoader at 0x7b61a442b370>,
 ['citizenship', 'license', 'others', 'passport'])

# Pre-trained Models

### Helper Functions

A helper that creates a `SummaryWriter` instance for logging to TensorBoard.

In [None]:
from torch.utils.tensorboard import SummaryWriter
def create_writer(experiment_name: str,
                  model_name: str,
                  extra: str=None):
  """Create a ``SummaryWriter`` that logs into a dated run directory.

  The log directory is ``runs/<YYYY-MM-DD>/<experiment_name>/<model_name>``, with
  ``<extra>`` appended as one more path component when it is given and non-empty.

  Args:
    experiment_name: Name of the experiment (third path component).
    model_name: Name of the model (fourth path component).
    extra: Optional additional path component, e.g. a note about the run.

  Returns:
    A ``SummaryWriter`` whose ``log_dir`` is the path described above.
  """
  from datetime import datetime
  import os

  # Date-only stamp: every run started on the same day lands under the same day folder
  today = datetime.now().strftime("%Y-%m-%d")
  path_parts = ["runs", today, experiment_name, model_name]
  if extra:
    path_parts.append(extra)
  log_dir = os.path.join(*path_parts)

  print(f"[INFO] Created SummaryWriter, saving to: {log_dir}......")
  return SummaryWriter(log_dir=log_dir)

A helper that builds the train and test data loaders for the input size required by the selected model.

In [None]:
def train_test_dataloader(input_size, batch_size=4):
  """Build the train and test DataLoaders for a model with the given input resolution.

  Args:
    input_size: Target crop size handed to ``RandomResizedCrop`` (224 or 299 for the
      models used in this notebook).
    batch_size: Batch size forwarded to ``data_setup.create_dataloaders``. Defaults to 4,
      the value that was previously hard-coded.

  Returns:
    A ``(train_dataloader, test_dataloader)`` tuple. The class names returned by
    ``data_setup.create_dataloaders`` are discarded.
  """
  # NOTE(review): a single transform containing random crop / flip is passed to the helper,
  # so the test loader presumably receives random augmentation as well — confirm against
  # data_setup.create_dataloaders and consider a deterministic Resize/CenterCrop for evaluation.
  custom_transforms = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # Per-channel normalisation with the ImageNet mean / std
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
  print("[INFO] Preparing Data Loaders")
  train_dataloader, test_dataloader, _class_names = data_setup.create_dataloaders(
      train_dir=train_dir,
      test_dir=test_dir,
      transform=custom_transforms,
      batch_size=batch_size)
  return train_dataloader, test_dataloader

Initialize different pre-trained models

In [None]:
from torchvision import models

def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    When ``feature_extracting`` is falsy the model is left untouched. The model is
    modified in place and nothing is returned.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision model with a fresh ``num_classes``-way head, plus its data loaders.

    Args:
        model_name: One of ``"resnet"`` (ResNet-18), ``"alexnet"``, ``"vgg"`` (VGG11-BN),
            ``"squeezenet"`` (SqueezeNet 1.0), ``"densenet"`` (DenseNet-121) or
            ``"inception"`` (Inception v3).
        num_classes: Number of outputs of the replacement classification layer(s).
        feature_extract: When truthy, all loaded parameters are frozen before the new head
            is attached, so only the new head layer(s) remain trainable.
        use_pretrained: Load the ImageNet checkpoint when True, random weights otherwise.

    Returns:
        ``(model, input_size, train_dataloader, test_dataloader)`` where ``input_size`` is
        the square input resolution used to build the loaders (299 for Inception v3,
        224 for every other model).

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # `weights=` replaces the deprecated `pretrained=` flag; IMAGENET1K_V1 is the checkpoint
    # that `pretrained=True` used to select for each of these architectures.
    weights = "IMAGENET1K_V1" if use_pretrained else None

    print("[INFO] Initializing Model")

    if model_name == "resnet":
        # ResNet-18: the head is the single `fc` layer
        model_ft = models.resnet18(weights=weights)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        # AlexNet: the head is the last layer of the `classifier` stack
        model_ft = models.alexnet(weights=weights)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[6] = nn.Linear(model_ft.classifier[6].in_features, num_classes)
        input_size = 224

    elif model_name == "vgg":
        # VGG11 with batch norm: same classifier layout as AlexNet
        model_ft = models.vgg11_bn(weights=weights)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[6] = nn.Linear(model_ft.classifier[6].in_features, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        # SqueezeNet 1.0: the head is a 1x1 convolution rather than a linear layer
        model_ft = models.squeezenet1_0(weights=weights)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        # DenseNet-121: the head is the single `classifier` layer
        model_ft = models.densenet121(weights=weights)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier = nn.Linear(model_ft.classifier.in_features, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3: expects (299, 299) images and has an auxiliary output,
        # so both the auxiliary and the primary heads are replaced
        model_ft = models.inception_v3(weights=weights)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.AuxLogits.fc = nn.Linear(model_ft.AuxLogits.fc.in_features, num_classes)
        model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
        input_size = 299

    else:
        # Raise instead of calling exit(): inside a notebook exit() shuts the kernel down
        # rather than reporting the mistake to the caller.
        raise ValueError(
            f"Invalid model name {model_name!r}; expected one of: "
            "resnet, alexnet, vgg, squeezenet, densenet, inception"
        )

    # Loaders depend only on the input resolution, so they are built once for all branches
    train_dataloader, test_dataloader = train_test_dataloader(input_size)

    return model_ft, input_size, train_dataloader, test_dataloader

In [None]:
def final_train(model_name, epochs, train_dataloader, test_dataloader, optimizer, loss_fn, model=None):
  """Train one model with ``engine.train`` while logging the run to TensorBoard.

  Args:
    model_name: Either the run name (a string, in which case ``model`` must be given) or,
      for backward compatibility, the model object itself. In the latter case the run name
      is derived from the lower-cased class name of the model.
    epochs: Number of epochs, forwarded to ``engine.train`` and used in the log directory.
    train_dataloader: Training data loader forwarded to ``engine.train``.
    test_dataloader: Test data loader forwarded to ``engine.train``.
    optimizer: Optimizer forwarded to ``engine.train``.
    loss_fn: Loss function forwarded to ``engine.train``.
    model: The model to train. Optional; when omitted, ``model_name`` is taken to be the model.

  Returns:
    Whatever ``engine.train`` returns.

  Raises:
    TypeError: If only a string was supplied, so there is no model object to train.
  """
  from timeit import default_timer as timer

  # Older calls pass the model object as the first argument; keep accepting that.
  if model is None:
    model = model_name
  if isinstance(model, str):
    raise TypeError(
        "final_train needs a model object: pass it as `model=` "
        "(or as the first argument), not just a model name"
    )
  # The writer needs a string path component, never the module itself
  if isinstance(model_name, str):
    run_name = model_name
  else:
    run_name = type(model).__name__.lower()

  torch.manual_seed(42)
  torch.cuda.manual_seed(42)
  # Start the timer
  start_time = timer()

  # create writer
  writer = create_writer("doc_classification", run_name, f"{epochs} epochs")
  # train and save the results
  results = engine.train(model=model,
                        train_dataloader=train_dataloader,
                        test_dataloader=test_dataloader,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        epochs=epochs,
                        writer=writer,
                        track_experiment=True,
                        device=device)

  end_time = timer()
  print(f"[INFO] Total training time :{end_time-start_time:.3f} seconds")
  return results

In [None]:
# Architectures to fine-tune, in training order; each entry is a name accepted by initialize_model
model_list = [
    "resnet",
    "alexnet",
    "vgg",
    "squeezenet",
    "inception",
    "densenet",
]

In [None]:
# Fine-tune every architecture in `model_list` with the same loss and optimizer settings
import torch.optim as optim
loss_fn = nn.CrossEntropyLoss()
feature_extract = True
for model_name in model_list:
  model_ft, input_size, train_dataloader, test_dataloader = initialize_model(
      model_name=model_name,
      num_classes=len(class_names),
      feature_extract=feature_extract)

  # Only parameters that still require gradients go to the optimizer. With
  # feature_extract=True these are the freshly created head layers; with
  # feature_extract=False nothing is frozen, so this is every parameter.
  named_trainable = [(name, param)
                     for name, param in model_ft.named_parameters()
                     if param.requires_grad]
  print("Params to learn:")
  for name, _ in named_trainable:
    print("\t",name)
  params_to_update = [param for _, param in named_trainable]

  optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
  print(f"Training {model_name}")
  print("-"*10)
  # final_train's first parameter receives the model object. `loss_fn` must be passed by
  # keyword: a positional argument after keyword arguments is a SyntaxError.
  final_train(model_ft,
              epochs=25,
              train_dataloader=train_dataloader,
              test_dataloader=test_dataloader,
              optimizer=optimizer_ft,
              loss_fn=loss_fn)

[INFO] Initializing Model




[INFO] Preparing Data Loaders
Params to learn:
	 fc.weight
	 fc.bias
Training resnet
----------
[INFO] Created SummaryWriter, saving to: runs/2024-01-16/doc_classification/resnet/25 epochs......


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.6616 | train_acc: 0.1167 | test_loss: 1.5312 | test_acc: 0.2500
Epoch: 2 | train_loss: 1.5228 | train_acc: 0.2667 | test_loss: 1.5527 | test_acc: 0.1875
Epoch: 3 | train_loss: 1.5867 | train_acc: 0.1333 | test_loss: 1.3314 | test_acc: 0.2500
Epoch: 4 | train_loss: 1.5711 | train_acc: 0.2500 | test_loss: 1.6756 | test_acc: 0.2500
Epoch: 5 | train_loss: 1.5817 | train_acc: 0.2333 | test_loss: 1.5746 | test_acc: 0.3125
Epoch: 6 | train_loss: 1.6636 | train_acc: 0.1833 | test_loss: 1.5089 | test_acc: 0.2500
Epoch: 7 | train_loss: 1.5236 | train_acc: 0.2167 | test_loss: 1.4774 | test_acc: 0.2500
Epoch: 8 | train_loss: 1.5949 | train_acc: 0.2000 | test_loss: 1.5101 | test_acc: 0.2500
Epoch: 9 | train_loss: 1.5724 | train_acc: 0.1833 | test_loss: 1.3985 | test_acc: 0.2500
Epoch: 10 | train_loss: 1.5642 | train_acc: 0.2167 | test_loss: 1.4920 | test_acc: 0.3125
Epoch: 11 | train_loss: 1.5758 | train_acc: 0.2500 | test_loss: 1.4943 | test_acc: 0.1875
Epoch: 12 | train_l



[INFO] Preparing Data Loaders
Params to learn:
	 classifier.6.weight
	 classifier.6.bias
Training alexnet
----------
[INFO] Created SummaryWriter, saving to: runs/2024-01-16/doc_classification/alexnet/25 epochs......


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.6616 | train_acc: 0.1167 | test_loss: 1.5312 | test_acc: 0.2500
Epoch: 2 | train_loss: 1.5228 | train_acc: 0.2667 | test_loss: 1.5527 | test_acc: 0.1875
Epoch: 3 | train_loss: 1.5867 | train_acc: 0.1333 | test_loss: 1.3314 | test_acc: 0.2500
Epoch: 4 | train_loss: 1.5711 | train_acc: 0.2500 | test_loss: 1.6756 | test_acc: 0.2500
Epoch: 5 | train_loss: 1.5817 | train_acc: 0.2333 | test_loss: 1.5746 | test_acc: 0.3125
Epoch: 6 | train_loss: 1.6636 | train_acc: 0.1833 | test_loss: 1.5089 | test_acc: 0.2500
Epoch: 7 | train_loss: 1.5236 | train_acc: 0.2167 | test_loss: 1.4774 | test_acc: 0.2500
Epoch: 8 | train_loss: 1.5949 | train_acc: 0.2000 | test_loss: 1.5101 | test_acc: 0.2500
Epoch: 9 | train_loss: 1.5724 | train_acc: 0.1833 | test_loss: 1.3985 | test_acc: 0.2500
Epoch: 10 | train_loss: 1.5642 | train_acc: 0.2167 | test_loss: 1.4920 | test_acc: 0.3125
Epoch: 11 | train_loss: 1.5758 | train_acc: 0.2500 | test_loss: 1.4943 | test_acc: 0.1875
Epoch: 12 | train_l



[INFO] Preparing Data Loaders
Params to learn:
	 classifier.6.weight
	 classifier.6.bias
Training vgg
----------
[INFO] Created SummaryWriter, saving to: runs/2024-01-16/doc_classification/vgg/25 epochs......


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.6616 | train_acc: 0.1167 | test_loss: 1.5312 | test_acc: 0.2500
Epoch: 2 | train_loss: 1.5228 | train_acc: 0.2667 | test_loss: 1.5527 | test_acc: 0.1875
Epoch: 3 | train_loss: 1.5867 | train_acc: 0.1333 | test_loss: 1.3314 | test_acc: 0.2500
Epoch: 4 | train_loss: 1.5711 | train_acc: 0.2500 | test_loss: 1.6756 | test_acc: 0.2500
Epoch: 5 | train_loss: 1.5817 | train_acc: 0.2333 | test_loss: 1.5746 | test_acc: 0.3125
Epoch: 6 | train_loss: 1.6636 | train_acc: 0.1833 | test_loss: 1.5089 | test_acc: 0.2500
Epoch: 7 | train_loss: 1.5236 | train_acc: 0.2167 | test_loss: 1.4774 | test_acc: 0.2500
Epoch: 8 | train_loss: 1.5949 | train_acc: 0.2000 | test_loss: 1.5101 | test_acc: 0.2500
Epoch: 9 | train_loss: 1.5724 | train_acc: 0.1833 | test_loss: 1.3985 | test_acc: 0.2500
Epoch: 10 | train_loss: 1.5642 | train_acc: 0.2167 | test_loss: 1.4920 | test_acc: 0.3125
Epoch: 11 | train_loss: 1.5758 | train_acc: 0.2500 | test_loss: 1.4943 | test_acc: 0.1875
Epoch: 12 | train_l



  0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.6616 | train_acc: 0.1167 | test_loss: 1.5312 | test_acc: 0.2500
Epoch: 2 | train_loss: 1.5228 | train_acc: 0.2667 | test_loss: 1.5527 | test_acc: 0.1875
Epoch: 3 | train_loss: 1.5867 | train_acc: 0.1333 | test_loss: 1.3314 | test_acc: 0.2500
Epoch: 4 | train_loss: 1.5711 | train_acc: 0.2500 | test_loss: 1.6756 | test_acc: 0.2500
Epoch: 5 | train_loss: 1.5817 | train_acc: 0.2333 | test_loss: 1.5746 | test_acc: 0.3125
Epoch: 6 | train_loss: 1.6636 | train_acc: 0.1833 | test_loss: 1.5089 | test_acc: 0.2500
Epoch: 7 | train_loss: 1.5236 | train_acc: 0.2167 | test_loss: 1.4774 | test_acc: 0.2500
Epoch: 8 | train_loss: 1.5949 | train_acc: 0.2000 | test_loss: 1.5101 | test_acc: 0.2500
Epoch: 9 | train_loss: 1.5724 | train_acc: 0.1833 | test_loss: 1.3985 | test_acc: 0.2500
Epoch: 10 | train_loss: 1.5642 | train_acc: 0.2167 | test_loss: 1.4920 | test_acc: 0.3125
Epoch: 11 | train_loss: 1.5758 | train_acc: 0.2500 | test_loss: 1.4943 | test_acc: 0.1875
Epoch: 12 | train_l



[INFO] Preparing Data Loaders
Params to learn:
	 AuxLogits.fc.weight
	 AuxLogits.fc.bias
	 fc.weight
	 fc.bias
Training inception
----------
[INFO] Created SummaryWriter, saving to: runs/2024-01-16/doc_classification/inception/25 epochs......


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.5148 | train_acc: 0.1667 | test_loss: 1.4400 | test_acc: 0.1875
Epoch: 2 | train_loss: 1.4638 | train_acc: 0.1833 | test_loss: 1.4599 | test_acc: 0.1875
Epoch: 3 | train_loss: 1.4647 | train_acc: 0.2167 | test_loss: 1.4160 | test_acc: 0.1875
Epoch: 4 | train_loss: 1.4464 | train_acc: 0.2500 | test_loss: 1.4949 | test_acc: 0.2500
Epoch: 5 | train_loss: 1.4559 | train_acc: 0.2500 | test_loss: 1.4896 | test_acc: 0.2500
Epoch: 6 | train_loss: 1.5324 | train_acc: 0.2000 | test_loss: 1.3844 | test_acc: 0.2500
Epoch: 7 | train_loss: 1.4779 | train_acc: 0.1667 | test_loss: 1.4188 | test_acc: 0.3125
Epoch: 8 | train_loss: 1.4858 | train_acc: 0.2667 | test_loss: 1.4090 | test_acc: 0.3750
Epoch: 9 | train_loss: 1.4883 | train_acc: 0.1500 | test_loss: 1.3917 | test_acc: 0.2500
Epoch: 10 | train_loss: 1.5180 | train_acc: 0.1667 | test_loss: 1.4455 | test_acc: 0.1875
Epoch: 11 | train_loss: 1.4368 | train_acc: 0.2833 | test_loss: 1.4737 | test_acc: 0.1875
Epoch: 12 | train_l



[INFO] Preparing Data Loaders
Params to learn:
	 classifier.weight
	 classifier.bias
Training densenet
----------
[INFO] Created SummaryWriter, saving to: runs/2024-01-16/doc_classification/densenet/25 epochs......


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.6616 | train_acc: 0.1167 | test_loss: 1.5312 | test_acc: 0.2500
Epoch: 2 | train_loss: 1.5228 | train_acc: 0.2667 | test_loss: 1.5527 | test_acc: 0.1875
Epoch: 3 | train_loss: 1.5867 | train_acc: 0.1333 | test_loss: 1.3314 | test_acc: 0.2500
Epoch: 4 | train_loss: 1.5711 | train_acc: 0.2500 | test_loss: 1.6756 | test_acc: 0.2500
Epoch: 5 | train_loss: 1.5817 | train_acc: 0.2333 | test_loss: 1.5746 | test_acc: 0.3125
Epoch: 6 | train_loss: 1.6636 | train_acc: 0.1833 | test_loss: 1.5089 | test_acc: 0.2500
Epoch: 7 | train_loss: 1.5236 | train_acc: 0.2167 | test_loss: 1.4774 | test_acc: 0.2500
Epoch: 8 | train_loss: 1.5949 | train_acc: 0.2000 | test_loss: 1.5101 | test_acc: 0.2500
Epoch: 9 | train_loss: 1.5724 | train_acc: 0.1833 | test_loss: 1.3985 | test_acc: 0.2500
Epoch: 10 | train_loss: 1.5642 | train_acc: 0.2167 | test_loss: 1.4920 | test_acc: 0.3125
Epoch: 11 | train_loss: 1.5758 | train_acc: 0.2500 | test_loss: 1.4943 | test_acc: 0.1875
Epoch: 12 | train_l

### Tensorboard Visualization

In [None]:
# Load the TensorBoard notebook extension and open the dashboard on the `runs` directory,
# which is where create_writer places its log folders
%load_ext tensorboard
%tensorboard --logdir runs