<a href="https://colab.research.google.com/github/aashishpiitkEigenlytics/lapsrn-document/blob/main/images_to_vgg16_feature_vectors.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load the images and their regions
# Pass each region through its respective region-specific fine-tuned VGG16
# Save these image features in a zip file, with the labels if possible
# Modify the script for the full model to accommodate this type of data

In [None]:
# Mount the user's Google Drive inside the Colab VM at /content/drive;
# the dataset archive is later read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import os, math, sys
import glob, itertools
import argparse, random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision.models import vgg19
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import save_image, make_grid

import plotly
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

from PIL import Image
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split

# Seed every random number generator this notebook relies on. Seeding only
# `random` leaves scikit-learn's train_test_split (NumPy global RNG) and
# PyTorch (DataLoader shuffling, layer initialisation) unseeded.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
import warnings
# NOTE(review): this silences *all* warnings, including deprecation notices.
warnings.filterwarnings("ignore")

import math
from pathlib import Path
from torchsummary import summary
import torchvision
from PIL import Image, ImageOps

# Use the GPU when PyTorch reports one as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

In [None]:
# Run configuration.
# NOTE(review): of these values only batch_size and n_cpu are read by the cells
# visible here; the rest may be left over from another script — confirm.
n_epochs = 50                          # number of epochs of training
dataset_path = "/content/train/test/"  # location of the dataset
batch_size = 8                         # size of the batches
lr = 8e-5                              # adam: learning rate
b1, b2 = 0.5, 0.999                    # adam: decay of first / second order momentum of gradient
decay_epoch = 100                      # epoch from which to start lr decay (larger than n_epochs)
n_cpu = 8                              # number of cpu threads to use during batch generation
hr_height, hr_width = 64, 64           # high res. image height / width
channels = 1                           # number of image channels

# Output folders are intentionally not created at the moment:
# os.makedirs("images", exist_ok=True)
# os.makedirs("saved_models", exist_ok=True)

# Derived values.
cuda = torch.cuda.is_available()
hr_shape = (hr_height, hr_width)



In [None]:
# Quietly (-q) extract the rvl_cdip_test_dataset archive from the mounted Drive into /content/train.
! unzip -q /content/drive/MyDrive/rvl_cdip_test_dataset/rvl_cdip_test_dataset.zip -d /content/train

In [None]:
# Lookup from document-class folder name to integer label; the label of a
# class is its position (0..15) in the list below.
doc2label = {
    class_name: label
    for label, class_name in enumerate([
        'advertisement',
        'budget',
        'email',
        'file_folder',
        'form',
        'handwritten',
        'invoice',
        'letter',
        'memo',
        'news_article',
        'presentation',
        'questionnaire',
        'resume',
        'scientific_publication',
        'scientific_report',
        'specification',
    ])
}

In [None]:
# Collect one (image_path, integer_label) pair for every .tif file under the
# extracted dataset. The label is looked up from the name of the folder that
# directly contains the image; an unknown folder name raises KeyError.
# rglob yields files in filesystem order, so the paths are sorted to keep the
# listing (and any seeded split made from it) reproducible across runs.
train_path = [
  (path, doc2label[path.parent.name])
  for path in sorted(Path('/content/train/content/train/test').rglob('*.tif'))
]

In [None]:
class ImageDataset(Dataset):
  """Dataset over document images given as ``(image_path, label)`` pairs.

  Each item is a dict with two entries:
    'targets'  -- the label wrapped in a tensor
    'holistic' -- the whole page after ``self.trans`` (grayscale, resized to
                  780x600, converted to a tensor)

  The ``create_header`` / ``create_footer`` / ``create_left_half`` /
  ``create_right_half`` helpers crop a region out of the transformed page and
  resize it to 227x227. ``__getitem__`` does not call them.
  """

  def __init__(self, files):
    """Store the ``(image_path, label)`` pairs and build the transforms.

    Args:
      files: indexable collection whose entries hold the image path at
        position 0 and the integer label at position 1.
    """
    self.files = files
    # Full-page preprocessing: grayscale -> 780x600 -> tensor.
    self.trans = transforms.Compose([
                                transforms.Grayscale(),
                                transforms.Resize((780,600)),
                                transforms.ToTensor()
    ])
    # Final size of every cropped region.
    self.trans2 = transforms.Resize((227,227))

  def __getitem__(self, index):
    """Return the dict described in the class docstring for ``index``.

    The index wraps around modulo the number of files.
    """
    entry = self.files[index % len(self.files)]
    image_path, target = entry[0], entry[1]

    # Image.open reads pixel data lazily, so the transform has to run inside
    # the with-block; leaving it closes the underlying file handle.
    with Image.open(image_path) as img:
      holistic = self.create_holistic(img)

    return {
        'targets' : torch.tensor(target),
        'holistic' : holistic,
    }

  # NOTE(review): torchvision's ToTensor produces (channel, height, width), so
  # the last index below is the width axis and the middle index the height
  # axis. create_header / create_footer therefore cut vertical strips and the
  # *_half helpers cut horizontal bands. Confirm this matches the crops the
  # region-specific models expect before changing anything here.

  def create_header(self, x):
    """Crop the first 256 positions of the last axis, then resize to 227x227."""
    page = self.trans(x)
    return self.trans2(page[:, :, :256])

  def create_right_half(self, x):
    """Crop the last 300 positions of axis 1 and trim 100 from both ends of the last axis, then resize to 227x227."""
    page = self.trans(x)
    return self.trans2(page[:, -300:, 100:-100])

  def create_left_half(self, x):
    """Crop the first 300 positions of axis 1 and trim 100 from both ends of the last axis, then resize to 227x227."""
    page = self.trans(x)
    return self.trans2(page[:, :300, 100:-100])

  def create_footer(self, x):
    """Crop the last 256 positions of the last axis, then resize to 227x227."""
    page = self.trans(x)
    return self.trans2(page[:, :, -256:])

  def create_holistic(self, x):
    """Return the whole page after the full-page preprocessing."""
    return self.trans(x)

  def __len__(self):
    """Number of ``(image_path, label)`` pairs."""
    return len(self.files)


In [None]:
# train_path = train_path[:len(train_path)//5]

In [None]:
# Sanity check: number of (path, label) pairs collected from the dataset folder.
len(train_path)

39996

In [None]:
## Incorporate the labels somehow when preparing the dataset using ImageDataset

In [None]:
# Hold out 10% of the (path, label) pairs. random_state pins the split, which
# train_test_split would otherwise draw from NumPy's global RNG.
train_paths, test_paths = train_test_split(train_path, test_size=0.1, random_state=42)

# Training batches are reshuffled on every pass; the held-out loader keeps a
# fixed order so that its batches can be matched back to test_paths.
train_dataloader = DataLoader(ImageDataset(train_paths), batch_size=batch_size, shuffle=True, num_workers=n_cpu)
test_dataloader = DataLoader(ImageDataset(test_paths), batch_size=batch_size, shuffle=False, num_workers=n_cpu)

In [None]:
# Number of batches (not images) in the training loader.
len(train_dataloader)

4500

In [None]:
# Number of batches (not images) in the held-out loader.
print(len(test_dataloader))

500


In [None]:
class VGG16_fine_tune_last_layer(nn.Module):
  """ImageNet-pretrained VGG16 backbone with a new 16-way fully connected head.

  Trainability of the parameters after construction:
    * feature_extractor -- parameters at positions 0..21 are frozen, the
      remaining ones (the weights and biases of layers 26 and 28) are trainable
    * classifier        -- the original VGG16 classifier, kept as a submodule
      but frozen and not used by ``forward``
    * last_layer        -- newly created, trainable
  """

  def __init__(self):
    super().__init__()

    backbone = torchvision.models.vgg16(pretrained=True)
    self.feature_extractor = nn.Sequential(*backbone.features.children())
    self.avg_pool = backbone.avgpool
    self.classifier = nn.Sequential(*backbone.classifier.children())

    # Freeze the first 22 parameter tensors of the convolutional stack and
    # leave the rest trainable.
    for position, weight in enumerate(self.feature_extractor.parameters()):
      weight.requires_grad = position >= 22
    for weight in self.avg_pool.parameters():
      weight.requires_grad = True
    for weight in self.classifier.parameters():
      weight.requires_grad = False

    # New head: 25088 -> 256 -> 128 -> 16.
    self.last_layer = nn.Sequential(
        nn.Linear(25088, 256, bias=True),
        nn.ReLU(),
        nn.Linear(256, 128, bias=True),
        nn.ReLU(),
        nn.Linear(128, 16, bias=True),
    )

  def forward(self, x):
    """Return the 16 outputs of the new head for a batch of 3-channel images.

    The original VGG16 classifier is bypassed: the pooled convolutional
    features are flattened per sample and fed straight into ``last_layer``.
    """
    pooled = self.avg_pool(self.feature_extractor(x))
    return self.last_layer(torch.flatten(pooled, start_dim=1))


In [None]:
# Build the model (this loads the pretrained VGG16 weights) and move it to the selected device.
vgg16_fine_tune_last_layer = VGG16_fine_tune_last_layer().to(device)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))




In [None]:
# Print the module tree to inspect the layer layout and layer indices.
print(vgg16_fine_tune_last_layer)

VGG16_fine_tune_last_layer(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2,

In [None]:
# named_parameters() returns a generator (see the printed type), so it has to be called again for each new pass.
print(type(vgg16_fine_tune_last_layer.feature_extractor.named_parameters()))

<class 'generator'>


In [None]:
# Print the name of every parameter that still requires gradients, i.e. the
# ones an optimiser would update.
trainable_names = (
    name
    for name, param in vgg16_fine_tune_last_layer.named_parameters()
    if param.requires_grad
)
for name in trainable_names:
    print(name)

feature_extractor.26.weight
feature_extractor.26.bias
feature_extractor.28.weight
feature_extractor.28.bias
last_layer.0.weight
last_layer.0.bias
last_layer.2.weight
last_layer.2.bias
last_layer.4.weight
last_layer.4.bias


In [None]:
## How should I save the images? One thing is for sure: once the train/test dataloader has been loaded, obtain all the images from it so that the order is not disturbed

In [None]:
## keep the batch size one, and save the processed image features in different folders


In [None]:
## But when I use them, I will use them in batches, so it is a better idea to save them in the form of batches

In [None]:
# Run the holistic (whole-page) images through the network and store one
# output tensor per batch under /content/holistic_image_features/.
# NOTE(review): no checkpoint load is visible in this notebook, so last_layer
# appears to be randomly initialised at this point — confirm before relying
# on the saved tensors.
# NOTE(review): batch['targets'] is not saved and train_dataloader shuffles,
# so the saved tensors cannot be matched back to their labels afterwards.
MAX_BATCHES = 10  # only the first 10 batches are exported
feature_dir = '/content/holistic_image_features'
os.makedirs(feature_dir, exist_ok=True)  # torch.save does not create missing folders

vgg16_fine_tune_last_layer.eval()
with torch.no_grad():
  for batch_idx, batch in enumerate(itertools.islice(train_dataloader, MAX_BATCHES)):
    # The model lives on `device`, so its inputs must be moved there as well.
    holistic_img = batch['holistic'].to(device)
    # The pages are single-channel; repeat that channel three times because
    # the first VGG16 convolution takes 3 input channels.
    holistic_img = holistic_img.repeat_interleave(3, dim=1)
    feature_tensor = vgg16_fine_tune_last_layer(holistic_img)
    # Save from the CPU so the files also load on machines without a GPU.
    torch.save(feature_tensor.cpu(), f'{feature_dir}/{batch_idx}.pt')

KeyboardInterrupt: ignored