In [1]:
import os
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import torch
from torchvision import models, transforms
from torchsummary import summary
from torch import nn
from torch.utils.data import Dataset, DataLoader
from skimage import io, transform
import tensorboard

In [None]:
!cp /content/drive/MyDrive/datasets/archive.zip ./
!unzip archive.zip
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
df = pd.read_csv("labels_final.csv")
df

Unnamed: 0,path,label
0,imagesv/v/o/h/voh71d00/509132755+-2755.tif,3
1,imagesl/l/x/t/lxt19d00/502213303.tif,3
2,imagesx/x/e/d/xed05a00/2075325674.tif,2
3,imageso/o/j/b/ojb60d00/517511301+-1301.tif,3
4,imagesq/q/z/k/qzk17e00/2031320195.tif,7
...,...,...
47995,imagesk/k/q/l/kql82f00/tob07414.87.tif,10
47996,imagesi/i/r/r/irr80c00/2084343690_3692.tif,12
47997,imagesa/a/z/h/azh32d00/2063887153_7176.tif,6
47998,imagesg/g/p/d/gpd45f00/0060075263.tif,8


In [29]:
# Dataset and Dataloader
class DocumentDataset(Dataset):
    def __init__(self, csv_file, root_dir='./data_final',transform=None):
        self.document_df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.document_df)

    def __getitem__(self,idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.document_df.iloc[idx, 0])
        img_label = self.document_df.iloc[idx, 1]
        image = io.imread(img_name)

        if self.transform is not None:
            image = self.transform(image)

        return image,img_label

h,w = (224,224)
custom_transforms = transforms.Compose([
                                        transforms.ToPILImage(),
                                        transforms.Resize([h, w]),
                                        transforms.RandomPerspective(0.2),
                                        transforms.Lambda(lambda img : img.convert("RGB")),
                                        transforms.ToTensor()
                                        ])

N = len(df)
test_size = int(0.2*N)
train_size = N-test_size

document_dataset = DocumentDataset("labels_final.csv", root_dir="./data_final",transform=custom_transforms)
test_data, train_data = torch.utils.data.random_split(document_dataset, [test_size, train_size], generator=torch.Generator().manual_seed(42))

train_loader = DataLoader(train_data,batch_size=64,shuffle=False)
test_loader =  DataLoader(test_data,batch_size=64,shuffle=False)

In [30]:
def check_accuracy(loader, model):
  num_correct = 0
  num_samples = 0
  model.eval()
  with torch.no_grad():
    for x,y in loader:
      x = x.to(device=device)
      y = y.to(device=device)

      scores = model(x)
      _,predictions = scores.max(1)
      num_correct += (predictions==y).sum()
      num_samples += predictions.size(0)

  model.train()
  return float(float(num_correct)/float(num_samples))*100

def train(model, train_loader, test_loader ,optimizer, criterion, goal_acc, num_epochs, writer, scheduler=None, scheduler_onplateau_metric=None):
    !rm runs/ # remove old logs
    writer.add_graph(model, torch.randn(1,3,224,224).to(device))
    for epoch in range(num_epochs):
        batch_loss_sum = 0
        for batch_idx,(data, targets) in enumerate(train_loader):
            data = data.to(device=device)
            targets = targets.to(device=device)

            scores = model(data)
            loss = criterion(scores,targets)

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()

            batch_loss_sum += loss.item()
        batch_loss_avg = batch_loss_sum/len(train_loader)
        test_acc=check_accuracy(test_loader,model)

        writer.add_scalar('Accuracy/test', test_acc, epoch+1)
        writer.add_scalar('Loss/train', batch_loss_avg,epoch+1)

        print("Epoch [{}/{}]: Train Loss: {:.3f} Test Accuracy: {:.3f}"\
                .format(str(epoch+1).zfill(2),num_epochs,batch_loss_avg,test_acc))
    
        if(float(test_acc) > goal_acc and epoch!=0):# We stop training after reaching goal test acc
            print("Training stoppped after surpassing {}% test accuracy.".format(goal_acc))
            writer.close()
            break

In [6]:
vgg_freeze = models.vgg16(pretrained=True,progress=False).features
for layer in vgg_freeze:
    layer.requires_grad = False
vgg_freeze

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

Model 1


In [31]:
model1_last_layers = nn.Sequential(
          nn.Conv2d(in_channels=512,kernel_size=4,out_channels=256),# 7,7 to 4,4
          nn.ReLU(),
          
          nn.Flatten(),
          nn.Linear(in_features = 256*4*4, out_features=256*2),
          nn.ReLU(),
          nn.Dropout(),

          nn.Linear(in_features = 256*2, out_features=256),
          nn.ReLU(),
          nn.Dropout(),

          nn.Linear(in_features = 256, out_features=16)

        )
model1 = nn.Sequential(
        vgg_freeze,
        model1_last_layers
        )
model1.to(device=device)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [32]:
lr1 = 0.01
goal_acc1 = 60
num_epochs1 = 25
optimizer1 = torch.optim.SGD(model1[1].parameters(), lr=lr1, momentum=0.9, nesterov=True)
criterion1 = nn.CrossEntropyLoss()
writer1 = SummaryWriter('runs/model1')

train(model = model1,
      train_loader = train_loader,
      test_loader = test_loader,
      optimizer = optimizer1,
      criterion = criterion1,
      goal_acc = goal_acc1,
      num_epochs = num_epochs1,
      writer = writer1,
      scheduler = None,
      scheduler_onplateau_metric=None)

Epoch [01/25]: Train Loss: 1.752 Test Accuracy: 58.302
Epoch [02/25]: Train Loss: 1.309 Test Accuracy: 65.979
Training stoppped after surpassing 60% test accuracy.


Model 2

In [38]:
model2_last_layers = nn.Sequential(
          nn.Conv2d(in_channels=512,kernel_size=7,out_channels=128), #512x7x7 to 128x1x1
          nn.ReLU(),

          nn.Conv2d(in_channels=128, kernel_size=1, out_channels=64),
          nn.ReLU(),

          nn.Flatten(),

          nn.Linear(in_features = 64, out_features=16)
        )
model2 = nn.Sequential(
        vgg_freeze,
        model2_last_layers
        )
model2.to(device)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [39]:
lr2 = 0.01
goal_acc2 = 60
num_epochs2 = 25
optimizer2 = torch.optim.SGD(model2[1].parameters(), lr=lr2, momentum=0.9, nesterov=True)
criterion2 = nn.CrossEntropyLoss()
writer2 = SummaryWriter('runs/model2')

train(model = model2,
      train_loader = train_loader,
      test_loader = test_loader,
      optimizer = optimizer2,
      criterion = criterion2,
      goal_acc = goal_acc2,
      num_epochs = num_epochs2,
      writer = writer2,
      scheduler = None,
      scheduler_onplateau_metric=None)

Epoch [01/25]: Train Loss: 1.353 Test Accuracy: 64.760
Epoch [02/25]: Train Loss: 1.013 Test Accuracy: 68.146
Training stoppped after surpassing 60% test accuracy.


In [50]:
model3_last_layers = nn.Sequential(
          nn.Conv2d(in_channels=512,kernel_size=7,out_channels=128), #512x7x7 to 128x1x1
          nn.ReLU(),

          nn.Conv2d(in_channels=128, kernel_size=1, out_channels=64),
          nn.ReLU(),

          nn.Flatten(),

          nn.Linear(in_features = 64, out_features=16)
        )

for l in vgg_freeze[17:]:
    l.requires_grad = True

model3 = nn.Sequential(*(list(vgg_freeze)+
        list(model3_last_layers))
        )
model3.to(device)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [52]:
lr3 = 0.01
goal_acc3 = 6
num_epochs3 = 25
optimizer3 = torch.optim.SGD(model3[17:].parameters(), lr=lr3, momentum=0.9, nesterov=True)
criterion3 = nn.CrossEntropyLoss()
writer3 = SummaryWriter('runs/model3')

train(model = model3,
      train_loader = train_loader,
      test_loader = test_loader,
      optimizer = optimizer3,
      criterion = criterion3,
      goal_acc = goal_acc3,
      num_epochs = num_epochs3,
      writer = writer3,
      scheduler = None,
      scheduler_onplateau_metric=None)

Epoch [01/25]: Train Loss: 1.168 Test Accuracy: 76.281


KeyboardInterrupt: ignored