<a href="https://colab.research.google.com/github/ericburdett/cs601r-dl/blob/master/ner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Named Entity Recognition on Handwritten Documents

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms, utils, datasets
from tqdm import tqdm
from torch.nn.parameter import Parameter
import pdb
import torchvision
import os
import gzip
import tarfile
from PIL import Image, ImageOps
import gc
import pdb
import pandas as pd
from PIL import Image
from matplotlib.pyplot import imshow
from google.colab import drive
drive.mount('/content/drive')
from IPython.core.ultratb import AutoFormattedTB
__ITB__ = AutoFormattedTB(mode = 'Verbose',color_scheme='LightBg', tb_offset = 1)

assert torch.cuda.is_available(), "Request a GPU from Runtime > Change Runtime"

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
!cp "drive/My Drive/datasets/esposalles.zip" "/content"

In [0]:
!unzip -q esposalles.zip
!rm esposalles.zip

In [0]:
class EsposallesDataset(Dataset):
  def __init__(self, label='category', img_size=128):
    if not os.path.exists('/content/labels.csv'):
      raise Exception('Esposalles dataset does not exist in /content/labels.csv')

    self.img_size = img_size
    self.label = label
    self.path = '/content/Images/'
    self.labels_df = pd.read_csv('/content/labels.csv', sep='\t', header=None, names=['word', 'category', 'person', 'transcription', 'page'])

    unique_labels = self.labels_df[label].drop_duplicates().values

    self.num_to_label = dict()
    self.label_to_num = dict()


    for index, label in zip(range(len(unique_labels)), unique_labels):
      self.num_to_label[index] = label
      self.label_to_num[label] = index

  def dicts(self):
    return self.num_to_label, self.label_to_num
  
  def num_labels(self):
    return len(self.num_to_label)

  def resize(self, img):
    old_size = img.size
    ratio = float(self.img_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])

    img = img.resize(new_size, Image.ANTIALIAS)

    new_img = Image.new("RGB", (self.img_size, self.img_size))
    new_img.paste(img, ((self.img_size-new_size[0])//2,
                        (self.img_size-new_size[1])//2))
    
    return new_img

  def df(self):
    return self.labels_df

  def open_image(self, path):
    img = Image.open(path + '.png')
    img = self.resize(img)
    x = transforms.functional.to_tensor(img)
    # x = x.view(-1, self.img_size ** 2) # Shape the image tensor in a way that can be consumable by the GRU

    return x

  def __getitem__(self, index):    
    pages = self.labels_df[self.labels_df['page'] == index]

    imgs = []
    labels = []

    for _, row in pages.iterrows():
      img = self.open_image(self.path + row['word'])
      label_text = row[self.label]
      label_num = self.label_to_num[label_text]

      imgs.append(img)
      labels.append(label_num)

    return imgs, labels

  def __len__(self):
    return len(self.labels_df['page'].drop_duplicates()) 

In [0]:
class RNN(nn.Module):
  def __init__(self, in_channels, hidden_channel_size, input_size, hidden_size, output_size, num_layers=1):
    super(RNN, self).__init__()

    self.in_channels = in_channels
    self.hidden_channel_size = hidden_channel_size
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.num_layers = num_layers

    self.conv1 = nn.Conv2d(self.in_channels, self.hidden_channel_size, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(self.hidden_channel_size, self.hidden_channel_size, kernel_size=3, padding=1)
    self.conv3 = nn.Conv2d(self.hidden_channel_size, 1, kernel_size=3, padding=1)
    self.gru = nn.GRU(input_size**2, hidden_size, num_layers)
    self.linear = nn.Linear(hidden_size, output_size)

  def forward(self, input_img, hidden):
    input_img = input_img.unsqueeze(0) # unsqueeze to comply with mini-batch for conv2d
    out = self.conv1(input_img)
    out = self.conv2(out)
    out = self.conv3(out)
    out = out.view(1, -1, self.input_size**2) # reshape to expected shape for gru
    out, hidden = self.gru(out, hidden)
    out = self.linear(out)

    return out.view(-1), hidden

  def init_hidden(self):
    return torch.zeros(self.num_layers, 1, self.hidden_size)

In [49]:
batch_size = 5
input_size = 10
num_layers = 2
hidden_size = 20
seq_len = 3
rnn = nn.GRU(input_size, hidden_size, num_layers)
inp = torch.randn(batch_size, seq_len, input_size)
h0 = torch.randn(num_layers, seq_len, hidden_size)
output, hn = rnn(inp, h0)
print(h0.shape)
print(output.shape)
print(output.view(-1, output.shape[1] * output.shape[2]).shape)

torch.Size([2, 3, 20])
torch.Size([5, 3, 20])
torch.Size([5, 60])


In [58]:
# rnn = nn.GRU()

conv = nn.Conv2d(3, 32, kernel_size=3, padding=1)
conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
conv3 = nn.Conv2d(32, 10, kernel_size=3, padding=1)

# dataset[0][0][0].unsqueeze(0).shape

conv3(conv2(conv(dataset[0][0][0].unsqueeze(0)))).shape

torch.Size([1, 10, 512, 512])

In [88]:
dataset = EsposallesDataset(img_size=512)


x = dataset[0][0][0]
model = RNN(3, 50, 512, 200, 6, num_layers=3)
hidden0 = model.init_hidden()
y_hat, hidden = model(x, hidden0)
hidden.shape

torch.Size([3, 1, 200])

In [107]:
input_size = 256

dataset = EsposallesDataset(img_size=input_size)
data_loader = DataLoader(train_dataset,
                          batch_size=1,
                          num_workers=4,
                          shuffle=False)


hidden_size = 200
output_size = dataset.num_labels()
num_layers = 3

model = RNN(3, input_size, hidden_size, output_size, num_layers=3)
model = model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
objective = nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHS):
  loop = tqdm(total=len(data_loader), position=0, leave=True)

  for batch, (x, y_truth) in enumerate(data_loader):
    x, y_truth = x.cuda(async=True), y_truth.cuda(async=True)
    print(x)
    print(y_truth)

    loss = 0
    hidden = model.init_hidden()
    optimizer.zero_grad()

    accs = []
    losses = []

    for word, label in zip(x, y_truth):
      optimizer.zero_grad()
      pred, hidden = model(word, hidden))

      acc = torch.eq(y_hat.argmax(), label)
      ls = objective(pred, label)

      accs.append(acc.item())
      losses.append(ls.item())

      loss += ls

    loss.backward()
    optimizer.step()

    loop.set_description('Epoch:{}, Loss:{}, Acc:{}'.format(epoch, np.mean(losses), np.mean(accs)))
    loop.update(1)


SyntaxError: ignored