# Table detector

## Colab requirements

Before restarting runtime (remember to select GPU runtime)$\dots$

In [None]:
!git clone https://github.com/Wadaboa/table-detector.git
!pip install -r table-detector/init/colab_requirements.txt

After restarting runtime$\dots$

In [None]:
import os, sys

sys.path.insert(0, "table-detector")
os.chdir("table-detector")

## Imports

In [1]:
import sys

sys.path.insert(0, "src")

In [2]:
import torch
import torchvision
import wandb
import yaml
import ipywidgets as widgets
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed

import learning
import transforms
import detectors
import utils
import train

%load_ext autoreload
%autoreload 2

## Initialization

### Matplotlib

In [3]:
%matplotlib inline
plt.rcParams['figure.figsize'] = [10, 12]
plt.rcParams['figure.dpi'] = 100
plt.rcParams['axes.xmargin'] = .05
plt.rcParams['axes.ymargin'] = .05
plt.style.use('ggplot')

### Weights & biases

In [None]:
!wandb login

In [None]:
!wandb enabled

### PyTorch and numpy

In [4]:
DEVICE = utils.get_device()
DEVICE

device(type='cpu')

In [5]:
if DEVICE.type != "cpu":
    !nvidia-smi

## Utils

In [80]:
def show(img):
    np_img = utils.to_numpy(img)
    plt.imshow(np_img)

def draw_predictions(dataset, index, model, conf_thresh=0.5, width=4):
    images = []
    for image, _ in dataset[index]:
        output = model([image])[0]
        boxes = torch.tensor([])
        colors = None
        if len(output["boxes"]) > 0:
            mask = (output["scores"] >= conf_thresh).repeat(1, 4).view(-1, 4)
            boxes = output["boxes"][mask].view(-1, 4)
            colors = utils.generate_colors(len(boxes))
        img = utils.draw_bounding_boxes(
            utils.denormalize_image(image), boxes, width=width, colors=colors
        )
        images.append(img)
    grid = torchvision.utils.make_grid(images)
    show(grid)

def draw_targets(dataset, index, width=4):
    images = []
    for image, target in dataset[index]:
        colors = utils.generate_colors(len(target["boxes"]))
        img = utils.draw_bounding_boxes(
            utils.denormalize_image(image), target["boxes"], width=width, colors=colors
        )
        images.append(img)
    grid = torchvision.utils.make_grid(images)
    show(grid)

## Data loading

In [7]:
with open('parameters.yml', 'r') as conf:
    args = yaml.load(conf, Loader=yaml.FullLoader)
params = utils.Struct(**args)
params.generic.device = DEVICE
args

{'dataset': {'train': 'marmot',
  'val': 'marmot',
  'dummy': {'enabled': False, 'size': 10},
  'marmot': {'path': ['datasets/marmot/table_recognition/data/english/positive',
    'datasets/marmot/table_recognition/data/chinese/positive']},
  'icdar13': {'path': ['datasets/icdar13/icdar2013-competition-dataset-with-gt/competition-dataset-eu',
    'datasets/icdar13/icdar2013-competition-dataset-with-gt/competition-dataset-us']}},
 'generic': {'random_seed': 42,
  'workers': 4,
  'device': 'cpu',
  'wandb': {'enabled': False,
   'project': 'table-detector',
   'entity': 'wadaboa',
   'watch': 'all'}},
 'training': {'train_split': 0.8,
  'epochs': 28,
  'log_interval': 1,
  'batch_size': 2,
  'checkpoints': {'save': True, 'frequency': 3, 'path': 'checkpoints'}},
 'backbone': {'family': 'vgg',
  'type': 'vgg16',
  'pretrained': True,
  'input_size': {'exact': {'width': 224, 'height': 224},
   'bound': {'min': 800, 'max': 1300}},
  'imagenet_params': {'mean': [0.485, 0.456, 0.406],
   'std':

In [81]:
train_dataset, test_dataset = train.get_dataset(params)

In [9]:
interact(
    draw_targets, 
    dataset=fixed(train_dataset), 
    index=widgets.IntSlider(min=0, max=len(train_dataset), step=1, value=10)
);

interactive(children=(IntSlider(value=10, description='index', max=766), IntSlider(value=4, description='width…

## Training

In [10]:
train_dataloader = train.get_train_dataloader(params, train_dataset)
test_dataloader = train.get_test_dataloader(params, test_dataset)

In [73]:
params.detector.type = "faster_rcnn"
detector = detectors.get_detector(params, train.NUM_CLASSES)

In [None]:
train.wandb_init(params, args)
train.wandb_watch(params, detector)

In [12]:
optimizer = train.get_optimizer(params, detector)
lr_scheduler = train.get_lr_scheduler(params, optimizer)

In [43]:
learning.training_loop(
    params, detector, optimizer, train_dataloader,
    test_dataloader, lr_scheduler=lr_scheduler
)

AssertionError: 

In [None]:
train.wandb_finish(params)

## Testing

In [74]:
detector.eval()

FasterRCNN(
  (backbone): Backbone(type=vgg16, family=vgg, pretrained=True, input_shape=3x224x224, output_shape=512x7x7)
  (faster_rcnn): FasterRCNN(
    (transform): GeneralizedRCNNTransform(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        Resize(min_size=(800,), max_size=1333, mode='bilinear')
    )
    (backbone): Backbone(type=vgg16, family=vgg, pretrained=True, input_shape=3x224x224, output_shape=512x7x7)
    (rpn): RegionProposalNetwork(
      (anchor_generator): AnchorGenerator()
      (head): RPNHead(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (cls_logits): Conv2d(512, 9, kernel_size=(1, 1), stride=(1, 1))
        (bbox_pred): Conv2d(512, 36, kernel_size=(1, 1), stride=(1, 1))
      )
    )
    (roi_heads): RoIHeads(
      (box_roi_pool): MultiScaleRoIAlign()
      (box_head): TwoMLPHead(
        (fc6): Linear(in_features=25088, out_features=1024, bias=True)
        (fc7): Linear(in_features=1024,

In [82]:
interact(
    draw_predictions, 
    dataset=fixed(test_dataset),
    index=widgets.IntSlider(min=0, max=len(test_dataset), step=1, value=10),
    model=fixed(detector),
    conf_thresh=widgets.FloatSlider(min=0.0, max=1.0, step=0.05, value=0.5)
);

interactive(children=(IntSlider(value=10, description='index', max=192), FloatSlider(value=0.5, description='c…