# model

> The TATR model for table extraction and OCR   

In [1]:
#| default_exp model

In [1]:
from nbdev.showdoc import *
from fastcore.test import *
from fastcore.utils import *

In [2]:
#| export
from huggingface_hub import hf_hub_download
from transformers import AutoImageProcessor, TableTransformerForObjectDetection
import torch
from PIL import Image


In [3]:
# Prefer the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cpu


## Load model

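Let's pull the Table Transformer (TATR) detection model from the Hugging Face Hub. Other pages of this project refer to the "no_timm" version of the checkpoint; note that the cell below does not pass `revision="no_timm"`, so it loads the checkpoint's default revision (TODO: confirm which revision is intended).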

In [4]:
#| export

# The preprocessor and the detection weights are fetched from the same Hub checkpoint.
_checkpoint = "microsoft/table-transformer-detection"
image_processor = AutoImageProcessor.from_pretrained(_checkpoint)
model = TableTransformerForObjectDetection.from_pretrained(_checkpoint)


The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/table-transformer-detection were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Example with an image from the output folder

In [5]:
file_path = "samples/output/Lipincott, 1905_page_41.png"
# Image.open keeps the underlying file open; convert() returns a separate copy,
# so the handle can be released as soon as the conversion is done.
with Image.open(file_path) as page:
    image = page.convert("RGB")

In [6]:
# Turn the page image into the tensors the model expects.
inputs = image_processor(images=image, return_tensors="pt")
# Inference only: disable autograd so no gradient graph is built or kept in memory.
with torch.no_grad():
    outputs = model(**inputs)

In [7]:
# One (height, width) row per image so the boxes are scaled back to the page size.
target_sizes = torch.tensor([[image.height, image.width]])
results = image_processor.post_process_object_detection(
    outputs,
    threshold=0.9,
    target_sizes=target_sizes,
)[0]

In [8]:
# Report every detection kept by the post-processing step.
detections = zip(results["scores"], results["labels"], results["boxes"])
for confidence, class_id, corners in detections:
    class_name = model.config.id2label[class_id.item()]
    rounded_confidence = round(confidence.item(), 3)
    rounded_corners = [round(value, 2) for value in corners.tolist()]
    print(f"Detected {class_name} with confidence {rounded_confidence} at location {rounded_corners}")

Detected table with confidence 1.0 at location [235.28, 604.4, 1553.19, 2026.8]


## Try the model on the entire output folder


In [11]:
import os
import pandas as pd
import numpy as np

directory_path = 'samples/output'

# Recursively collect the path of every file found beneath the directory.
files = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(directory_path)
    for name in names
]


In [16]:
def runModel(files, threshold=0.9, output_csv=None):
    """Run table detection on every image in `files` and tabulate the detections.

    Each path is opened with PIL, converted to RGB, passed through the
    notebook-level `image_processor` and `model`, and post-processed into boxes
    scaled to the original image size. One row is emitted per detection
    returned by the post-processing step.

    Args:
        files: Iterable of image file paths.
        threshold: Confidence threshold forwarded to
            `image_processor.post_process_object_detection`. Defaults to 0.9.
        output_csv: Path of the CSV file to write. When None, the results are
            written to "../results.csv" relative to the notebook-level
            `directory_path`.

    Returns:
        A pandas DataFrame with the columns `file_path`, `label`, `score`
        (rounded to 3 decimals) and `tbox` (list of box coordinates rounded to
        2 decimals). The frame is empty, but keeps these columns, when nothing
        is detected.
    """
    columns = ["file_path", "label", "score", "tbox"]
    rows = []
    for path in files:
        # Image.open keeps the file handle open; convert() returns a separate
        # copy, so the handle is closed right after the conversion.
        with Image.open(path) as raw_image:
            image = raw_image.convert("RGB")
        inputs = image_processor(images=image, return_tensors="pt")
        # Inference only: skip autograd so no gradient graph is kept per image.
        with torch.no_grad():
            outputs = model(**inputs)
        # PIL's size is reversed here before being handed to the post-processor.
        target_sizes = torch.tensor([image.size[::-1]])
        results = image_processor.post_process_object_detection(
            outputs, threshold=threshold, target_sizes=target_sizes
        )[0]
        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
            rows.append(
                {
                    "file_path": path,
                    "label": model.config.id2label[label.item()],
                    "score": round(score.item(), 3),
                    "tbox": [round(i, 2) for i in box.tolist()],
                }
            )
    # Passing `columns` keeps the schema stable even when `rows` is empty.
    data = pd.DataFrame(rows, columns=columns)
    if output_csv is None:
        output_csv = os.path.join(directory_path, "../results.csv")
    data.to_csv(output_csv, index=False)
    return data
    

In [17]:
# Run detection over every collected file. The returned DataFrame is the cell's
# displayed output; runModel also writes the same rows to a CSV file.
runModel(files)

Unnamed: 0,file_path,label,score,tbox
0,"samples/output/Lipincott, 1905_page_33.png",table,0.999,"[74.93, 333.48, 1385.4, 2476.15]"
1,"samples/output/Lipincott, 1905_page_27.png",table,0.987,"[184.64, 1267.54, 1287.91, 1564.98]"
2,"samples/output/Lipincott, 1905_page_108.png",table,0.916,"[326.62, 649.78, 944.81, 2375.05]"
3,"samples/output/Lipincott, 1905_page_30.png",table,0.999,"[226.2, 516.41, 1543.08, 1670.81]"
4,"samples/output/Lipincott, 1905_page_3.png",table rotated,0.93,"[61.36, 594.36, 1377.08, 2443.32]"
5,"samples/output/Lipincott, 1905_page_2.png",table rotated,0.999,"[36.73, 646.84, 1254.78, 2308.77]"
6,"samples/output/Lipincott, 1905_page_31.png",table,0.901,"[51.8, 351.79, 1366.5, 2263.87]"
7,"samples/output/Lipincott, 1905_page_6.png",table,0.904,"[511.85, 360.43, 1159.34, 2351.64]"
8,"samples/output/Lipincott, 1905_page_35.png",table,1.0,"[40.25, 366.57, 1349.74, 2460.58]"
9,"samples/output/Lipincott, 1905_page_34.png",table,0.968,"[210.58, 356.46, 1534.16, 1135.86]"


In [None]:
#| export
## put functions and classes here

In [None]:
#| hide
# Don't show this cell in the docs

In [None]:
#| hide
import nbdev

# Export the notebook through nbdev.
nbdev.nbdev_export()