In [2]:
import torch
from transformers import AutoTokenizer, AutoModel


# --- Environment report -------------------------------------------------
# Query CUDA availability once and reuse the answer for both the report
# and the device selection below.
cuda_available = torch.cuda.is_available()

print("CUDA Version:", torch.version.cuda)
print("CUDA available:", cuda_available)
if cuda_available:
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("GPU Memory:", torch.cuda.get_device_properties(0).total_memory / 1e9, "GB")


# Single source of truth for where the model lives.
device = torch.device('cuda' if cuda_available else 'cpu')
print("Using device:", device)


# --- Model / tokenizer ---------------------------------------------------
MODEL_ID = 'ucaslcl/GOT-OCR2_0'

# NOTE(review): trust_remote_code=True executes Python files downloaded from
# the Hub repository. After auditing that code, consider passing
# `revision=<commit sha>` to both calls so later upstream changes are not
# picked up silently.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    pad_token_id=tokenizer.eos_token_id
)

# Place the model on the device selected above (previously `device` was
# computed but ignored in favour of a second availability check + .cuda()),
# then switch to inference mode.
# NOTE(review): whether the remote `chat` implementation works on CPU is not
# visible from this notebook — confirm before relying on the CPU fallback.
model = model.to(device)
model = model.eval()


print("\nModel device:", next(model.parameters()).device)

CUDA Version: 12.4
CUDA available: True
GPU Name: Tesla T4
GPU Memory: 15.828320256 GB
Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/300 [00:00<?, ?B/s]

tokenization_qwen.py:   0%|          | 0.00/9.47k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ucaslcl/GOT-OCR2_0:
- tokenization_qwen.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


qwen.tiktoken:   0%|          | 0.00/2.56M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/149 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/986 [00:00<?, ?B/s]

modeling_GOT.py:   0%|          | 0.00/33.8k [00:00<?, ?B/s]

got_vision_b.py:   0%|          | 0.00/16.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ucaslcl/GOT-OCR2_0:
- got_vision_b.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


render_tools.py:   0%|          | 0.00/1.99k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ucaslcl/GOT-OCR2_0:
- render_tools.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/ucaslcl/GOT-OCR2_0:
- modeling_GOT.py
- got_vision_b.py
- render_tools.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]


Model device: cuda:0


In [1]:

!pip install verovio
!pip install tiktoken
!pip install transformers
!pip install safetensors
!pip install torch torchvision

Collecting verovio
  Downloading verovio-5.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.1 kB)
Downloading verovio-5.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: verovio
Successfully installed verovio-5.0.0
Collecting tiktoken
  Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.8.0
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collec

In [3]:

!apt-get install poppler-utils
!pip install pdf2image

from pdf2image import convert_from_path
import os


image = convert_from_path('/content/10.pdf')[0]
temp_image_path = "temp_page.png"
image.save(temp_image_path, "PNG")


try:
    res = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
    print("OCR Result:", res)
except Exception as e:
    print(f"Error during OCR: {str(e)}")
finally:

    if os.path.exists(temp_image_path):
        os.remove(temp_image_path)

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  poppler-utils
0 upgraded, 1 newly installed, 0 to remove and 19 not upgraded.
Need to get 186 kB of archives.
After this operation, 696 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.6 [186 kB]
Fetched 186 kB in 2s (122 kB/s)
Selecting previously unselected package poppler-utils.
(Reading database ... 124926 files and directories currently installed.)
Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.6_amd64.deb ...
Unpacking poppler-utils (22.02.0-2ubuntu0.6) ...
Setting up poppler-utils (22.02.0-2ubuntu0.6) ...
Processing triggers for man-db (2.10.2-1) ...
Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Installing collected packages: pdf2image
Success

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48


OCR Result: San Francisco, CA 94111
inquire@apexbridge.mail
template.net
222 555 777
Consulting Invoice
Invoice Number: 001234
Invoice Date: October 9, 2055
Due Date: November 9, 2055
Bill To:
. Client: Houston Smith
. Company: Organivu
. Address: Bakersfield, CA 93301
. Contact Number: 222 555 7777
Description of Services Rendered:
Date
Description of Services
Hours
Rate
Amount
October 1, 2055
Consulting Services
10
$150
$1,500
October 5, 2055
Strategy Development
5
$200
$1,000
Subtotal: $2,500
Tax (10%): $250
Total Amount Due: $2,750
