In [1]:
"""
Benchmark timers for code: a decorator (my_timer) and a context manager (MyTimer).

Usage (when imported from separate modules):
# from my_timer_class import MyTimer
from my_timer_func import my_timer
import time

@MyTimer3(name="decorator")
@my_timer
"""

import functools
import time

def my_timer(orig_func):
    """Decorator that prints how long each call to ``orig_func`` takes.

    The wrapper forwards all positional and keyword arguments, measures the
    call with ``time.perf_counter`` and prints the elapsed seconds. Metadata
    of the wrapped callable is copied onto the wrapper via ``functools.wraps``.

    Args:
        orig_func: The callable to time.

    Returns:
        A wrapper that returns whatever ``orig_func`` returns.
    """
    # `time` is already imported at module level, so no function-local
    # re-import is needed here.
    @functools.wraps(orig_func)
    def wrapper_timer(*args, **kwargs):
        tic = time.perf_counter()
        value = orig_func(*args, **kwargs)
        elapsed_time = time.perf_counter() - tic
        # Not every callable has __name__ (e.g. functools.partial objects or
        # callable instances); fall back to repr() so that reporting can never
        # discard the result of a call that already succeeded.
        func_label = getattr(orig_func, "__name__", repr(orig_func))
        print(f"Elapsed time to run {func_label}: {elapsed_time:0.4f} seconds")
        return value
    return wrapper_timer


class MyTimer():
    """Context manager that prints how long the enclosed block took.

    Usage:
        from MyTimer import MyTimer
        with MyTimer():
            func(x, y)

    On exit two durations are printed: one measured with ``time.time`` and
    one measured with ``time.perf_counter``.
    """

    def __init__(self):
        # Seed both readings at construction so the attributes always exist,
        # even if __exit__ is invoked without a preceding __enter__.
        self.start = time.time()
        self.start_p = time.perf_counter()

    def __enter__(self):
        # Restart both clocks when the block is actually entered, so that time
        # between construction and `with` (or a previous use of the same
        # instance) is not counted in the reported duration.
        self.start = time.time()
        self.start_p = time.perf_counter()
        return self

    def __exit__(self, *args, **kwargs):
        # Read both clocks first, before any formatting or printing.
        end = time.time()
        end_p = time.perf_counter()
        runtime = end - self.start
        runtime_p = end_p - self.start_p
        msg = 'The function took {time} seconds to complete'
        print(msg.format(time=runtime))
        msg_p = 'The function took {time} perf seconds to complete'
        print(msg_p.format(time=runtime_p))
        # Implicitly returns None, so an exception raised inside the block
        # is not suppressed.

In [2]:
r"""
conda env remove --name trOCR
conda env create --name trOCR --file environment.yml

cache folder
C:\Users\techexpert\.cache\huggingface\hub

nvidia-smi for GPU info

cd scripts
python trOCR.py
"""

from PIL import Image 
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel 
import os
import requests


@my_timer
# https://huggingface.co/microsoft/trocr-base-handwritten
def run_trOCR(model_name="microsoft/trocr-base-handwritten", images=""):
    """Run a TrOCR checkpoint on ``images`` and print the recognised text.

    There are 3 main models to choose from, small, base and large.
    Some other fine-tuned models: IAM Handwritten, SROIE Receipts

    Args:
        model_name: Model id passed to ``from_pretrained`` for both the
            processor and the model.
        images: Image input forwarded to the processor, e.g. the RGB PIL
            image produced by ``Image.open(path).convert("RGB")``.

    Returns:
        The first decoded string of the batch (the same text that is printed).

    Raises:
        ValueError: If ``images`` is None or left at its empty default.
    """
    # Fail fast, before the model is loaded, when no image was supplied.
    # Previously the argument was ignored and a global named `image` was read.
    if images is None or (isinstance(images, (str, list, tuple)) and len(images) == 0):
        raise ValueError("run_trOCR requires an image; got an empty `images` argument")

    processor = TrOCRProcessor.from_pretrained(model_name, use_fast=True)
    model = VisionEncoderDecoderModel.from_pretrained(model_name)
    print(model)

    # Check for GPU availability
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"running on {device}")
    model.to(device)  # Move model to the selected device

    # Use the caller-supplied input rather than notebook-global state, and
    # keep the pixel tensor on the same device as the model.
    pixel_values = processor(images, return_tensors="pt").pixel_values.to(device)
    generated_ids = model.generate(pixel_values, max_new_tokens=1000)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(generated_text)
    # Hand the text back so callers can use it instead of scraping stdout.
    return generated_text


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Checkpoint to run. NOTE(review): the trailing note on the next line looks like
# text decoded in earlier runs — TODO confirm.
model_id = "microsoft/trocr-base-handwritten" # indus tre, This is a sample of text

# Relative path of the sample image to recognise.
link_image = "datasets/text_recognition_mcocr_data/text_recognition_mcocr_data/mcocr_public_145013aagqw_5.jpg"
# Open the file and convert it to RGB before passing it on.
image = Image.open(link_image).convert("RGB")
# Run recognition on the image with the chosen checkpoint.
run_trOCR(model_id, image)

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


AttributeError: 'VisionEncoderDecoderModel' object has no attribute 'summarize'