In [None]:
!pip install torch==2.0.1 torchvision==0.15.2 transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0


Solved by running `pip install numpy==1.26.4 --upgrade`.
Note that numpy<2 is required; otherwise a different error is raised.

In [None]:
!pip install numpy==1.26.4 --upgrade

In [None]:
!pip show numpy

In [None]:
from transformers import AutoModel, AutoTokenizer
import torch
import os
import json
from tqdm import tqdm
import gc
from google.colab import drive
import time

# Mount Google Drive so files under /content/drive become readable and writable.
mount_point = '/content/drive'
drive.mount(mount_point)

def is_image_file(filename):
    """Return True if *filename* ends with .png, .jpg or .jpeg (case-insensitive)."""
    lowered = filename.lower()
    for suffix in ('.png', '.jpg', '.jpeg'):
        if lowered.endswith(suffix):
            return True
    return False

def _index_image_paths(image_root, wanted):
    """Map each filename in *wanted* to the first matching path under *image_root*.

    Directories whose path contains 'text_category' are skipped. The tree is
    walked exactly once; when a name occurs in several directories, the first
    one in os.walk order wins.
    """
    index = {}
    for root, dirs, files in os.walk(image_root):
        if 'text_category' in root:
            # Every descendant path would contain 'text_category' as well,
            # so there is no point in descending any further.
            dirs[:] = []
            continue
        for name in files:
            if name in wanted and name not in index:
                index[name] = os.path.join(root, name)
    return index


def run_got_ocr_on_existing_json(image_root, input_json_path, model_path='ucaslcl/GOT-OCR2_0'):
    """Run GOT-OCR on the images listed in a JSON file and store the results in it.

    Every top-level key of the JSON that does not start with "_" is treated as
    an image filename. The file is searched for under ``image_root`` (ignoring
    any directory whose path contains 'text_category') and passed to
    ``model.chat(..., ocr_type='ocr')``. The returned value is stored under
    ``data[filename]["models"][<model name>]`` together with empty "cer"/"wer"
    slots. The wall-clock time of the OCR loop is recorded in
    ``data["_meta"]["processing_times"][<model name>]`` and the JSON file is
    overwritten in place.

    Args:
        image_root: Directory that is searched recursively for the images.
        input_json_path: Path of the JSON file to read and then overwrite.
        model_path: Model identifier handed to ``from_pretrained``; its last
            path component, lower-cased, is used as the result key.

    Returns:
        The updated dictionary that was written back to ``input_json_path``.

    Raises:
        OSError, json.JSONDecodeError: if the JSON file cannot be read/parsed.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # Read the JSON before touching the model so a wrong path fails fast
    # instead of after an expensive model load.
    with open(input_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    filenames = [k for k in data if not k.startswith("_")]

    # Walk the image tree once instead of once per entry; this also keeps
    # file-system traversal out of the timed OCR section below.
    image_paths = _index_image_paths(image_root, set(filenames))

    print("Initializing models...")

    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModel.from_pretrained(
        model_path,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map=device,
        use_safetensors=True,
        pad_token_id=tokenizer.eos_token_id
    )
    model.eval()
    if device == 'cuda':
        # Kept from the original flow; expected to be a no-op when device_map
        # has already placed the weights on the GPU.
        model.cuda()

    model_name = model_path.split('/')[-1].lower()

    missing = []
    start_time = time.perf_counter()

    for filename in tqdm(filenames, desc="OCR process"):
        img_path = image_paths.get(filename)
        if img_path is None:
            missing.append(filename)
            continue
        try:
            # Get OCR result with GOT-OCR chat function
            extracted_text = model.chat(tokenizer, img_path, ocr_type='ocr')

            # setdefault keeps the result even when the entry has no "models"
            # dict yet, instead of discarding a finished OCR run.
            data[filename].setdefault("models", {})[model_name] = {
                "prediction": extracted_text,
                "cer": None,
                "wer": None
            }
        except Exception as e:
            # Best effort per image: report the failure and continue.
            print(f"{filename} error: {e}")

    elapsed = round(time.perf_counter() - start_time, 2)
    print(f"\n OCR completed: {elapsed:.2f} seconds")

    if missing:
        print(f" {len(missing)} image(s) not found under {image_root}: {missing}")

    meta = data.get("_meta", {})
    processing_times = meta.get("processing_times", {})
    processing_times[model_name] = elapsed
    meta["processing_times"] = processing_times
    data["_meta"] = meta

    # save updated JSON
    with open(input_json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f" Updated JSON saved: {input_json_path}")
    return data




# Benchmark folder on the mounted Drive and the JSON file that lists its images.
image_root = '/content/drive/MyDrive/nutuk/benchmark/'
input_json_path = '/content/drive/MyDrive/nutuk/benchmark/converted_data.json'

# Run OCR over every listed image; the JSON file is updated in place and the
# resulting dictionary is kept in `updated` for later cells.
updated = run_got_ocr_on_existing_json(
    image_root=image_root,
    input_json_path=input_json_path,
)