In [None]:
!pip install anthropic

In [None]:
import json
import os
import time
import anthropic
import base64
import re
from google.colab import userdata
from google.colab import drive
from tqdm import tqdm

# Mount Google Drive at /content/drive so the /content/drive/MyDrive paths
# used by this notebook resolve.
drive.mount('/content/drive')


def is_image_file(filename):
    """Return True when *filename* ends in .png, .jpg or .jpeg, ignoring case."""
    lowered = filename.lower()
    return any(lowered.endswith(suffix) for suffix in ('.png', '.jpg', '.jpeg'))


def run_claude_on_existing_json(image_root, input_json_path, model_name='claude-3-7-sonnet-20250219'):
    """Run Claude OCR on the images listed in a JSON file and store the results in it.

    Every top-level key of the JSON that does not start with "_" is treated as
    an image filename. The file is looked up under ``image_root`` (directories
    whose path contains 'text_category' are ignored; the first match in walk
    order wins), sent to Claude with an OCR prompt, and the returned text is
    stored at ``data[filename]["models"][model_name]`` with ``cer`` and ``wer``
    set to None. The total elapsed time is recorded under
    ``data["_meta"]["processing_times"][model_name]`` and the JSON file is
    overwritten in place.

    Args:
        image_root: Directory tree searched for the image files.
        input_json_path: Path of the JSON file to read and then overwrite.
        model_name: Anthropic model identifier; also used as the result key.

    Returns:
        The updated data structure that was written back to the JSON file.

    Raises:
        ValueError: If no API key is available from Colab userdata ('claude')
            or from the ANTHROPIC_API_KEY environment variable.
    """
    # userdata.get can raise instead of returning None (for example when the
    # secret is not defined or notebook access was not granted), which would
    # make the environment-variable fallback unreachable without this guard.
    try:
        api_key = userdata.get('claude')
    except Exception:
        api_key = None
    if not api_key:
        api_key = os.getenv('ANTHROPIC_API_KEY')

    if not api_key:
        raise ValueError("Anthropic API key not found. Set it in Colab userdata as 'claude' or as ANTHROPIC_API_KEY environment variable.")

    client = anthropic.Anthropic(api_key=api_key)

    # File extension -> media type sent along with the base64 image payload.
    media_types = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    def process_image_with_claude(image_path, prompt):
        """Send one image plus the prompt to Claude and return the reply text."""
        extension = os.path.splitext(image_path)[1].lower()
        media_type = media_types.get(extension)
        if media_type is None:
            # Fail before reading and uploading a file whose type is unknown.
            raise ValueError(f"Unsupported image type '{extension}' for {image_path}")

        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        message = client.messages.create(
            model=model_name,
            max_tokens=4000,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": base64_image,
                            },
                        },
                        {
                            "type": "text",
                            "text": prompt,
                        }
                    ],
                }
            ]
        )

        # Concatenate every text block so the reply is not lost when text is
        # not the first (or only) content block.
        return "".join(
            block.text
            for block in (message.content or [])
            if getattr(block, "type", None) == "text"
        )

    print("Initializing Claude processing...")

    # Load current JSON
    with open(input_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    start_time = time.time()

    prompt = (
        "Analyze this image and do OCR carefully. Do not change the meaning of texts. Just extract the text. Provide only text of the image. "
        "In the output I just want the text not your explanation or answer."
    )

    filenames = [k for k in data if not k.startswith("_")]

    # Walk the image tree once and remember the first location of each wanted
    # file, instead of re-walking the whole tree for every JSON key.
    wanted = set(filenames)
    image_paths = {}
    for root, _, files in os.walk(image_root):
        if 'text_category' in root:
            continue
        for name in wanted.intersection(files):
            image_paths.setdefault(name, os.path.join(root, name))

    missing = []
    for filename in tqdm(filenames, desc="OCR process"):
        img_path = image_paths.get(filename)
        if img_path is None:
            missing.append(filename)
            continue
        try:
            extracted_text = process_image_with_claude(img_path, prompt)

            # Write OCR results; create "models" if the entry lacks it so a
            # completed API call is not thrown away on a KeyError.
            data[filename].setdefault("models", {})[model_name] = {
                "prediction": extracted_text,
                "cer": None,
                "wer": None
            }
        except Exception as e:
            # Best effort: report the failure and continue with the next image.
            print(f"{filename} error: {str(e)}")

    elapsed = round(time.time() - start_time, 2)
    print(f"\n OCR completed: {elapsed:.2f} seconds")

    if missing:
        print(f" {len(missing)} image(s) listed in the JSON were not found under {image_root}")

    meta = data.get("_meta", {})
    processing_times = meta.get("processing_times", {})
    processing_times[model_name] = elapsed
    meta["processing_times"] = processing_times
    data["_meta"] = meta

    # Serialize fully before opening the file for writing, so a serialization
    # error cannot leave the input JSON truncated.
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    with open(input_json_path, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print(f" Updated JSON saved: {input_json_path}")
    return data



# Benchmark locations on the mounted Drive: the directory tree searched for
# images, and the JSON file that is read and then overwritten with results.
image_root = '/content/drive/MyDrive/nutuk/benchmark/'
input_json_path = '/content/drive/MyDrive/nutuk/benchmark/converted_data.json'

# Run the OCR pass with the default model and keep the returned data.
updated = run_claude_on_existing_json(
    image_root=image_root,
    input_json_path=input_json_path,
)