In [None]:
!pip install qwen-vl-utils

In [None]:
# Check the CUDA and Torch versions
!nvcc --version
!python -c "import torch; print(torch.__version__)"


In [None]:
!pip uninstall -y flash-attn

In [None]:
!pip install flash-attn --no-build-isolation

In [None]:
!pip install --upgrade transformers accelerate einops Pillow packaging

In [None]:
!pip install git+https://github.com/huggingface/transformers accelerate


In [None]:
import gc
import torch
from google.colab import drive
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
from flash_attn import flash_attn_func
import os
import json
import glob
from tqdm import tqdm
from PIL import Image
import time  #for measuring time


# Memory management: turn on expandable segments for PyTorch's CUDA allocator
os.environ.update(PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True")

# Make Google Drive reachable under /content/drive
drive.mount('/content/drive')

def read(image_path, processor, model, max_new_tokens=128):
    """Run OCR on a single image with a Qwen2.5-VL model and return the text.

    Args:
        image_path: Image reference placed in the chat message and resolved
            by ``process_vision_info`` (a file path in this notebook).
        processor: Processor used to build the chat prompt, tensorize the
            inputs and decode the generated ids.
        model: Generation model; must expose ``generate`` and ``device``.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 128, the value that used to be hard-coded; raise it
            for images containing long passages.

    Returns:
        The decoded text for the image. This function never raises: on any
        failure it returns a string of the form
        ``"Error processing image: <message>"`` instead.
    """
    try:
        question = "Extract the Turkish text from the image exactly as it appears. Do not repeat, comment, translate, or add any text. Return only the raw text. If no text is detected, return an empty response."
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "image": image_path,
                    },
                    {"type": "text", "text": question},
                ],
            }
        ]

        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        # Put the inputs wherever the model actually lives instead of
        # assuming "cuda"; the two can differ from the host's default device.
        inputs = inputs.to(model.device)

        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
        # Drop the prompt tokens so only the newly generated ids are decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

        ocr_text = output_text[0]

        # Release the per-image tensors before asking CUDA to free its cache.
        del inputs, generated_ids, generated_ids_trimmed, output_text
        # Only touch the CUDA runtime when it exists: synchronize() raises on
        # CUDA-less hosts, which would turn a successful OCR into an error.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        return ocr_text
    except Exception as e:
        # Best-effort contract kept for existing callers: report the failure
        # as text rather than propagating the exception.
        return f"Error processing image: {str(e)}"

def _index_image_paths(image_root):
    """Map every file name under *image_root* to the first path found for it."""
    image_paths = {}
    for root, _, files in os.walk(image_root):
        # Skip any directory whose path contains 'text_category'.
        if 'text_category' in root:
            continue
        for name in files:
            # Keep the first hit in walk order, like a search that stops at
            # the first matching directory.
            image_paths.setdefault(name, os.path.join(root, name))
    return image_paths


def run_qwen_on_existing_json(image_root, input_json_path):
    """Run Qwen2.5-VL OCR for every image listed in a JSON file and save it.

    The JSON file is loaded, each top-level key not starting with ``"_"`` is
    treated as an image file name and looked up under *image_root*, and the
    OCR result is stored at ``data[filename]["models"][<model name>]`` with
    ``cer``/``wer`` set to ``None``. The elapsed time is recorded under
    ``data["_meta"]["processing_times"][<model name>]`` and the file at
    *input_json_path* is overwritten with the updated content.

    Args:
        image_root: Directory searched recursively for the image files.
        input_json_path: Path of the JSON file to read and overwrite.

    Returns:
        The updated dictionary that was written back to *input_json_path*.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    print("Initializing models...")

    model_path = "Qwen/Qwen2.5-VL-7B-Instruct"
    # The lower-cased repository name is the key used inside the JSON.
    model_name = model_path.split("/")[-1].lower()

    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_path, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", device_map="auto"
    )
    processor = AutoProcessor.from_pretrained(model_path)

    # Load existing JSON
    with open(input_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    start_time = time.time()

    # Walk the image tree once instead of once per JSON entry. It stays
    # inside the timed section so the measurement covers the same work.
    image_paths = _index_image_paths(image_root)

    filenames = [k for k in data if not k.startswith("_")]
    for filename in tqdm(filenames, desc="OCR process"):
        img_path = image_paths.get(filename)
        if img_path is None:
            # Surface entries that have no image instead of skipping silently.
            print(f"{filename} not found under {image_root}, skipped")
            continue

        try:
            extracted_text = read(img_path, processor, model)

            # read() reports failures as text; flag them so they are not
            # mistaken for real predictions when the JSON is evaluated.
            if extracted_text.startswith("Error processing image:"):
                print(f"{filename} error: {extracted_text}")

            # write ocr result (create the "models" mapping if it is missing)
            data[filename].setdefault("models", {})[model_name] = {
                "prediction": extracted_text,
                "cer": None,
                "wer": None
            }
        except Exception as e:
            print(f"{filename} error: {str(e)}")

    elapsed = round(time.time() - start_time, 2)
    print(f"\n OCR finished: {elapsed:.2f} saniye")

    # Record the run time next to any timings already stored in "_meta".
    meta = data.get("_meta", {})
    processing_times = meta.get("processing_times", {})
    processing_times[model_name] = elapsed
    meta["processing_times"] = processing_times
    data["_meta"] = meta

    # Save updated JSON file
    with open(input_json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f"Updated JSON saved: {input_json_path}")
    return data



# Benchmark locations on the mounted Google Drive
image_root = '/content/drive/MyDrive/nutuk/benchmark/'
input_json_path = '/content/drive/MyDrive/nutuk/benchmark/converted_data.json'

# Run the OCR pass and keep the updated benchmark data
updated = run_qwen_on_existing_json(
    image_root=image_root,
    input_json_path=input_json_path,
)