# **Task 1: Text Extraction using OCR**

In [1]:
# !pip install git+https://github.com/huggingface/transformers

In [2]:
# !pip install qwen-vl-utils gradio torch torchvision

In [None]:
import html
import io
import re

import gradio as gr
import torch
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor

In [4]:
def OCRmodel():
    """
    Load the Qwen2-VL model and its processor.

    The model weights are loaded in float32 and then moved to the GPU when
    CUDA is available, otherwise they stay on the CPU.

    Returns:
    A ``(model, processor)`` tuple.
    """
    checkpoint = "Qwen/Qwen2-VL-2B-Instruct"

    # Pick the target device up front: GPU if available, otherwise CPU
    target_device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load the model weights
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        checkpoint,
        trust_remote_code=True,
        torch_dtype=torch.float32,
    )
    # Load the matching default processor
    processor = AutoProcessor.from_pretrained(checkpoint, trust_remote_code=True)

    # Move the model onto the chosen device
    model.to(target_device)

    return model, processor

In [None]:
# Load the model and processor once at notebook level; ocr() reads these two
# globals, so this cell must run before any OCR call.
model, processor = OCRmodel()

In [51]:
# Function to read the image and process it for OCR
def ocr(image_data, max_new_tokens=2000, no_repeat_ngram_size=3, temperature=0.7):
    """
    Process the uploaded image and extract text using the OCR model.

    Uses the notebook-level ``model`` and ``processor`` objects, so those must
    already be loaded.

    Args:
    image_data: The image to read. The callers in this notebook pass a PIL
        image (not raw bytes).
    max_new_tokens: Upper bound on the number of tokens to generate.
    no_repeat_ngram_size: Forwarded to ``model.generate``.
        NOTE(review): n-gram blocking forbids any repeated n-gram in the
        output, which may corrupt faithful transcription of text that
        legitimately repeats (e.g. digit runs in dates) -- consider lowering
        or disabling it if OCR output drops characters.
    temperature: Forwarded to ``model.generate``.
        NOTE(review): presumably only has an effect when sampling is enabled
        in the model's generation config -- confirm.

    Returns:
    Extracted text as a string.
    """
    text_query = "Extract all the text in Sanskrit and English from the image."
    # Prepare messages for the model with the image
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image_data},
                {"type": "text", "text": text_query}],
        }
    ]

    # Prepare text and image input for the model (video inputs are not used)
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, _ = process_vision_info(messages)

    # Process inputs
    inputs = processor(
        text=[text],
        images=image_inputs,
        padding=True,
        return_tensors="pt"
    )
    # Put the inputs on whatever device the model actually lives on, instead
    # of re-deriving the device and risking a model/input mismatch.
    inputs = inputs.to(model.device)

    # Generate the output from the model
    with torch.no_grad():
        # Inference: Generation of the output
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            no_repeat_ngram_size=no_repeat_ngram_size,
            temperature=temperature,
        )
        # Strip the prompt tokens so only the newly generated tokens are decoded
        generated_ids_trimmed = [
            out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
    return " ".join(output_text).strip()

In [36]:
image_path = "/content/test.jpg"

# Open the sample image in a context manager so the underlying file handle is
# released as soon as OCR has finished with it.
with Image.open(image_path) as image:
    # Extract text using the OCR function
    extracted_text = ocr(image)
print("Extracted Text: ", extracted_text)

Extracted Text:  **English Text:**

World War I or the First World War (28 July 1914 – 11 November 1 18) was a global conflict between two coalitions: the Allies (or Entente) and the Central Powers. Fighting took place mainly in Europe and the Middle East, as well as in parts of Africa and the Asia-Pacific, and in Europe was characterised by trench warfare and the use of artillery, machine guns, and chemical weapons (gas). World War I was one of the deadliest conflicts in history, resulting in an estimated 9 million military dead and 23 million wounded, plus up to 8 million civilian deaths from causes including genocide (including the Armenian genocide). The movement of large numbers of people was a major factor in the Spanish flu pandemic, which killed millions.

**Sanskrit Text:**
युद्ध (WWI) या WW1 के संक्षिप्त रूप में जाना जा रहा है। यह एक वेश्विक युद् द्ध था और जो 28 जुलाई 1 नवंबर 1 से 10 नोवं बर्ष तक चला थी। इस यूद्द ने 6 करोड़ योगी पीयोग्य (गोरो) सहित 7 करॉडँ से अधिक संस्कृति को

# **Task 2: Web Application Development**

In [47]:
# Function to highlight search terms in the text
def highlight_keywords(text, keywords):
    """
    Return ``text`` as HTML with every keyword occurrence wrapped in ``<mark>``.

    Matching is case-insensitive and keywords are treated as literal strings,
    not regular expressions. The text itself is HTML-escaped, so characters
    such as ``<`` and ``&`` are displayed literally rather than interpreted
    as markup.

    Args:
    text: Plain text to search.
    keywords: Iterable of search terms. Empty strings are ignored.

    Returns:
    An HTML string. If there are no usable keywords, the escaped text is
    returned without any highlighting.
    """
    # Drop empty terms: an empty alternation matches at every position and
    # would wrap each gap between characters in a <mark> tag.
    # Longest first, so "warfare" is preferred over its prefix "war".
    terms = sorted((keyword for keyword in keywords if keyword), key=len, reverse=True)
    if not terms:
        return html.escape(text, quote=False)

    matcher = re.compile("|".join(re.escape(term) for term in terms), flags=re.IGNORECASE)

    # Escape matched and unmatched segments separately so that keywords are
    # searched in the raw text, never inside HTML entities.
    pieces = []
    cursor = 0
    for match in matcher.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(
            '<mark style="background-color:red;">'
            + html.escape(match.group(0), quote=False)
            + '</mark>'
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)

In [52]:
# Gradio interface function
def process_image(image, search_query):
    """
    Run OCR on an image and highlight the search keywords in the result.

    Args:
    image: The uploaded image, or None when nothing was uploaded.
    search_query: Whitespace-separated keywords to highlight. May be empty
        or None.

    Returns:
    A string intended for an HTML output component: the extracted text with
    matching keywords highlighted, the extracted text alone when there are no
    keywords, or a short notice when no image was supplied.
    """
    # Nothing uploaded yet: return a notice instead of failing inside the OCR call.
    if image is None:
        return "No image provided. Please upload an image."

    extracted_text = ocr(image)

    # Split input into individual keywords; a blank or whitespace-only query
    # yields no keywords and therefore no highlighting.
    keywords = search_query.split() if search_query else []
    if keywords:
        # Highlight matching keywords
        return highlight_keywords(extracted_text, keywords)

    # The result is rendered as HTML, so escape the plain text to keep
    # characters like "<" and "&" from being interpreted as markup.
    return html.escape(extracted_text, quote=False)

In [53]:
# Gradio UI: an image upload and a keyword box as inputs, rendered HTML as output
application = gr.Interface(
    fn=process_image,
    inputs=[
        # Image is handed to process_image as a PIL image
        gr.Image(label="Upload Image", type="pil"),
        # Free-text search query
        gr.Textbox(label="Enter search keywords"),
    ],
    outputs=gr.HTML(label="Extracted and Highlighted Text"),
)

In [None]:
# Launch the Gradio app.
# NOTE(review): per the Gradio docs, share=True requests a publicly reachable
# share link -- confirm that exposing the app outside this machine is intended.
application.launch(share=True)