<a href="https://colab.research.google.com/github/arekarnarayan/AIModels/blob/dev1/OCR_Gemma3_v0.2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import functools
import os

import gradio as gr
import torch
from PIL import Image
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

def initialize_vision_model():
    """Build the image-captioning pipeline used to describe uploaded images.

    Returns:
        A ``transformers`` "image-to-text" pipeline for the
        ``Salesforce/blip-image-captioning-base`` checkpoint, created with
        ``device_map="auto"``.
    """
    pipeline_options = {
        "model": "Salesforce/blip-image-captioning-base",
        "device_map": "auto",
    }
    return pipeline("image-to-text", **pipeline_options)

@functools.lru_cache(maxsize=1)
def _load_vision_model():
    """Create the captioning pipeline once and reuse it on later calls."""
    return initialize_vision_model()


@functools.lru_cache(maxsize=1)
def _load_language_model(model_id):
    """Load the tokenizer and causal LM for *model_id* once and reuse them.

    The Hugging Face token is read from ``HUGGINGFACEHUB_API_TOKEN``. A load
    that raises is not cached, so the next call tries again.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        token=hf_token,
    )
    return tokenizer, model


def process_image(image):
    """Caption an uploaded image and ask the language model to analyze it.

    Args:
        image: The image supplied by the Gradio ``Image`` input, or ``None``
            when nothing was uploaded.

    Returns:
        A Markdown string with an "Image Description" section (the caption)
        and an "Analysis" section (the language model's answer). Returns a
        plain prompt to upload an image when *image* is ``None``, and an
        ``"Error processing image: ..."`` message if any step raises.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Loading models is by far the slowest step; the cached helpers make
        # sure it happens on the first request only, not on every upload.
        vision_model = _load_vision_model()
        tokenizer, model = _load_language_model("google/gemma-7b")

        # Get image description
        image_description = vision_model(image)[0]['generated_text']

        # Create prompt
        prompt = f"""Based on this image description: "{image_description}"
        Please analyze and extract all readable text content.
        Present the results in a clear, structured format."""

        # Generate response
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            # Budget applies to generated tokens only, so a longer prompt
            # cannot eat into (or exhaust) the room left for the answer.
            max_new_tokens=500,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        # generate() returns prompt + continuation for causal LMs; drop the
        # prompt tokens so the analysis does not echo the instructions back.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_tokens = outputs[0][prompt_length:]
        response = tokenizer.decode(
            generated_tokens, skip_special_tokens=True
        ).strip()

        return f"""
### Image Description
{image_description}

### Analysis
{response}
"""
    except Exception as e:
        # UI boundary: surface the failure in the output pane instead of
        # letting the request crash.
        return f"Error processing image: {str(e)}"

# Gradio UI: a single PIL image input whose upload is handed to
# process_image, with the returned Markdown rendered as the result.
_image_input = gr.Image(label="Upload Image", type="pil")
_results_output = gr.Markdown(label="Analysis Results")

demo = gr.Interface(
    title="Image Text Analysis",
    description="Upload an image to analyze its text content.",
    fn=process_image,
    inputs=_image_input,
    outputs=_results_output,
)

if __name__ == "__main__":
    # Fail fast when the Hugging Face token is missing: it is needed to
    # download the Gemma checkpoint, and failing here is clearer than an
    # error on the first upload.
    if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
        raise ValueError("Please set the HUGGINGFACEHUB_API_TOKEN environment variable")

    # Start the Gradio server.
    demo.launch(
        server_port=None,  # Let Gradio find an available port
        share=True,       # Create a temporary public gradio.live URL (anyone with the link can use the app)
        debug=True        # Keep the cell running so errors and logs stay visible
    )


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://23b9662bc9344e79df.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Using existing dataset file at: .gradio/flagged/dataset1.csv


Device set to use cpu


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7864 <> https://23b9662bc9344e79df.gradio.live
