In [1]:
from dotenv import load_dotenv
import os
import getpass


# Load variables from a local .env file (if present) into the process
# environment so the key does not have to be typed or hardcoded.
load_dotenv()

# Resolve the Groq API key. A missing or empty value triggers a hidden
# interactive prompt. The argument to getpass.getpass() is the prompt text
# shown to the user, so it must be a descriptive string, not the key itself.
groq_key = os.getenv("GROQ_API_KEY")
if not groq_key:
    groq_key = getpass.getpass("Enter your GROQ_API_KEY: ")
    # Export the entered key so code that reads it from the environment sees it.
    os.environ["GROQ_API_KEY"] = groq_key

In [2]:
# 1. Imports
import gradio as gr
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import arabic_reshaper
from bidi.algorithm import get_display
from surya.recognition import RecognitionPredictor
from surya.detection import DetectionPredictor
from surya.layout import LayoutPredictor
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate

# 2. Build the three Surya predictors (detection, recognition, layout),
#    each with its default constructor arguments.
det_predictor = DetectionPredictor()
rec_predictor = RecognitionPredictor()
layout_predictor = LayoutPredictor()

# 3. Build the Groq chat client used for question answering.
#    temperature=0; max_tokens and timeout are explicitly left unset (None).
llm = ChatGroq(
    max_retries=2,
    timeout=None,
    max_tokens=None,
    temperature=0,
    model="llama-3.1-8b-instant",
)

In [9]:
import gradio as gr
import json

# Dropdown label -> paths of the document image and of its extracted JSON.
# Insertion order is the order in which the choices appear in the dropdown.
document_options = {
    "Passport": {
        "image": "./Qwen/Output/Passport/Passport.png",
        "json": "./Qwen/Output/Passport/Result 2/Passport.json",
    },
    "ID": {
        "image": "./Qwen/Output/UAE_ID/UAE_ID.jpg",
        "json": "./Qwen/Output/UAE_ID/UAE_ID.json",
    },
    "Bill": {
        "image": "./Qwen/Output/Bill/bill1.png",
        "json": "./Qwen/Output/Bill/Bill.json",
    },
    "Bank Statement": {
        "image": "./Qwen/Output/Bank Statement/BankStatement2.jpeg",
        "json": "./Qwen/Output/Bank Statement/BankStatement2.json",
    },
}

# Load selected document data
def load_document(doc_type):
    """Return the image and pretty-printed JSON text for a document type.

    Args:
        doc_type: Key into ``document_options``.

    Returns:
        A ``(image, json_text)`` tuple. ``image`` is the result of
        ``Image.open`` on the entry's "image" path, or ``None`` when the
        entry has no such key. ``json_text`` is the entry's JSON file
        re-serialized with a 2-space indent and non-ASCII characters kept
        as-is, or ``"{}"`` when the entry has no "json" key. An unknown
        ``doc_type`` is treated as an empty entry.
    """
    entry = document_options.get(doc_type, {})

    picture = None
    if "image" in entry:
        picture = Image.open(entry["image"])

    parsed = {}
    if "json" in entry:
        with open(entry["json"], "r", encoding="utf-8") as handle:
            parsed = json.load(handle)

    json_text = json.dumps(parsed, indent=2, ensure_ascii=False)
    return picture, json_text


In [10]:
# QA function using the selected document's JSON
def qa_on_json(question, json_content):
    """Answer a question about a document's extracted JSON via the LLM.

    Args:
        question: The user's question text.
        json_content: The document's extracted JSON, as text.

    Returns:
        The ``content`` of the model response. When the question or the JSON
        text is missing or whitespace-only, a short guidance message is
        returned instead and the model is not called.
    """
    # Guard against blank input so no request is sent with nothing to answer.
    if not question or not question.strip():
        return "Please enter a question."
    if not json_content or not json_content.strip():
        return "No document is loaded. Please select a document first."

    # The prompt body is flush-left on purpose: any indentation inside the
    # triple-quoted string would become part of the text sent to the model.
    prompt = f"""
You are a document QA assistant.

Here is the extracted JSON content from the uploaded document:

{json_content}

Question: {question}

Answer:
"""
    response = llm.invoke(prompt)
    return response.content

# Gradio Interface: pick a document, view its image and JSON, ask questions.
with gr.Blocks() as iface:
    gr.Markdown("# 📄 Document JSON QA Demo")
    gr.Markdown("Select a document type to view its image and JSON, then ask questions using LLaMA-3 via Groq.")

    doc_choice = gr.Dropdown(
        choices=list(document_options.keys()),
        label="Choose Document Type",
        value="Passport"
    )

    image_display = gr.Image(label="Document Image")
    json_display = gr.Textbox(label="Extracted JSON", lines=20)

    # Refresh the image and JSON whenever a different document is selected.
    doc_choice.change(
        load_document,
        inputs=doc_choice,
        outputs=[image_display, json_display]
    )

    # The dropdown starts on a preselected value, and no change event has
    # happened at that point. Populate the image and JSON on page load so the
    # default document is visible (and answerable) without re-selecting it.
    iface.load(
        load_document,
        inputs=doc_choice,
        outputs=[image_display, json_display]
    )

    gr.Markdown("### ❓ Ask a Question about this document")
    question_input = gr.Textbox(label="Your Question")
    answer_output = gr.Textbox(label="LLaMA-3 Answer")

    # Pressing Enter in the question box sends the question together with the
    # currently displayed JSON text to the QA function.
    question_input.submit(qa_on_json, inputs=[question_input, json_display], outputs=answer_output)

iface.launch()


* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.


