In [1]:
!pip install paddlepaddle
!pip install paddleocr
!pip install shapely
!pip install google-generativeai
!pip install gradio
!pip install fastapi --upgrade

Collecting paddlepaddle
  Downloading paddlepaddle-3.0.0-cp311-cp311-manylinux1_x86_64.whl.metadata (8.9 kB)
Collecting astor (from paddlepaddle)
  Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting opt_einsum==3.3.0 (from paddlepaddle)
  Downloading opt_einsum-3.3.0-py3-none-any.whl.metadata (6.5 kB)
Downloading paddlepaddle-3.0.0-cp311-cp311-manylinux1_x86_64.whl (192.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.8/192.8 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.5/65.5 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Installing collected packages: opt_einsum, astor, paddlepaddle
  Attempting uninstall: opt_einsum
    Found existing installation: opt_einsum 3.4.0
    Uninstalling opt_einsum-3.4.0:
      Successfully uninstalled opt_einsum-3.4.0
Successful

In [2]:
import cv2
import numpy as np
from PIL import Image
import paddleocr
import gradio as gr
from google import genai
import tempfile
import os




In [None]:
def preprocess_image(image):
    """Return a black-and-white version of ``image`` as a Pillow image.

    The input is turned into a NumPy array, converted from RGB to grayscale,
    smoothed with a 5x5 Gaussian blur and binarised with an Otsu-selected
    threshold.

    Args:
        image: Anything ``np.array`` can turn into an RGB pixel array
            (presumably a ``PIL.Image`` in RGB mode - confirm with callers).

    Returns:
        PIL.Image.Image: The thresholded single-channel image.
    """
    rgb_pixels = np.array(image)
    gray = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
    # Blur first so isolated noise pixels do not survive the threshold step.
    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    # With THRESH_OTSU set, the explicit threshold argument (0) is ignored and
    # the cut-off is chosen from the image histogram.
    threshold_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binary = cv2.threshold(smoothed, 0, 255, threshold_mode)[1]
    return Image.fromarray(binary)

# Shared OCR engine: English models with the text-angle classifier enabled.
# NOTE(review): det_model_dir / rec_model_dir are placeholder paths - point
# them at the directories where the detection / recognition models should live.
ocr = paddleocr.PaddleOCR(use_angle_cls=True, lang='en', det_model_dir='path/to/detection/model', rec_model_dir='path/to/recognition/model')

# Read the Gemini API key from the environment so the secret is never saved
# inside the notebook. The original placeholder is kept as the fallback, so
# behaviour is unchanged when GOOGLE_API_KEY is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "INSERT YOUR KEY HERE")
# Shared Gemini client used by analyze_receipt_with_gemini.
client = genai.Client(api_key=GOOGLE_API_KEY)

def paddle_receipt(receipt):
    """Run OCR on a receipt image and return the recognised text.

    Args:
        receipt: Image input forwarded unchanged to ``ocr.ocr`` (the caller in
            this notebook passes a file path). ``None`` means nothing was
            uploaded.

    Returns:
        str: The recognised text fragments, each followed by a space, with a
        line break inserted after every fourth fragment. When there is nothing
        to return, one of these status messages is returned instead:
        ``"There is no image uploaded"``, ``"No text detected"`` or
        ``"Error during OCR: <reason>"``. Exceptions are never propagated.
    """
    fragments_per_line = 4

    if receipt is None:
        return "There is no image uploaded"
    try:
        read = ocr.ocr(receipt, cls=True)  # Perform OCR
        # Only the first page of the result is used. Test it by truthiness
        # instead of len(): the page entry can be None when no text was found
        # (PaddleOCR 2.x reports such images as [None]), and len(None) would
        # turn "no text" into a misleading OCR error.
        detections = read[0] if read else None
        if not detections:
            return "No text detected"

        pieces = []
        for index, detection in enumerate(detections):
            if index and index % fragments_per_line == 0:
                pieces.append("\n")
            # detection[1][0] is the recognised string of this entry.
            pieces.append(detection[1][0] + " ")
        return "".join(pieces)
    except Exception as e:
        # Best-effort by design: report the failure as text for the chat UI.
        return f"Error during OCR: {e}"


def analyze_receipt_with_gemini(user_prompt, ocr_text=None):
    """Send a prompt (optionally with receipt text) to Gemini and return the reply.

    Args:
        user_prompt: Instruction or question for the model. When it is empty
            and ``ocr_text`` is given, a built-in receipt-extraction
            instruction is used instead.
        ocr_text: Optional text appended to the prompt under a
            "Receipt Text:" heading. When falsy, ``user_prompt`` is sent as-is.

    Returns:
        str: The model's reply, ``"No information extracted."`` when the reply
        has no text, or ``"Error during Gemini analysis: <reason>"`` when the
        request raises. Exceptions are never propagated.
    """
    # Previously this text was built and then always overwritten; it is now
    # the fallback used when the caller supplies receipt text but no prompt.
    default_instructions = (
        "Analyze the following text from a receipt and extract key information, "
        "including the store name, store address, date, total amount, change received, "
        "money given to the cashier, and any items purchased. "
        "If possible, also identify the payment method.  "
        "Provide the output in a conversational format."
    )

    if ocr_text:
        instructions = user_prompt if user_prompt else default_instructions
        prompt = f"""
        {instructions}

        Receipt Text:
        ```{ocr_text}```
        """
    else:
        # Nothing to attach: forward the caller's prompt unchanged.
        prompt = user_prompt
    try:
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=prompt,
        )
        return response.text if response.text else "No information extracted."
    except Exception as e:
        # Best-effort by design: report the failure as text for the chat UI.
        return f"Error during Gemini analysis: {e}"

def process_image_and_chat(image, chat_history):
    """OCR an uploaded receipt, ask Gemini to analyse it, and log the result.

    Args:
        image: The uploaded receipt as an object with a ``save(path)`` method
            (the UI in this notebook supplies a PIL image), or ``None``.
        chat_history: List of ``(user_message, bot_message)`` tuples. It is
            mutated in place.

    Returns:
        list: The same ``chat_history`` list with exactly one entry appended:
        a prompt to upload an image, the OCR status message, or Gemini's
        analysis. Every path returns the list itself so the result can be fed
        directly to the chat display and state.
    """
    # Status messages produced by paddle_receipt that must be shown to the
    # user as-is instead of being sent to Gemini as if they were receipt text.
    ocr_status_prefixes = (
        "Error during OCR:",
        "No text detected",
        "There is no image uploaded",
    )

    if image is None:
        chat_history.append((None, "Please upload a receipt image."))
        return chat_history

    # JPEG cannot store alpha or palette modes (e.g. an uploaded RGBA PNG).
    if getattr(image, "mode", "RGB") not in ("RGB", "L"):
        image = image.convert("RGB")

    # Reserve a path, then close the handle before writing to it so the save
    # does not depend on being able to open an already-open file.
    temp_image = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
    temp_image.close()
    try:
        image.save(temp_image.name)
        ocr_text = paddle_receipt(temp_image.name)
    finally:
        # Always remove the temporary file, even when saving or OCR fails.
        os.unlink(temp_image.name)

    if ocr_text.startswith(ocr_status_prefixes):
        chat_history.append((None, ocr_text))
        return chat_history

    # Pass the ocr_text for initial analysis
    gemini_response = analyze_receipt_with_gemini("Analyze this receipt:", ocr_text)
    chat_history.append(("", gemini_response))  # Append as (user_msg, bot_response)
    return chat_history


def launch_chatbot():
    """Build the Gradio receipt-analysis UI and launch it.

    The interface has a chat display, a text box for questions, an image
    upload for the receipt, and a state object holding the chat history as a
    list of ``(user_message, bot_message)`` tuples. Submitting text or changing
    the image updates both the chat display and the stored history.
    """
    with gr.Blocks() as app:
        gr.Markdown("## Receipt Analysis Chatbot")

        # Components must be created inside the Blocks context to be rendered.
        chatbot = gr.Chatbot(height=300)
        user_input = gr.Textbox(label="Enter your message")
        image_input = gr.Image(label="Upload Receipt Image", type="pil")
        chat_state = gr.State([])

        def respond_to_text(user_message, chat_history):
            """Answer a typed message and append the exchange to the history."""
            if user_message:
                # The running history is passed in the `ocr_text` slot, so a
                # non-empty history is embedded in the prompt as context.
                gemini_response = analyze_receipt_with_gemini(user_message, chat_history)
                chat_history.append((user_message, gemini_response))
            return chat_history, chat_history

        # Run respond_to_text when the user submits the text box.
        user_input.submit(
            fn=respond_to_text,
            inputs=[user_input, chat_state],
            outputs=[chatbot, chat_state]
        )

        def on_image_upload(image, chat_history):
            """Analyse a newly uploaded receipt and refresh the chat."""
            if image is None:
                # Clearing the image also fires `.change`; there is nothing to
                # analyse, so leave the conversation exactly as it is.
                return chat_history, chat_history
            updated_history = process_image_and_chat(image, chat_history)
            return updated_history, updated_history

        # Run on_image_upload whenever the image component's value changes.
        image_input.change(
            fn=on_image_upload,
            inputs=[image_input, chat_state],
            outputs=[chatbot, chat_state]
        )

    app.launch()


# Entry point: build and launch the chatbot UI when this code is executed as
# the main module (it is not launched when the code is imported).
if __name__ == "__main__":
    launch_chatbot()


download https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar to path/to/detection/model/en_PP-OCRv3_det_infer.tar


100%|██████████| 3910/3910 [00:16<00:00, 230.55it/s] 


download https://paddleocr.bj.bcebos.com/PP-OCRv4/english/en_PP-OCRv4_rec_infer.tar to path/to/recognition/model/en_PP-OCRv4_rec_infer.tar


100%|██████████| 10000/10000 [00:17<00:00, 568.10it/s]


download https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar to /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.tar


100%|██████████| 2138/2138 [00:14<00:00, 145.59it/s]

[2025/05/07 03:27:57] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='path/to/detection/model', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='path/to/recognition/model', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec_char_dict_path='/usr/local/lib/pytho




It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://93ca516feaac413899.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
