In [3]:
import gradio as gr
import pandas as pd
import json 
from FactureModel import Facture
import constants as const
from gradio import Interface, Image, Checkbox, Textbox
from functions import readDataSet, imageObjCreation, readImgOCR, pydanticParser, instructionsFormat, LLMModelCall

def process_invoice(image_path, do_easyocr):
    """Extract the date, amount and invoice number from an invoice image.

    Pipeline: read the image bytes, run OCR on them, build the LLM prompt
    from the OCR text, call the LLM, and parse its reply with the parser
    returned by ``pydanticParser()``.

    Args:
        image_path: Filesystem path of the invoice image (the Gradio image
            input feeding this function is declared with ``type="filepath"``).
        do_easyocr: Flag forwarded unchanged to ``readImgOCR``; presumably
            selects the EasyOCR backend -- confirm in ``functions``.

    Returns:
        A ``(date, monto, facture_number)`` tuple taken from the parsed
        result object.

    Raises:
        ValueError: If ``image_path`` is ``None`` or empty.
    """
    # A submission without an uploaded image arrives here as None; fail with
    # an explicit message instead of the opaque error open() would raise.
    if not image_path:
        raise ValueError("No invoice image provided. Please upload an image first.")

    # Step 1: Process the image to extract text using OCR
    with open(image_path, "rb") as f:
        image_bytes = f.read()
    image_obj = imageObjCreation(image_bytes)
    ocr_text = readImgOCR(image_bytes, image_obj, do_easyocr)

    # Step 2: Setup Pydantic parser and format instructions
    parser = pydanticParser()
    system_instructions, prompt = instructionsFormat(parser, ocr_text)

    # Step 3: Call the LLM model to extract invoice data
    extracted_data = LLMModelCall(prompt, system_instructions)
    facture_data = parser.parse(extracted_data)

    # Order matters: it must line up with the three output components
    # of the Gradio interface (Date, Monto, Facture Number).
    return facture_data.date, facture_data.monto, facture_data.facture_number

# Build the invoice-extraction UI: an image upload plus an OCR toggle as
# inputs, and one text box per extracted field as outputs.
iface = gr.Interface(
    fn=process_invoice,
    inputs=[
        gr.Image(type="filepath", label="Upload Invoice Image"),
        gr.Checkbox(label="Use EasyOCR"),
    ],
    # Output order must match the tuple returned by process_invoice.
    outputs=[
        gr.Textbox(label=field_label)
        for field_label in ("Date", "Monto", "Facture Number")
    ],
    title="Invoice Data Extractor",
    description="Upload an invoice image and extract data from it using OCR and LLM",
)

# Start the local Gradio server for this interface.
iface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




IMPORTANT: You are using gradio version 3.50.2, however version 4.29.0 is available, please upgrade.
--------


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
Downloading detection model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% Complete

Downloading recognition model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% CompleteTexto extraído con OCR:
Facuure 6)>ili orange SM ART CALL D ITRBUTDN Num éro de Facture F-0123-1879887 192 ETG 4 RES B N com pte client 10564756 BD ZERKTOUNI Date d 'éditin 23/01/2023 20000 CASABLANCA Vos coordonnées SM ART CALL DSTRBUTDN Votre facuure du 18 Janvir 2023 192 ETG 4 RES B BD ZERKTOUNI M ontant totalà payer 396 , DH Dom iiliatin bancaie Date li ie de paim ente 08/02/2023 PIssu Jsl ,/ Tiuhie :SM ART CALL DITRBUTDN Banque :ATTIARI AFABANK N de Com pte Pourvotre sécurité en cette pérpde, nous vous 007780000629500000017864 recom m andons de payer vos factures en lggne . Votre CE 001685924000018 sécurité estnotre rité Nous contacter Résum é de h facuure M ontanten DH Parcourir 0 range M aroc Vos forfaits/abonnem ents etoptins 330,00 Lotisem enth colne I m m eubb bs Q uate Tem ps M ontanthT 330,00 Votre servie clients TVA (20%) ^.G.ci 66,00 178 depui un m obi 0 range (appelgratui) (+212)05 20 178 178 T

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Texto extraído con OCR:
~i2i J;lil ~5001+ { ~c ! Auro-ealcepreneur Date 28/03/2023 Facture numéro 000 001 Client : Smart Call Distribution Adresse 3, Angle Rues Alfred de Musset et Abdelkader El Mazini ler Etage, Quartier Gautier, Casablanca Dësignätíön 'Quantité Prixuniraïre Total RD.V 30 120,00 DH 3600.00 DH AVANCE SUR REMUNERATION 500,00 500,00 Montant en dirhams Total Net à payer 3100,00 (Hors champ de la TVA') ARRETE LA PRESENTE FACTURE A LA SOMME DE #TROIS MILLE CENT DIRHAMS # FoyN# 'Art 89 II = 1' c, Code Général des Impôts. KCE: Auto Entrepreneur Mouhcine FTOUHI CNIE A 251909 Adresse 3, Imm 97, GH 23, Lotissement Firdaous, Oulfa, Casblanca ICE (N' d'inscription au registre national de lauto-entrepreneur) 003159152000040 IF : 52682016 Taxe professionnelle N' 25909366 TEL 0696579734 Mail Mouhcine22f@gmail com Mounchno Con Auto-Entr Oocw SsyY 00315


In [1]:
pip install gradio

Note: you may need to restart the kernel to use updated packages.


# Gradio app with OCR invoice extraction and Pinecone query

In [None]:
# import gradio as gr
# import pandas as pd
# import json 
# from FactureModel import Facture
# import constants as const
# from gradio import Interface, Image, Checkbox, Textbox
# from functions import readDataSet, imageObjCreation, readImgOCR, pydanticParser, instructionsFormat, LLMModelCall, query_pinecone

# def process_invoice(image_path, do_easyocr):
#     # Step 1: Process the image to extract text using OCR
#     with open(image_path, "rb") as f:
#         image_bytes = f.read()
#     image_obj = imageObjCreation(image_bytes)
#     ocr_text = readImgOCR(image_bytes, image_obj, do_easyocr)

#     # Step 2: Setup Pydantic parser and format instructions
#     parser = pydanticParser()
#     system_instructions, prompt = instructionsFormat(parser, ocr_text)

#     # Step 3: Call the LLM model to extract invoice data
#     extracted_data = LLMModelCall(prompt, system_instructions)
#     facture_data = parser.parse(extracted_data)
   
#     date = facture_data.date
#     monto = facture_data.monto
#     facture_number = facture_data.facture_number

    
#     return date,monto,facture_number

# def query_database(query_vector_text):
#     try:
#         query_vector= json.loads(query_vector_text)
#         db_results = query_pinecone(query_vector)
#     except json.JSONDecodeError:
#         db_results = "Invalid query vector format. Please enter a valid JSON array."
#     except Exception as e:
#         db_results = str(e)

#     return db_results
    
# def main_menu(choice):
#     if choice == "Extract Invoice Data":
#         return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
#     elif choice == "Query Pinecone Database":
#         return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
    
# menu_choices = ["Extract Invoice Data", "Query Pinecone Database"]
# with gr.Blocks() as demo:
#     with gr.Row():
#         dropdown = gr.Dropdown(menu_choices, label="Choose an option")
#         submit = gr.Button("Submit")

#     with gr.Row(visible=False) as invoice_interface_row:
#         with gr.Column():
#             invoice_interface = gr.Interface(
#                 fn=process_invoice,
#                 inputs=[
#                     gr.Image(type="filepath", label="Upload Invoice Image"),
#                     gr.Checkbox(label="Use EasyOCR"),
#                 ],
#                 outputs=[
#                     gr.Textbox(label="Date"),
#                     gr.Textbox(label="Monto"),
#                     gr.Textbox(label="Facture Number")
#                 ],
#                 title="Invoice Data Extractor",
#                 description="Upload an invoice image and extract data from it using OCR and LLM"
#             ).render()

#     with gr.Row(visible=False) as pinecone_interface_row:
#         with gr.Column():
#             pinecone_query_interface = gr.Interface(
#                 fn=query_database,
#                 inputs=[
#                     gr.Textbox(label="Query Vector", placeholder="Enter query vector as a list of numbers")
#                 ],
#                 outputs=[
#                     gr.Textbox(label="Pinecone Results")
#                 ],
#                 title="Pinecone Database Query",
#                 description="Enter a query vector to search the Pinecone database."
#             ).render()

#     submit.click(
#         main_menu,
#         inputs=[dropdown],
#         outputs=[dropdown, invoice_interface_row, pinecone_interface_row]
#     )

# # Launch the main menu
# demo.launch()